/*
 *   Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

33 #include <rte_malloc.h>
34 #include <rte_cycles.h>
35 #include <rte_crypto.h>
36 #include <rte_cryptodev.h>
38 #include "cperf_test_throughput.h"
39 #include "cperf_ops.h"
/**
 * Counters and derived rates produced by one throughput run.
 *
 * The runner fills this in once its enqueue/dequeue loops have finished;
 * the destructor prints it.
 */
struct cperf_throughput_results {
	/* Total operations accepted by / returned from the device. */
	uint64_t ops_enqueued;
	uint64_t ops_dequeued;

	/*
	 * Number of enqueue calls that accepted fewer ops than offered, and
	 * number of dequeue polls that returned no processed operations.
	 */
	uint64_t ops_enqueued_failed;
	uint64_t ops_dequeued_failed;

	/* Rates derived from the measured TSC duration. */
	double ops_per_second;
	double throughput_gbps;
	double cycles_per_byte;
};

53 struct cperf_throughput_ctx {
58 struct rte_mempool *pkt_mbuf_pool_in;
59 struct rte_mempool *pkt_mbuf_pool_out;
60 struct rte_mbuf **mbufs_in;
61 struct rte_mbuf **mbufs_out;
63 struct rte_mempool *crypto_op_pool;
65 struct rte_cryptodev_sym_session *sess;
67 cperf_populate_ops_t populate_ops;
69 const struct cperf_options *options;
70 const struct cperf_test_vector *test_vector;
71 struct cperf_throughput_results results;
76 cperf_throughput_test_free(struct cperf_throughput_ctx *ctx, uint32_t mbuf_nb)
82 rte_cryptodev_sym_session_free(ctx->dev_id, ctx->sess);
85 for (i = 0; i < mbuf_nb; i++)
86 rte_pktmbuf_free(ctx->mbufs_in[i]);
88 rte_free(ctx->mbufs_in);
92 for (i = 0; i < mbuf_nb; i++) {
93 if (ctx->mbufs_out[i] != NULL)
94 rte_pktmbuf_free(ctx->mbufs_out[i]);
97 rte_free(ctx->mbufs_out);
100 if (ctx->pkt_mbuf_pool_in)
101 rte_mempool_free(ctx->pkt_mbuf_pool_in);
103 if (ctx->pkt_mbuf_pool_out)
104 rte_mempool_free(ctx->pkt_mbuf_pool_out);
106 if (ctx->crypto_op_pool)
107 rte_mempool_free(ctx->crypto_op_pool);
113 static struct rte_mbuf *
114 cperf_mbuf_create(struct rte_mempool *mempool,
115 uint32_t segments_nb,
116 const struct cperf_options *options,
117 const struct cperf_test_vector *test_vector)
119 struct rte_mbuf *mbuf;
120 uint32_t segment_sz = options->buffer_sz / segments_nb;
121 uint32_t last_sz = options->buffer_sz % segments_nb;
124 (options->cipher_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) ?
125 test_vector->plaintext.data :
126 test_vector->ciphertext.data;
128 mbuf = rte_pktmbuf_alloc(mempool);
132 mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
133 if (mbuf_data == NULL)
136 memcpy(mbuf_data, test_data, segment_sz);
137 test_data += segment_sz;
140 while (segments_nb) {
143 m = rte_pktmbuf_alloc(mempool);
147 rte_pktmbuf_chain(mbuf, m);
149 mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
150 if (mbuf_data == NULL)
153 memcpy(mbuf_data, test_data, segment_sz);
154 test_data += segment_sz;
159 mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, last_sz);
160 if (mbuf_data == NULL)
163 memcpy(mbuf_data, test_data, last_sz);
166 mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf,
167 options->auth_digest_sz);
168 if (mbuf_data == NULL)
171 if (options->op_type == CPERF_AEAD) {
172 uint8_t *aead = (uint8_t *)rte_pktmbuf_prepend(mbuf,
173 RTE_ALIGN_CEIL(options->auth_aad_sz, 16));
178 memcpy(aead, test_vector->aad.data, test_vector->aad.length);
184 rte_pktmbuf_free(mbuf);
190 cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
191 const struct cperf_options *options,
192 const struct cperf_test_vector *test_vector,
193 const struct cperf_op_fns *op_fns)
195 struct cperf_throughput_ctx *ctx = NULL;
196 unsigned int mbuf_idx = 0;
197 char pool_name[32] = "";
199 ctx = rte_malloc(NULL, sizeof(struct cperf_throughput_ctx), 0);
203 ctx->dev_id = dev_id;
206 ctx->populate_ops = op_fns->populate_ops;
207 ctx->options = options;
208 ctx->test_vector = test_vector;
210 ctx->sess = op_fns->sess_create(dev_id, options, test_vector);
211 if (ctx->sess == NULL)
214 snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d",
217 ctx->pkt_mbuf_pool_in = rte_pktmbuf_pool_create(pool_name,
218 options->pool_sz * options->segments_nb, 0, 0,
219 RTE_PKTMBUF_HEADROOM +
220 RTE_CACHE_LINE_ROUNDUP(
221 (options->buffer_sz / options->segments_nb) +
222 (options->buffer_sz % options->segments_nb) +
223 options->auth_digest_sz),
226 if (ctx->pkt_mbuf_pool_in == NULL)
229 /* Generate mbufs_in with plaintext populated for test */
230 if (ctx->options->pool_sz % ctx->options->burst_sz)
233 ctx->mbufs_in = rte_malloc(NULL,
234 (sizeof(struct rte_mbuf *) * ctx->options->pool_sz), 0);
236 for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
237 ctx->mbufs_in[mbuf_idx] = cperf_mbuf_create(
238 ctx->pkt_mbuf_pool_in, options->segments_nb,
239 options, test_vector);
240 if (ctx->mbufs_in[mbuf_idx] == NULL)
244 if (options->out_of_place == 1) {
246 snprintf(pool_name, sizeof(pool_name), "cperf_pool_out_cdev_%d",
249 ctx->pkt_mbuf_pool_out = rte_pktmbuf_pool_create(
250 pool_name, options->pool_sz, 0, 0,
251 RTE_PKTMBUF_HEADROOM +
252 RTE_CACHE_LINE_ROUNDUP(
254 options->auth_digest_sz),
257 if (ctx->pkt_mbuf_pool_out == NULL)
261 ctx->mbufs_out = rte_malloc(NULL,
262 (sizeof(struct rte_mbuf *) *
263 ctx->options->pool_sz), 0);
265 for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
266 if (options->out_of_place == 1) {
267 ctx->mbufs_out[mbuf_idx] = cperf_mbuf_create(
268 ctx->pkt_mbuf_pool_out, 1,
269 options, test_vector);
270 if (ctx->mbufs_out[mbuf_idx] == NULL)
273 ctx->mbufs_out[mbuf_idx] = NULL;
277 snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d",
280 ctx->crypto_op_pool = rte_crypto_op_pool_create(pool_name,
281 RTE_CRYPTO_OP_TYPE_SYMMETRIC, options->pool_sz, 0, 0,
283 if (ctx->crypto_op_pool == NULL)
288 cperf_throughput_test_free(ctx, mbuf_idx);
294 cperf_throughput_test_runner(void *test_ctx)
296 struct cperf_throughput_ctx *ctx = test_ctx;
298 uint64_t ops_enqd = 0, ops_enqd_total = 0, ops_enqd_failed = 0;
299 uint64_t ops_deqd = 0, ops_deqd_total = 0, ops_deqd_failed = 0;
301 uint64_t i, m_idx = 0, tsc_start, tsc_end, tsc_duration;
303 uint16_t ops_unused = 0;
305 struct rte_crypto_op *ops[ctx->options->burst_sz];
306 struct rte_crypto_op *ops_processed[ctx->options->burst_sz];
308 uint32_t lcore = rte_lcore_id();
310 #ifdef CPERF_LINEARIZATION_ENABLE
311 struct rte_cryptodev_info dev_info;
314 /* Check if source mbufs require coalescing */
315 if (ctx->options->segments_nb > 1) {
316 rte_cryptodev_info_get(ctx->dev_id, &dev_info);
317 if ((dev_info.feature_flags &
318 RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
321 #endif /* CPERF_LINEARIZATION_ENABLE */
323 ctx->lcore_id = lcore;
325 if (!ctx->options->csv)
326 printf("\n# Running throughput test on device: %u, lcore: %u\n",
329 /* Warm up the host CPU before starting the test */
330 for (i = 0; i < ctx->options->total_ops; i++)
331 rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);
333 tsc_start = rte_rdtsc_precise();
335 while (ops_enqd_total < ctx->options->total_ops) {
337 uint16_t burst_size = ((ops_enqd_total + ctx->options->burst_sz)
338 <= ctx->options->total_ops) ?
339 ctx->options->burst_sz :
340 ctx->options->total_ops -
343 uint16_t ops_needed = burst_size - ops_unused;
345 /* Allocate crypto ops from pool */
346 if (ops_needed != rte_crypto_op_bulk_alloc(
348 RTE_CRYPTO_OP_TYPE_SYMMETRIC,
352 /* Setup crypto op, attach mbuf etc */
353 (ctx->populate_ops)(ops, &ctx->mbufs_in[m_idx],
354 &ctx->mbufs_out[m_idx],
355 ops_needed, ctx->sess, ctx->options,
358 #ifdef CPERF_LINEARIZATION_ENABLE
360 /* PMD doesn't support scatter-gather and source buffer
362 * We need to linearize it before enqueuing.
364 for (i = 0; i < burst_size; i++)
365 rte_pktmbuf_linearize(ops[i]->sym->m_src);
367 #endif /* CPERF_LINEARIZATION_ENABLE */
369 /* Enqueue burst of ops on crypto device */
370 ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
372 if (ops_enqd < burst_size)
376 * Calculate number of ops not enqueued (mainly for hw
377 * accelerators whose ingress queue can fill up).
379 ops_unused = burst_size - ops_enqd;
380 ops_enqd_total += ops_enqd;
383 /* Dequeue processed burst of ops from crypto device */
384 ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
385 ops_processed, ctx->options->burst_sz);
387 if (likely(ops_deqd)) {
388 /* free crypto ops so they can be reused. We don't free
389 * the mbufs here as we don't want to reuse them as
390 * the crypto operation will change the data and cause
393 for (i = 0; i < ops_deqd; i++)
394 rte_crypto_op_free(ops_processed[i]);
396 ops_deqd_total += ops_deqd;
399 * Count dequeue polls which didn't return any
400 * processed operations. This statistic is mainly
401 * relevant to hw accelerators.
407 m_idx = m_idx + ctx->options->burst_sz > ctx->options->pool_sz ?
411 /* Dequeue any operations still in the crypto device */
413 while (ops_deqd_total < ctx->options->total_ops) {
414 /* Sending 0 length burst to flush sw crypto device */
415 rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);
418 ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
419 ops_processed, ctx->options->burst_sz);
423 for (i = 0; i < ops_deqd; i++)
424 rte_crypto_op_free(ops_processed[i]);
426 ops_deqd_total += ops_deqd;
430 tsc_end = rte_rdtsc_precise();
431 tsc_duration = (tsc_end - tsc_start);
433 /* Calculate average operations processed per second */
434 ctx->results.ops_per_second = ((double)ctx->options->total_ops /
435 tsc_duration) * rte_get_tsc_hz();
437 /* Calculate average throughput (Gbps) in bits per second */
438 ctx->results.throughput_gbps = ((ctx->results.ops_per_second *
439 ctx->options->buffer_sz * 8) / 1000000000);
441 /* Calculate average cycles per byte */
442 ctx->results.cycles_per_byte = ((double)tsc_duration /
443 ctx->options->total_ops) / ctx->options->buffer_sz;
445 ctx->results.ops_enqueued = ops_enqd_total;
446 ctx->results.ops_dequeued = ops_deqd_total;
448 ctx->results.ops_enqueued_failed = ops_enqd_failed;
449 ctx->results.ops_dequeued_failed = ops_deqd_failed;
456 cperf_throughput_test_destructor(void *arg)
458 struct cperf_throughput_ctx *ctx = arg;
459 struct cperf_throughput_results *results = &ctx->results;
460 static int only_once;
465 if (!ctx->options->csv) {
466 printf("\n# Device %d on lcore %u\n",
467 ctx->dev_id, ctx->lcore_id);
468 printf("# Buffer Size(B)\t Enqueued\t Dequeued\tFailed Enq"
469 "\tFailed Deq\tOps(Millions)\tThroughput(Gbps)"
470 "\tCycles Per Byte\n");
472 printf("\n%16u\t%10"PRIu64"\t%10"PRIu64"\t%10"PRIu64"\t"
473 "%10"PRIu64"\t%16.4f\t%16.4f\t%15.2f\n",
474 ctx->options->buffer_sz,
475 results->ops_enqueued,
476 results->ops_dequeued,
477 results->ops_enqueued_failed,
478 results->ops_dequeued_failed,
479 results->ops_per_second/1000000,
480 results->throughput_gbps,
481 results->cycles_per_byte);
484 printf("\n# CPU lcore id, Burst Size(B), "
485 "Buffer Size(B),Enqueued,Dequeued,Failed Enq,"
486 "Failed Deq,Ops(Millions),Throughput(Gbps),"
487 "Cycles Per Byte\n");
490 printf("%u;%u;%u;%"PRIu64";%"PRIu64";%"PRIu64";%"PRIu64";"
493 ctx->options->burst_sz,
494 ctx->options->buffer_sz,
495 results->ops_enqueued,
496 results->ops_dequeued,
497 results->ops_enqueued_failed,
498 results->ops_dequeued_failed,
499 results->ops_per_second/1000000,
500 results->throughput_gbps,
501 results->cycles_per_byte);
504 cperf_throughput_test_free(ctx, ctx->options->pool_sz);