4 * Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
16 * * Neither the name of Intel Corporation nor the names of its
17 * contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 #include <rte_malloc.h>
34 #include <rte_cycles.h>
35 #include <rte_crypto.h>
36 #include <rte_cryptodev.h>
38 #include "cperf_test_throughput.h"
39 #include "cperf_ops.h"
/*
 * Aggregate statistics for one throughput test run. The runner stores its
 * totals and derived rates here and the destructor prints them.
 * NOTE(review): this listing has elided lines; the closing brace of this
 * struct is not visible here.
 */
41 struct cperf_throughput_results {
/* Copied from the runner's ops_enqd_total / ops_deqd_total counters. */
42 uint64_t ops_enqueued;
43 uint64_t ops_dequeued;
/*
 * Copied from the runner's ops_enqd_failed / ops_deqd_failed counters and
 * printed under the "Failed Enq" / "Failed Deq" columns.
 */
45 uint64_t ops_enqueued_failed;
46 uint64_t ops_dequeued_failed;
/* total_ops divided by the TSC duration, scaled by rte_get_tsc_hz(). */
48 double ops_per_second;
/* ops_per_second * buffer_sz * 8 / 1000000000. */
49 double throughput_gbps;
/* TSC duration divided by total_ops and then by buffer_sz. */
50 double cycles_per_byte;
/*
 * State owned by one throughput test instance; created by the constructor,
 * used by the runner and released by cperf_throughput_test_free().
 * NOTE(review): the members dev_id, qp_id and lcore_id are used elsewhere in
 * this file, but their declarations fall on lines elided from this listing.
 */
53 struct cperf_throughput_ctx {
/* Mempools backing the source and (out-of-place only) destination mbufs. */
58 struct rte_mempool *pkt_mbuf_pool_in;
59 struct rte_mempool *pkt_mbuf_pool_out;
/* Arrays of options->pool_sz mbuf pointers; mbufs_out entries may be NULL. */
60 struct rte_mbuf **mbufs_in;
61 struct rte_mbuf **mbufs_out;
/* Pool the runner bulk-allocates symmetric crypto ops from. */
63 struct rte_mempool *crypto_op_pool;
/* Session returned by op_fns->sess_create() in the constructor. */
65 struct rte_cryptodev_sym_session *sess;
/* Callback that fills a burst of ops and attaches the mbufs. */
67 cperf_populate_ops_t populate_ops;
/* Borrowed pointers supplied by the caller of the constructor. */
69 const struct cperf_options *options;
70 const struct cperf_test_vector *test_vector;
/* Filled in by the runner, printed by the destructor. */
71 struct cperf_throughput_results results;
/*
 * Release the resources attached to a throughput test context: the crypto
 * session, the source and destination mbufs with their pointer arrays, and
 * the three mempools.
 *
 * ctx:     context whose resources are released.
 * mbuf_nb: number of leading entries of mbufs_in / mbufs_out to free.
 *
 * NOTE(review): the return type, local declarations, NULL guards and braces
 * are on lines elided from this listing, so whether ctx itself is freed and
 * which calls are guarded cannot be confirmed from here.
 */
76 cperf_throughput_test_free(struct cperf_throughput_ctx *ctx, uint32_t mbuf_nb)
82 rte_cryptodev_sym_session_free(ctx->dev_id, ctx->sess);
/* Free the first mbuf_nb source mbufs, then the pointer array itself. */
85 for (i = 0; i < mbuf_nb; i++)
86 rte_pktmbuf_free(ctx->mbufs_in[i]);
88 rte_free(ctx->mbufs_in);
/* The constructor stores NULL in unused destination slots, hence the check. */
92 for (i = 0; i < mbuf_nb; i++) {
93 if (ctx->mbufs_out[i] != NULL)
94 rte_pktmbuf_free(ctx->mbufs_out[i]);
97 rte_free(ctx->mbufs_out);
/* Pools are released after the mbufs that were allocated from them. */
100 if (ctx->pkt_mbuf_pool_in)
101 rte_mempool_free(ctx->pkt_mbuf_pool_in);
103 if (ctx->pkt_mbuf_pool_out)
104 rte_mempool_free(ctx->pkt_mbuf_pool_out);
106 if (ctx->crypto_op_pool)
107 rte_mempool_free(ctx->crypto_op_pool);
/*
 * Build one test mbuf (optionally a chain of segments) from mempool and fill
 * it with test vector data.
 *
 * mempool:     pool the mbuf segments are allocated from.
 * segments_nb: number of segments the buffer is split into.
 * options:     supplies buffer_sz, cipher_op, op_type, auth_digest_sz and
 *              auth_aad_sz.
 * test_vector: supplies the plaintext / ciphertext and AAD bytes.
 *
 * Returns the head mbuf. NOTE(review): the error label and return statements
 * are on lines elided from this listing; the visible tail frees mbuf,
 * presumably on the failure path - confirm against the full file.
 *
 * NOTE(review): segments_nb is used as a divisor below, so a value of 0
 * would divide by zero; callers visible in this file pass
 * options->segments_nb or 1.
 */
113 static struct rte_mbuf *
114 cperf_mbuf_create(struct rte_mempool *mempool,
115 uint32_t segments_nb,
116 const struct cperf_options *options,
117 const struct cperf_test_vector *test_vector)
119 struct rte_mbuf *mbuf;
/* Each segment carries segment_sz bytes; the remainder is appended last. */
120 uint32_t segment_sz = options->buffer_sz / segments_nb;
121 uint32_t last_sz = options->buffer_sz % segments_nb;
124 (options->cipher_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) ?
125 test_vector->plaintext.data :
126 test_vector->ciphertext.data;
/* First segment: allocate the head mbuf and copy the first chunk into it. */
128 mbuf = rte_pktmbuf_alloc(mempool);
132 mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
133 if (mbuf_data == NULL)
136 memcpy(mbuf_data, test_data, segment_sz);
137 test_data += segment_sz;
/*
 * Remaining segments: chain a fresh mbuf onto the head and append the next
 * chunk. NOTE(review): the segments_nb decrement is not visible in this
 * listing.
 */
140 while (segments_nb) {
143 m = rte_pktmbuf_alloc(mempool);
147 rte_pktmbuf_chain(mbuf, m);
149 mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
150 if (mbuf_data == NULL)
153 memcpy(mbuf_data, test_data, segment_sz);
154 test_data += segment_sz;
/* Leftover bytes when buffer_sz is not a multiple of segments_nb. */
159 mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, last_sz);
160 if (mbuf_data == NULL)
163 memcpy(mbuf_data, test_data, last_sz);
/* Reserve room for the digest for every op type except cipher-only. */
166 if (options->op_type != CPERF_CIPHER_ONLY) {
167 mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf,
168 options->auth_digest_sz);
169 if (mbuf_data == NULL)
/*
 * AEAD: place the AAD in front of the data.
 * NOTE(review): the space reserved is auth_aad_sz rounded up to 16, but the
 * copy length is test_vector->aad.length - confirm the two always agree.
 */
173 if (options->op_type == CPERF_AEAD) {
174 uint8_t *aead = (uint8_t *)rte_pktmbuf_prepend(mbuf,
175 RTE_ALIGN_CEIL(options->auth_aad_sz, 16));
180 memcpy(aead, test_vector->aad.data, test_vector->aad.length);
186 rte_pktmbuf_free(mbuf);
/*
 * Allocate and initialise a throughput test context for one crypto device.
 *
 * dev_id, qp_id: crypto device and queue pair the test will use.
 * options:       test options (pool_sz, burst_sz, segments_nb, buffer_sz,
 *                auth_digest_sz, out_of_place are read here).
 * test_vector:   data used to populate the mbufs.
 * op_fns:        supplies the sess_create and populate_ops callbacks.
 *
 * Visible steps: allocate ctx, create the session, create the input mbuf
 * pool and fill mbufs_in, optionally create the output pool, fill mbufs_out
 * (NULL entries when not out-of-place), create the crypto op pool. The
 * visible tail calls cperf_throughput_test_free(ctx, mbuf_idx).
 *
 * NOTE(review): the return type, the return statements, the error label and
 * the bodies of the failure checks are on lines elided from this listing.
 *
 * NOTE(review): ctx is obtained with rte_malloc(), which does not zero the
 * memory, and pkt_mbuf_pool_out is only assigned inside the out_of_place
 * branch, yet cperf_throughput_test_free() tests that pointer. Unless an
 * elided line clears the context, consider rte_zmalloc() - confirm.
 */
192 cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
193 const struct cperf_options *options,
194 const struct cperf_test_vector *test_vector,
195 const struct cperf_op_fns *op_fns)
197 struct cperf_throughput_ctx *ctx = NULL;
198 unsigned int mbuf_idx = 0;
199 char pool_name[32] = "";
201 ctx = rte_malloc(NULL, sizeof(struct cperf_throughput_ctx), 0);
205 ctx->dev_id = dev_id;
208 ctx->populate_ops = op_fns->populate_ops;
209 ctx->options = options;
210 ctx->test_vector = test_vector;
212 ctx->sess = op_fns->sess_create(dev_id, options, test_vector);
213 if (ctx->sess == NULL)
/*
 * Input pool: pool_sz * segments_nb mbufs, each large enough for the
 * headroom plus one segment, the remainder bytes and the digest.
 */
216 snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d",
219 ctx->pkt_mbuf_pool_in = rte_pktmbuf_pool_create(pool_name,
220 options->pool_sz * options->segments_nb, 0, 0,
221 RTE_PKTMBUF_HEADROOM +
222 RTE_CACHE_LINE_ROUNDUP(
223 (options->buffer_sz / options->segments_nb) +
224 (options->buffer_sz % options->segments_nb) +
225 options->auth_digest_sz),
228 if (ctx->pkt_mbuf_pool_in == NULL)
231 /* Generate mbufs_in populated with the test vector data for the test */
/* pool_sz is checked for being a multiple of burst_sz (action elided). */
232 if (ctx->options->pool_sz % ctx->options->burst_sz)
235 ctx->mbufs_in = rte_malloc(NULL,
236 (sizeof(struct rte_mbuf *) * ctx->options->pool_sz), 0);
238 for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
239 ctx->mbufs_in[mbuf_idx] = cperf_mbuf_create(
240 ctx->pkt_mbuf_pool_in, options->segments_nb,
241 options, test_vector);
242 if (ctx->mbufs_in[mbuf_idx] == NULL)
/* Output pool is only created for out-of-place operation. */
246 if (options->out_of_place == 1) {
248 snprintf(pool_name, sizeof(pool_name), "cperf_pool_out_cdev_%d",
251 ctx->pkt_mbuf_pool_out = rte_pktmbuf_pool_create(
252 pool_name, options->pool_sz, 0, 0,
253 RTE_PKTMBUF_HEADROOM +
254 RTE_CACHE_LINE_ROUNDUP(
256 options->auth_digest_sz),
259 if (ctx->pkt_mbuf_pool_out == NULL)
/*
 * Destination mbufs are single-segment when out-of-place; a NULL store is
 * also visible below, presumably the in-place branch (its else is elided).
 */
263 ctx->mbufs_out = rte_malloc(NULL,
264 (sizeof(struct rte_mbuf *) *
265 ctx->options->pool_sz), 0);
267 for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
268 if (options->out_of_place == 1) {
269 ctx->mbufs_out[mbuf_idx] = cperf_mbuf_create(
270 ctx->pkt_mbuf_pool_out, 1,
271 options, test_vector);
272 if (ctx->mbufs_out[mbuf_idx] == NULL)
275 ctx->mbufs_out[mbuf_idx] = NULL;
/* Pool of symmetric crypto ops, one per pool entry. */
279 snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d",
282 ctx->crypto_op_pool = rte_crypto_op_pool_create(pool_name,
283 RTE_CRYPTO_OP_TYPE_SYMMETRIC, options->pool_sz, 0, 0,
285 if (ctx->crypto_op_pool == NULL)
/* Cleanup call; mbuf_idx tells the free routine how many mbufs exist. */
290 cperf_throughput_test_free(ctx, mbuf_idx);
/*
 * Run the throughput measurement for one context (passed as void *).
 *
 * Visible flow: an optional scatter-gather capability check (compiled only
 * when CPERF_LINEARIZATION_ENABLE is defined), a warm-up loop of zero-length
 * enqueue bursts, a timed enqueue/dequeue loop that runs until
 * options->total_ops ops have been enqueued, and a drain loop that runs
 * until total_ops ops have been dequeued. The elapsed TSC cycles are then
 * turned into ops/s, Gbps and cycles/byte and stored in ctx->results.
 *
 * NOTE(review): this listing has elided lines (return type, braces, several
 * statements and some comment delimiters), so the bodies of some branches
 * and the return value are not visible here.
 */
296 cperf_throughput_test_runner(void *test_ctx)
298 struct cperf_throughput_ctx *ctx = test_ctx;
300 uint64_t ops_enqd = 0, ops_enqd_total = 0, ops_enqd_failed = 0;
301 uint64_t ops_deqd = 0, ops_deqd_total = 0, ops_deqd_failed = 0;
303 uint64_t i, m_idx = 0, tsc_start, tsc_end, tsc_duration;
305 uint16_t ops_unused = 0;
/* Per-burst op arrays are variable-length arrays sized by burst_sz. */
307 struct rte_crypto_op *ops[ctx->options->burst_sz];
308 struct rte_crypto_op *ops_processed[ctx->options->burst_sz];
310 uint32_t lcore = rte_lcore_id();
312 #ifdef CPERF_LINEARIZATION_ENABLE
313 struct rte_cryptodev_info dev_info;
316 /* Check if source mbufs require coalescing */
317 if (ctx->options->segments_nb > 1) {
318 rte_cryptodev_info_get(ctx->dev_id, &dev_info);
319 if ((dev_info.feature_flags &
320 RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
323 #endif /* CPERF_LINEARIZATION_ENABLE */
325 ctx->lcore_id = lcore;
327 if (!ctx->options->csv)
328 printf("\n# Running throughput test on device: %u, lcore: %u\n",
331 /* Warm up the host CPU before starting the test */
332 for (i = 0; i < ctx->options->total_ops; i++)
333 rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);
/* Timed section starts here and ends after the drain loop below. */
335 tsc_start = rte_rdtsc_precise();
337 while (ops_enqd_total < ctx->options->total_ops) {
/*
 * Use a full burst unless that would exceed total_ops; the shortened size
 * is total_ops minus a value on an elided line.
 */
339 uint16_t burst_size = ((ops_enqd_total + ctx->options->burst_sz)
340 <= ctx->options->total_ops) ?
341 ctx->options->burst_sz :
342 ctx->options->total_ops -
/* Ops left unenqueued by the previous iteration reduce the new allocation. */
345 uint16_t ops_needed = burst_size - ops_unused;
347 /* Allocate crypto ops from pool */
348 if (ops_needed != rte_crypto_op_bulk_alloc(
350 RTE_CRYPTO_OP_TYPE_SYMMETRIC,
354 /* Setup crypto op, attach mbuf etc */
355 (ctx->populate_ops)(ops, &ctx->mbufs_in[m_idx],
356 &ctx->mbufs_out[m_idx],
357 ops_needed, ctx->sess, ctx->options,
360 #ifdef CPERF_LINEARIZATION_ENABLE
362 /* PMD doesn't support scatter-gather and source buffer
364 * We need to linearize it before enqueuing.
366 for (i = 0; i < burst_size; i++)
367 rte_pktmbuf_linearize(ops[i]->sym->m_src);
369 #endif /* CPERF_LINEARIZATION_ENABLE */
371 /* Enqueue burst of ops on crypto device */
372 ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
374 if (ops_enqd < burst_size)
378 * Calculate number of ops not enqueued (mainly for hw
379 * accelerators whose ingress queue can fill up).
381 ops_unused = burst_size - ops_enqd;
382 ops_enqd_total += ops_enqd;
385 /* Dequeue processed burst of ops from crypto device */
386 ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
387 ops_processed, ctx->options->burst_sz);
389 if (likely(ops_deqd)) {
390 /* free crypto ops so they can be reused. We don't free
391 * the mbufs here as we don't want to reuse them as
392 * the crypto operation will change the data and cause
395 for (i = 0; i < ops_deqd; i++)
396 rte_crypto_op_free(ops_processed[i]);
398 ops_deqd_total += ops_deqd;
401 * Count dequeue polls which didn't return any
402 * processed operations. This statistic is mainly
403 * relevant to hw accelerators.
409 m_idx = m_idx + ctx->options->burst_sz > ctx->options->pool_sz ?
413 /* Dequeue any operations still in the crypto device */
415 while (ops_deqd_total < ctx->options->total_ops) {
416 /* Sending 0 length burst to flush sw crypto device */
417 rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);
420 ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
421 ops_processed, ctx->options->burst_sz);
425 for (i = 0; i < ops_deqd; i++)
426 rte_crypto_op_free(ops_processed[i]);
428 ops_deqd_total += ops_deqd;
/* The clock is stopped only after the drain loop has finished. */
432 tsc_end = rte_rdtsc_precise();
433 tsc_duration = (tsc_end - tsc_start);
435 /* Calculate average operations processed per second */
436 ctx->results.ops_per_second = ((double)ctx->options->total_ops /
437 tsc_duration) * rte_get_tsc_hz();
439 /* Calculate average throughput in gigabits per second (Gbps) */
440 ctx->results.throughput_gbps = ((ctx->results.ops_per_second *
441 ctx->options->buffer_sz * 8) / 1000000000);
443 /* Calculate average cycles per byte */
444 ctx->results.cycles_per_byte = ((double)tsc_duration /
445 ctx->options->total_ops) / ctx->options->buffer_sz;
/* Publish the raw counters alongside the derived rates. */
447 ctx->results.ops_enqueued = ops_enqd_total;
448 ctx->results.ops_dequeued = ops_deqd_total;
450 ctx->results.ops_enqueued_failed = ops_enqd_failed;
451 ctx->results.ops_dequeued_failed = ops_deqd_failed;
/*
 * Print the results stored in the context and then release it through
 * cperf_throughput_test_free(), passing options->pool_sz as the mbuf count.
 *
 * arg: the struct cperf_throughput_ctx to report on and tear down.
 *
 * Output is a human-readable table when options->csv is not set; the other
 * visible printf calls emit a header row and a delimiter-separated data row.
 *
 * NOTE(review): the return type, braces, the else keyword, any NULL check
 * on ctx and the use of the static only_once flag are on lines elided from
 * this listing. results is derived from ctx on its declaration line, i.e.
 * before any such check could run.
 */
458 cperf_throughput_test_destructor(void *arg)
460 struct cperf_throughput_ctx *ctx = arg;
461 struct cperf_throughput_results *results = &ctx->results;
462 static int only_once;
467 if (!ctx->options->csv) {
468 printf("\n# Device %d on lcore %u\n",
469 ctx->dev_id, ctx->lcore_id);
470 printf("# Buffer Size(B)\t Enqueued\t Dequeued\tFailed Enq"
471 "\tFailed Deq\tOps(Millions)\tThroughput(Gbps)"
472 "\tCycles Per Byte\n");
474 printf("\n%16u\t%10"PRIu64"\t%10"PRIu64"\t%10"PRIu64"\t"
475 "%10"PRIu64"\t%16.4f\t%16.4f\t%15.2f\n",
476 ctx->options->buffer_sz,
477 results->ops_enqueued,
478 results->ops_dequeued,
479 results->ops_enqueued_failed,
480 results->ops_dequeued_failed,
481 results->ops_per_second/1000000,
482 results->throughput_gbps,
483 results->cycles_per_byte);
/*
 * NOTE(review): the header row below separates columns with commas while the
 * data row format that follows uses semicolons - confirm which delimiter
 * consumers of this output expect and make the two agree.
 */
486 printf("\n# CPU lcore id, Burst Size(B), "
487 "Buffer Size(B),Enqueued,Dequeued,Failed Enq,"
488 "Failed Deq,Ops(Millions),Throughput(Gbps),"
489 "Cycles Per Byte\n");
492 printf("%u;%u;%u;%"PRIu64";%"PRIu64";%"PRIu64";%"PRIu64";"
495 ctx->options->burst_sz,
496 ctx->options->buffer_sz,
497 results->ops_enqueued,
498 results->ops_dequeued,
499 results->ops_enqueued_failed,
500 results->ops_dequeued_failed,
501 results->ops_per_second/1000000,
502 results->throughput_gbps,
503 results->cycles_per_byte);
/* Tear down the context after reporting. */
506 cperf_throughput_test_free(ctx, ctx->options->pool_sz);