/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2017 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdbool.h>

#include <rte_crypto.h>
#include <rte_cryptodev.h>
#include <rte_cycles.h>
#include <rte_malloc.h>

#include "cperf_ops.h"
#include "cperf_test_pmd_cyclecount.h"
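
/*
 * Result formats: an aligned human-readable table for normal runs and one
 * machine-parsable line per burst size for CSV mode.
 */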
#define PRETTY_HDR_FMT "%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s\n\n"
#define PRETTY_LINE_FMT "%12u%12u%12u%12u%12u%12u%12u%12.0f%12.0f%12.0f\n"
#define CSV_HDR_FMT "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n"
#define CSV_LINE_FMT "%10u;%10u;%u;%u;%u;%u;%u;%.3f;%.3f;%.3f\n"
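
/*
 * Per-queue-pair test context: the mbuf pools and pre-built source (and
 * optional out-of-place destination) mbufs, the crypto op pool and op
 * arrays, the session, and the op population callback together with the
 * options and test vector it operates on.
 */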
struct cperf_pmd_cyclecount_ctx {
	uint8_t dev_id;
	uint16_t qp_id;
	uint32_t lcore_id;

	struct rte_mempool *pkt_mbuf_pool_in;
	struct rte_mempool *pkt_mbuf_pool_out;
	struct rte_mbuf **mbufs_in;
	struct rte_mbuf **mbufs_out;

	struct rte_mempool *crypto_op_pool;
	struct rte_crypto_op **ops;
	struct rte_crypto_op **ops_processed;

	struct rte_cryptodev_sym_session *sess;

	cperf_populate_ops_t populate_ops;

	const struct cperf_options *options;
	const struct cperf_test_vector *test_vector;
};
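
/*
 * Per-run benchmark state: enqueue/dequeue counters and retry statistics for
 * one burst size, plus the resulting per-op cycle costs.
 */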
struct pmd_cyclecount_state {
	struct cperf_pmd_cyclecount_ctx *ctx;
	const struct cperf_options *opts;
	uint32_t lcore;
	uint64_t delay;
	int linearize;
	uint32_t ops_enqd;
	uint32_t ops_deqd;
	uint32_t ops_enq_retries;
	uint32_t ops_deq_retries;
	double cycles_per_build;
	double cycles_per_enq;
	double cycles_per_deq;
};
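
/* IVs are stored in the op's private data area, right after the sym op. */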
static const uint16_t iv_offset =
		sizeof(struct rte_crypto_op) + sizeof(struct rte_crypto_sym_op);
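
/*
 * Release everything the constructor allocated: the session, up to mbuf_nb
 * source/destination mbufs and their arrays, both mbuf pools, the op arrays,
 * the crypto op pool and the context itself. Safe to call on a partially
 * initialized context.
 */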
static void
cperf_pmd_cyclecount_test_free(struct cperf_pmd_cyclecount_ctx *ctx,
		uint32_t mbuf_nb)
{
	uint32_t i;

	if (ctx == NULL)
		return;

	if (ctx->sess) {
		rte_cryptodev_sym_session_clear(ctx->dev_id, ctx->sess);
		rte_cryptodev_sym_session_free(ctx->sess);
	}

	if (ctx->mbufs_in) {
		for (i = 0; i < mbuf_nb; i++)
			rte_pktmbuf_free(ctx->mbufs_in[i]);

		rte_free(ctx->mbufs_in);
	}

	if (ctx->mbufs_out) {
		for (i = 0; i < mbuf_nb; i++) {
			if (ctx->mbufs_out[i] != NULL)
				rte_pktmbuf_free(ctx->mbufs_out[i]);
		}
		rte_free(ctx->mbufs_out);
	}

	if (ctx->pkt_mbuf_pool_in)
		rte_mempool_free(ctx->pkt_mbuf_pool_in);

	if (ctx->pkt_mbuf_pool_out)
		rte_mempool_free(ctx->pkt_mbuf_pool_out);

	if (ctx->ops)
		rte_free(ctx->ops);

	if (ctx->ops_processed)
		rte_free(ctx->ops_processed);

	if (ctx->crypto_op_pool)
		rte_mempool_free(ctx->crypto_op_pool);

	rte_free(ctx);
}
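
/*
 * Build one source mbuf: a chain of segments_nb segments filled with
 * plaintext or ciphertext from the test vector, with room appended for the
 * digest and, for AEAD, the AAD prepended in front of the data.
 */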
static struct rte_mbuf *
cperf_mbuf_create(struct rte_mempool *mempool, uint32_t segments_nb,
		const struct cperf_options *options,
		const struct cperf_test_vector *test_vector)
{
	struct rte_mbuf *mbuf;
	uint32_t segment_sz = options->max_buffer_size / segments_nb;
	uint32_t last_sz = options->max_buffer_size % segments_nb;
	uint8_t *mbuf_data;
	uint8_t *test_data =
			(options->cipher_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) ?
			test_vector->plaintext.data :
			test_vector->ciphertext.data;

	mbuf = rte_pktmbuf_alloc(mempool);
	if (mbuf == NULL)
		goto error;

	mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
	if (mbuf_data == NULL)
		goto error;

	memcpy(mbuf_data, test_data, segment_sz);
	test_data += segment_sz;
	segments_nb--;

	while (segments_nb) {
		struct rte_mbuf *m;

		m = rte_pktmbuf_alloc(mempool);
		if (m == NULL)
			goto error;

		rte_pktmbuf_chain(mbuf, m);

		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
		if (mbuf_data == NULL)
			goto error;

		memcpy(mbuf_data, test_data, segment_sz);
		test_data += segment_sz;
		segments_nb--;
	}

	if (last_sz) {
		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, last_sz);
		if (mbuf_data == NULL)
			goto error;

		memcpy(mbuf_data, test_data, last_sz);
	}

	if (options->op_type != CPERF_CIPHER_ONLY) {
		mbuf_data = (uint8_t *)rte_pktmbuf_append(
				mbuf, options->digest_sz);
		if (mbuf_data == NULL)
			goto error;
	}

	if (options->op_type == CPERF_AEAD) {
		uint8_t *aead = (uint8_t *)rte_pktmbuf_prepend(
				mbuf, RTE_ALIGN_CEIL(options->aead_aad_sz, 16));
		if (aead == NULL)
			goto error;

		memcpy(aead, test_vector->aad.data, test_vector->aad.length);
	}

	return mbuf;

error:
	if (mbuf != NULL)
		rte_pktmbuf_free(mbuf);

	return NULL;
}
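
/*
 * Set up the test context for one device/queue pair: create the session,
 * the input (and optional out-of-place output) mbuf pool and mbufs, the
 * crypto op pool sized for the test vector's IVs, and op arrays sized to the
 * descriptor ring. On any failure, everything allocated so far is freed.
 */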
void *
cperf_pmd_cyclecount_test_constructor(struct rte_mempool *sess_mp,
		uint8_t dev_id, uint16_t qp_id,
		const struct cperf_options *options,
		const struct cperf_test_vector *test_vector,
		const struct cperf_op_fns *op_fns)
{
	struct cperf_pmd_cyclecount_ctx *ctx = NULL;
	unsigned int mbuf_idx = 0;
	char pool_name[32] = "";
	uint16_t dataroom_sz = RTE_PKTMBUF_HEADROOM +
			RTE_CACHE_LINE_ROUNDUP(
					(options->max_buffer_size /
							options->segments_nb) +
					(options->max_buffer_size %
							options->segments_nb) +
					options->digest_sz);

	/* preallocate buffers for crypto ops as they can get quite big */
	size_t alloc_sz = sizeof(struct rte_crypto_op *) *
			options->nb_descriptors;

	ctx = rte_malloc(NULL, sizeof(struct cperf_pmd_cyclecount_ctx), 0);
	if (ctx == NULL)
		goto err;

	ctx->dev_id = dev_id;
	ctx->qp_id = qp_id;

	ctx->populate_ops = op_fns->populate_ops;
	ctx->options = options;
	ctx->test_vector = test_vector;

	/* IV goes at the end of the crypto operation */
	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
			sizeof(struct rte_crypto_sym_op);

	ctx->sess = op_fns->sess_create(
			sess_mp, dev_id, options, test_vector, iv_offset);
	if (ctx->sess == NULL)
		goto err;

	snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d", dev_id);

	ctx->pkt_mbuf_pool_in = rte_pktmbuf_pool_create(pool_name,
			options->pool_sz * options->segments_nb, 0, 0,
			dataroom_sz, rte_socket_id());
	if (ctx->pkt_mbuf_pool_in == NULL)
		goto err;

	/* Generate mbufs_in with plaintext populated for test */
	ctx->mbufs_in = rte_malloc(NULL,
			(sizeof(struct rte_mbuf *) * options->pool_sz), 0);
	if (ctx->mbufs_in == NULL)
		goto err;

	for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
		ctx->mbufs_in[mbuf_idx] = cperf_mbuf_create(
				ctx->pkt_mbuf_pool_in, options->segments_nb,
				options, test_vector);
		if (ctx->mbufs_in[mbuf_idx] == NULL)
			goto err;
	}

	if (options->out_of_place == 1) {
		snprintf(pool_name, sizeof(pool_name), "cperf_pool_out_cdev_%d",
				dev_id);

		ctx->pkt_mbuf_pool_out = rte_pktmbuf_pool_create(pool_name,
				options->pool_sz, 0, 0, dataroom_sz,
				rte_socket_id());
		if (ctx->pkt_mbuf_pool_out == NULL)
			goto err;
	}

	ctx->mbufs_out = rte_malloc(NULL,
			(sizeof(struct rte_mbuf *) * options->pool_sz), 0);
	if (ctx->mbufs_out == NULL)
		goto err;

	for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
		if (options->out_of_place == 1) {
			ctx->mbufs_out[mbuf_idx] = cperf_mbuf_create(
					ctx->pkt_mbuf_pool_out, 1, options,
					test_vector);
			if (ctx->mbufs_out[mbuf_idx] == NULL)
				goto err;
		} else {
			ctx->mbufs_out[mbuf_idx] = NULL;
		}
	}

	snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d", dev_id);

	uint16_t priv_size = test_vector->cipher_iv.length +
			test_vector->auth_iv.length +
			test_vector->aead_iv.length;

	ctx->crypto_op_pool = rte_crypto_op_pool_create(pool_name,
			RTE_CRYPTO_OP_TYPE_SYMMETRIC, options->pool_sz, 512,
			priv_size, rte_socket_id());
	if (ctx->crypto_op_pool == NULL)
		goto err;

	ctx->ops = rte_malloc("ops", alloc_sz, 0);
	if (!ctx->ops)
		goto err;

	ctx->ops_processed = rte_malloc("ops_processed", alloc_sz, 0);
	if (!ctx->ops_processed)
		goto err;

	return ctx;

err:
	cperf_pmd_cyclecount_test_free(ctx, mbuf_idx);

	return NULL;
}
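
/*
 * The benchmark is split into three independently timed phases: building
 * ops, enqueueing them and dequeueing them. The helpers below implement one
 * phase each and share the file-scope iv_offset.
 */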
/* benchmark alloc-build-free of ops */
static inline int
pmd_cyclecount_bench_ops(struct pmd_cyclecount_state *state, uint32_t cur_op,
		uint16_t test_burst_size)
{
	uint32_t iter_ops_left = state->opts->total_ops - cur_op;
	uint32_t iter_ops_needed =
			RTE_MIN(state->opts->nb_descriptors, iter_ops_left);
	uint32_t cur_iter_op;

	for (cur_iter_op = 0; cur_iter_op < iter_ops_needed;
			cur_iter_op += test_burst_size) {
		uint32_t burst_size = RTE_MIN(state->opts->total_ops - cur_op,
				test_burst_size);
		struct rte_crypto_op **ops = &state->ctx->ops[cur_iter_op];

		if (burst_size != rte_crypto_op_bulk_alloc(
				state->ctx->crypto_op_pool,
				RTE_CRYPTO_OP_TYPE_SYMMETRIC,
				ops, burst_size))
			return -1;

		/* Setup crypto op, attach mbuf etc */
		(state->ctx->populate_ops)(ops,
				&state->ctx->mbufs_in[cur_iter_op],
				&state->ctx->mbufs_out[cur_iter_op], burst_size,
				state->ctx->sess, state->opts,
				state->ctx->test_vector, iv_offset);

#ifdef CPERF_LINEARIZATION_ENABLE
		/* Check if source mbufs require coalescing */
		if (state->linearize) {
			uint32_t i;

			for (i = 0; i < burst_size; i++) {
				struct rte_mbuf *src = ops[i]->sym->m_src;

				rte_pktmbuf_linearize(src);
			}
		}
#endif /* CPERF_LINEARIZATION_ENABLE */
		rte_mempool_put_bulk(state->ctx->crypto_op_pool, (void **)ops,
				burst_size);
	}

	return 0;
}
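
/*
 * Unlike pmd_cyclecount_bench_ops(), the ops built here are not returned to
 * the pool: they are handed to the enqueue/dequeue loop and bulk-freed once
 * the iteration finishes.
 */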
/* allocate and build ops (no free) */
static int
pmd_cyclecount_build_ops(struct pmd_cyclecount_state *state,
		uint32_t iter_ops_needed, uint16_t test_burst_size)
{
	uint32_t cur_iter_op;

	for (cur_iter_op = 0; cur_iter_op < iter_ops_needed;
			cur_iter_op += test_burst_size) {
		uint32_t burst_size = RTE_MIN(
				iter_ops_needed - cur_iter_op, test_burst_size);
		struct rte_crypto_op **ops = &state->ctx->ops[cur_iter_op];

		if (burst_size != rte_crypto_op_bulk_alloc(
				state->ctx->crypto_op_pool,
				RTE_CRYPTO_OP_TYPE_SYMMETRIC,
				ops, burst_size))
			return -1;

		/* Setup crypto op, attach mbuf etc */
		(state->ctx->populate_ops)(ops,
				&state->ctx->mbufs_in[cur_iter_op],
				&state->ctx->mbufs_out[cur_iter_op], burst_size,
				state->ctx->sess, state->opts,
				state->ctx->test_vector, iv_offset);
	}

	return 0;
}
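
/*
 * A burst that is only partially accepted counts as a retry; if nothing at
 * all is accepted, the ring is full and enqueueing stops, reporting how many
 * ops actually made it onto the ring so the dequeue side only waits for
 * those.
 */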
/* benchmark enqueue, returns number of ops enqueued */
static uint32_t
pmd_cyclecount_bench_enq(struct pmd_cyclecount_state *state,
		uint32_t iter_ops_needed, uint16_t test_burst_size)
{
	/* Enqueue full descriptor ring of ops on crypto device */
	uint32_t cur_iter_op = 0;

	while (cur_iter_op < iter_ops_needed) {
		uint32_t burst_size = RTE_MIN(iter_ops_needed - cur_iter_op,
				test_burst_size);
		struct rte_crypto_op **ops = &state->ctx->ops[cur_iter_op];
		uint32_t burst_enqd;

		burst_enqd = rte_cryptodev_enqueue_burst(state->ctx->dev_id,
				state->ctx->qp_id, ops, burst_size);

		/* if we couldn't enqueue anything, the queue is full */
		if (!burst_enqd) {
			/* don't try to dequeue anything we didn't enqueue */
			return cur_iter_op;
		}

		if (burst_enqd < burst_size)
			state->ops_enq_retries++;
		state->ops_enqd += burst_enqd;
		cur_iter_op += burst_enqd;
	}

	return iter_ops_needed;
}
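
/*
 * Dequeue polls until every op handed to it has been returned; bursts that
 * come back short are counted as retries.
 */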
/* benchmark dequeue */
static void
pmd_cyclecount_bench_deq(struct pmd_cyclecount_state *state,
		uint32_t iter_ops_needed, uint16_t test_burst_size)
{
	/* Dequeue full descriptor ring of ops on crypto device */
	uint32_t cur_iter_op = 0;

	while (cur_iter_op < iter_ops_needed) {
		uint32_t burst_size = RTE_MIN(iter_ops_needed - cur_iter_op,
				test_burst_size);
		struct rte_crypto_op **ops_processed =
				&state->ctx->ops[cur_iter_op];
		uint32_t burst_deqd;

		burst_deqd = rte_cryptodev_dequeue_burst(state->ctx->dev_id,
				state->ctx->qp_id, ops_processed, burst_size);

		if (burst_deqd < burst_size)
			state->ops_deq_retries++;
		state->ops_deqd += burst_deqd;
		cur_iter_op += burst_deqd;
	}
}
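
/*
 * Run one benchmark pass for a single burst size: first time op
 * alloc/build/free on its own, then time enqueue and dequeue separately
 * while driving full descriptor rings through the device.
 */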
/* run benchmark per burst size */
static inline int
pmd_cyclecount_bench_burst_sz(
		struct pmd_cyclecount_state *state, uint16_t test_burst_size)
{
	uint64_t tsc_start;
	uint64_t tsc_end;
	uint64_t tsc_op;
	uint64_t tsc_enq;
	uint64_t tsc_deq;
	uint32_t cur_op;

	/* reset all counters */
	tsc_enq = 0;
	tsc_deq = 0;
	state->ops_enqd = 0;
	state->ops_enq_retries = 0;
	state->ops_deqd = 0;
	state->ops_deq_retries = 0;

	/*
	 * Benchmark crypto op alloc-build-free separately.
	 */
	tsc_start = rte_rdtsc_precise();

	for (cur_op = 0; cur_op < state->opts->total_ops;
			cur_op += state->opts->nb_descriptors) {
		if (unlikely(pmd_cyclecount_bench_ops(
				state, cur_op, test_burst_size)))
			return -1;
	}

	tsc_end = rte_rdtsc_precise();
	tsc_op = tsc_end - tsc_start;

	/*
	 * Hardware acceleration cyclecount benchmarking loop.
	 *
	 * We're benchmarking raw enq/deq performance by filling up the device
	 * queue, so we never get any failed enqs unless the driver won't
	 * accept the exact number of descriptors we requested, or the driver
	 * won't wrap around the end of the TX ring. However, since we're only
	 * dequeueing once we've filled up the queue, we have to benchmark it
	 * piecemeal and then average out the results.
	 */
	cur_op = 0;
	while (cur_op < state->opts->total_ops) {
		uint32_t iter_ops_left = state->opts->total_ops - cur_op;
		uint32_t iter_ops_needed = RTE_MIN(
				state->opts->nb_descriptors, iter_ops_left);
		uint32_t iter_ops_allocd = iter_ops_needed;

		/* allocate and build ops */
		if (unlikely(pmd_cyclecount_build_ops(state, iter_ops_needed,
				test_burst_size)))
			return -1;

		tsc_start = rte_rdtsc_precise();

		/* fill up TX ring */
		iter_ops_needed = pmd_cyclecount_bench_enq(state,
				iter_ops_needed, test_burst_size);

		tsc_end = rte_rdtsc_precise();
		tsc_enq += tsc_end - tsc_start;

		/* allow for HW to catch up */
		if (state->delay)
			rte_delay_us_block(state->delay);

		tsc_start = rte_rdtsc_precise();

		/* drain RX ring */
		pmd_cyclecount_bench_deq(state, iter_ops_needed,
				test_burst_size);

		tsc_end = rte_rdtsc_precise();
		tsc_deq += tsc_end - tsc_start;

		cur_op += iter_ops_needed;

		/*
		 * we may not have processed all ops that we allocated, so
		 * free everything we've allocated.
		 */
		rte_mempool_put_bulk(state->ctx->crypto_op_pool,
				(void **)state->ctx->ops, iter_ops_allocd);
	}

	state->cycles_per_build = (double)tsc_op / state->opts->total_ops;
	state->cycles_per_enq = (double)tsc_enq / state->ops_enqd;
	state->cycles_per_deq = (double)tsc_deq / state->ops_deqd;

	return 0;
}
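
/*
 * Per-lcore entry point: configure the delay and linearization settings,
 * then walk the configured burst sizes; the very first pass is treated as a
 * warm-up and rerun before results are recorded. Results are printed as an
 * aligned table or as CSV lines.
 */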
int
cperf_pmd_cyclecount_test_runner(void *test_ctx)
{
	struct pmd_cyclecount_state state = {0};
	const struct cperf_options *opts;
	uint16_t test_burst_size;
	uint8_t burst_size_idx = 0;

	state.ctx = test_ctx;
	opts = state.ctx->options;
	state.lcore = rte_lcore_id();
	state.linearize = 0;

	static int only_once;
	static bool warmup = true;

	/*
	 * We need a small delay to allow for hardware to process all the
	 * crypto operations. We can't automatically figure out what the delay
	 * should be, so we leave it up to the user (by default it's 0).
	 */
	state.delay = 1000 * opts->pmdcc_delay;

#ifdef CPERF_LINEARIZATION_ENABLE
	struct rte_cryptodev_info dev_info;

	/* Check if source mbufs require coalescing */
	if (opts->segments_nb > 1) {
		rte_cryptodev_info_get(state.ctx->dev_id, &dev_info);
		if ((dev_info.feature_flags &
				RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
			state.linearize = 1;
	}
#endif /* CPERF_LINEARIZATION_ENABLE */

	state.ctx->lcore_id = state.lcore;

	/* Get first size from range or list */
	if (opts->inc_burst_size != 0)
		test_burst_size = opts->min_burst_size;
	else
		test_burst_size = opts->burst_size_list[0];

	while (test_burst_size <= opts->max_burst_size) {
		/* do a benchmark run */
		if (pmd_cyclecount_bench_burst_sz(&state, test_burst_size))
			return -1;

		/*
		 * First run is always a warm up run.
		 */
		if (warmup) {
			warmup = false;
			continue;
		}

		if (!opts->csv) {
			if (!only_once)
				printf(PRETTY_HDR_FMT, "lcore id", "Buf Size",
						"Burst Size", "Enqueued",
						"Dequeued", "Enq Retries",
						"Deq Retries", "Cycles/Op",
						"Cycles/Enq", "Cycles/Deq");
			only_once = 1;

			printf(PRETTY_LINE_FMT, state.ctx->lcore_id,
					opts->test_buffer_size, test_burst_size,
					state.ops_enqd, state.ops_deqd,
					state.ops_enq_retries,
					state.ops_deq_retries,
					state.cycles_per_build,
					state.cycles_per_enq,
					state.cycles_per_deq);
		} else {
			if (!only_once)
				printf(CSV_HDR_FMT, "# lcore id", "Buf Size",
						"Burst Size", "Enqueued",
						"Dequeued", "Enq Retries",
						"Deq Retries", "Cycles/Op",
						"Cycles/Enq", "Cycles/Deq");
			only_once = 1;

			printf(CSV_LINE_FMT, state.ctx->lcore_id,
					opts->test_buffer_size, test_burst_size,
					state.ops_enqd, state.ops_deqd,
					state.ops_enq_retries,
					state.ops_deq_retries,
					state.cycles_per_build,
					state.cycles_per_enq,
					state.cycles_per_deq);
		}

		/* Get next size from range or list */
		if (opts->inc_burst_size != 0)
			test_burst_size += opts->inc_burst_size;
		else {
			if (++burst_size_idx == opts->burst_size_count)
				break;
			test_burst_size = opts->burst_size_list[burst_size_idx];
		}
	}

	return 0;
}
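
/* Release the test context built by the constructor. */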
void
cperf_pmd_cyclecount_test_destructor(void *arg)
{
	struct cperf_pmd_cyclecount_ctx *ctx = arg;

	if (ctx == NULL)
		return;

	cperf_pmd_cyclecount_test_free(ctx, ctx->options->pool_sz);
}