/*
 * app/crypto-perf: do not append digest if not used
 * [dpdk.git] / app / test-crypto-perf / cperf_test_latency.c
 */
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *     * Redistributions of source code must retain the above copyright
11  *       notice, this list of conditions and the following disclaimer.
12  *     * Redistributions in binary form must reproduce the above copyright
13  *       notice, this list of conditions and the following disclaimer in
14  *       the documentation and/or other materials provided with the
15  *       distribution.
16  *     * Neither the name of Intel Corporation nor the names of its
17  *       contributors may be used to endorse or promote products derived
18  *       from this software without specific prior written permission.
19  *
20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <rte_malloc.h>
34 #include <rte_cycles.h>
35 #include <rte_crypto.h>
36 #include <rte_cryptodev.h>
37
38 #include "cperf_test_latency.h"
39 #include "cperf_ops.h"
40
41
/* Aggregated statistics for one latency test run; filled in by
 * cperf_latency_test_runner() and reported by the destructor.
 */
struct cperf_latency_results {

	/* Count of ops that completed with an error status.
	 * NOTE(review): never written in this file — presumably updated
	 * elsewhere or vestigial; confirm before relying on it.
	 */
	uint64_t ops_failed;

	/* Enqueue stats: total ops enqueued, largest and smallest burst. */
	uint64_t enqd_tot;
	uint64_t enqd_max;
	uint64_t enqd_min;

	/* Dequeue stats: total ops dequeued, largest and smallest burst. */
	uint64_t deqd_tot;
	uint64_t deqd_max;
	uint64_t deqd_min;

	/* Per-op latency in TSC cycles: sum, maximum and minimum. */
	uint64_t cycles_tot;
	uint64_t cycles_max;
	uint64_t cycles_min;

	/* Number of bursts issued and number of ops actually measured. */
	uint64_t burst_num;
	uint64_t num;
};
61
/* Timestamp pair recorded for a single crypto operation. */
struct cperf_op_result {
	uint64_t tsc_start;	/* TSC read just before the enqueue burst */
	uint64_t tsc_end;	/* TSC read just after the dequeue burst */
	enum rte_crypto_op_status status;	/* completion status from the device */
};
67
/* Per-(device, queue pair) state for one latency test instance. */
struct cperf_latency_ctx {
	uint8_t dev_id;		/* crypto device under test */
	uint16_t qp_id;		/* queue pair used for enqueue/dequeue */
	uint8_t lcore_id;	/* lcore running the test (set by the runner) */

	/* Mbuf pools and pre-built source/destination buffers. */
	struct rte_mempool *pkt_mbuf_pool_in;
	struct rte_mempool *pkt_mbuf_pool_out;
	struct rte_mbuf **mbufs_in;
	struct rte_mbuf **mbufs_out;	/* entries are NULL when in-place */

	struct rte_mempool *crypto_op_pool;	/* pool of symmetric crypto ops */

	struct rte_cryptodev_sym_session *sess;	/* session shared by all ops */

	cperf_populate_ops_t populate_ops;	/* op-setup callback (cperf_ops) */

	const struct cperf_options *options;
	const struct cperf_test_vector *test_vector;
	struct cperf_op_result *res;	/* one entry per enqueued op */
	struct cperf_latency_results results;	/* aggregated after the run */
};
89
/*
 * Local max/min helpers evaluating to uint64_t.
 * Arguments are parenthesized in the expansion so that expression
 * arguments (e.g. 'x | y') are not torn apart by operator precedence.
 * Each argument is still expanded twice, so callers must not pass
 * expressions with side effects.
 */
#define max(a, b) ((a) > (b) ? (uint64_t)(a) : (uint64_t)(b))
#define min(a, b) ((a) < (b) ? (uint64_t)(a) : (uint64_t)(b))
92
93 static void
94 cperf_latency_test_free(struct cperf_latency_ctx *ctx, uint32_t mbuf_nb)
95 {
96         uint32_t i;
97
98         if (ctx) {
99                 if (ctx->sess)
100                         rte_cryptodev_sym_session_free(ctx->dev_id, ctx->sess);
101
102                 if (ctx->mbufs_in) {
103                         for (i = 0; i < mbuf_nb; i++)
104                                 rte_pktmbuf_free(ctx->mbufs_in[i]);
105
106                         rte_free(ctx->mbufs_in);
107                 }
108
109                 if (ctx->mbufs_out) {
110                         for (i = 0; i < mbuf_nb; i++) {
111                                 if (ctx->mbufs_out[i] != NULL)
112                                         rte_pktmbuf_free(ctx->mbufs_out[i]);
113                         }
114
115                         rte_free(ctx->mbufs_out);
116                 }
117
118                 if (ctx->pkt_mbuf_pool_in)
119                         rte_mempool_free(ctx->pkt_mbuf_pool_in);
120
121                 if (ctx->pkt_mbuf_pool_out)
122                         rte_mempool_free(ctx->pkt_mbuf_pool_out);
123
124                 if (ctx->crypto_op_pool)
125                         rte_mempool_free(ctx->crypto_op_pool);
126
127                 rte_free(ctx->res);
128                 rte_free(ctx);
129         }
130 }
131
132 static struct rte_mbuf *
133 cperf_mbuf_create(struct rte_mempool *mempool,
134                 uint32_t segments_nb,
135                 const struct cperf_options *options,
136                 const struct cperf_test_vector *test_vector)
137 {
138         struct rte_mbuf *mbuf;
139         uint32_t segment_sz = options->buffer_sz / segments_nb;
140         uint32_t last_sz = options->buffer_sz % segments_nb;
141         uint8_t *mbuf_data;
142         uint8_t *test_data =
143                         (options->cipher_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) ?
144                                         test_vector->plaintext.data :
145                                         test_vector->ciphertext.data;
146
147         mbuf = rte_pktmbuf_alloc(mempool);
148         if (mbuf == NULL)
149                 goto error;
150
151         mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
152         if (mbuf_data == NULL)
153                 goto error;
154
155         memcpy(mbuf_data, test_data, segment_sz);
156         test_data += segment_sz;
157         segments_nb--;
158
159         while (segments_nb) {
160                 struct rte_mbuf *m;
161
162                 m = rte_pktmbuf_alloc(mempool);
163                 if (m == NULL)
164                         goto error;
165
166                 rte_pktmbuf_chain(mbuf, m);
167
168                 mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
169                 if (mbuf_data == NULL)
170                         goto error;
171
172                 memcpy(mbuf_data, test_data, segment_sz);
173                 test_data += segment_sz;
174                 segments_nb--;
175         }
176
177         if (last_sz) {
178                 mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, last_sz);
179                 if (mbuf_data == NULL)
180                         goto error;
181
182                 memcpy(mbuf_data, test_data, last_sz);
183         }
184
185         if (options->op_type != CPERF_CIPHER_ONLY) {
186                 mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf,
187                         options->auth_digest_sz);
188                 if (mbuf_data == NULL)
189                         goto error;
190         }
191
192         if (options->op_type == CPERF_AEAD) {
193                 uint8_t *aead = (uint8_t *)rte_pktmbuf_prepend(mbuf,
194                         RTE_ALIGN_CEIL(options->auth_aad_sz, 16));
195
196                 if (aead == NULL)
197                         goto error;
198
199                 memcpy(aead, test_vector->aad.data, test_vector->aad.length);
200         }
201
202         return mbuf;
203 error:
204         if (mbuf != NULL)
205                 rte_pktmbuf_free(mbuf);
206
207         return NULL;
208 }
209
210 void *
211 cperf_latency_test_constructor(uint8_t dev_id, uint16_t qp_id,
212                 const struct cperf_options *options,
213                 const struct cperf_test_vector *test_vector,
214                 const struct cperf_op_fns *op_fns)
215 {
216         struct cperf_latency_ctx *ctx = NULL;
217         unsigned int mbuf_idx = 0;
218         char pool_name[32] = "";
219
220         ctx = rte_malloc(NULL, sizeof(struct cperf_latency_ctx), 0);
221         if (ctx == NULL)
222                 goto err;
223
224         ctx->dev_id = dev_id;
225         ctx->qp_id = qp_id;
226
227         ctx->populate_ops = op_fns->populate_ops;
228         ctx->options = options;
229         ctx->test_vector = test_vector;
230
231         ctx->sess = op_fns->sess_create(dev_id, options, test_vector);
232         if (ctx->sess == NULL)
233                 goto err;
234
235         snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d",
236                                 dev_id);
237
238         ctx->pkt_mbuf_pool_in = rte_pktmbuf_pool_create(pool_name,
239                         options->pool_sz * options->segments_nb, 0, 0,
240                         RTE_PKTMBUF_HEADROOM +
241                         RTE_CACHE_LINE_ROUNDUP(
242                                 (options->buffer_sz / options->segments_nb) +
243                                 (options->buffer_sz % options->segments_nb) +
244                                         options->auth_digest_sz),
245                         rte_socket_id());
246
247         if (ctx->pkt_mbuf_pool_in == NULL)
248                 goto err;
249
250         /* Generate mbufs_in with plaintext populated for test */
251         if (ctx->options->pool_sz % ctx->options->burst_sz)
252                 goto err;
253
254         ctx->mbufs_in = rte_malloc(NULL,
255                         (sizeof(struct rte_mbuf *) *
256                         ctx->options->pool_sz), 0);
257
258         for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
259                 ctx->mbufs_in[mbuf_idx] = cperf_mbuf_create(
260                                 ctx->pkt_mbuf_pool_in, options->segments_nb,
261                                 options, test_vector);
262                 if (ctx->mbufs_in[mbuf_idx] == NULL)
263                         goto err;
264         }
265
266         if (options->out_of_place == 1) {
267
268                 snprintf(pool_name, sizeof(pool_name),
269                                 "cperf_pool_out_cdev_%d",
270                                 dev_id);
271
272                 ctx->pkt_mbuf_pool_out = rte_pktmbuf_pool_create(
273                                 pool_name, options->pool_sz, 0, 0,
274                                 RTE_PKTMBUF_HEADROOM +
275                                 RTE_CACHE_LINE_ROUNDUP(
276                                         options->buffer_sz +
277                                         options->auth_digest_sz),
278                                 rte_socket_id());
279
280                 if (ctx->pkt_mbuf_pool_out == NULL)
281                         goto err;
282         }
283
284         ctx->mbufs_out = rte_malloc(NULL,
285                         (sizeof(struct rte_mbuf *) *
286                         ctx->options->pool_sz), 0);
287
288         for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
289                 if (options->out_of_place == 1) {
290                         ctx->mbufs_out[mbuf_idx] = cperf_mbuf_create(
291                                         ctx->pkt_mbuf_pool_out, 1,
292                                         options, test_vector);
293                         if (ctx->mbufs_out[mbuf_idx] == NULL)
294                                 goto err;
295                 } else {
296                         ctx->mbufs_out[mbuf_idx] = NULL;
297                 }
298         }
299
300         snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d",
301                         dev_id);
302
303         ctx->crypto_op_pool = rte_crypto_op_pool_create(pool_name,
304                         RTE_CRYPTO_OP_TYPE_SYMMETRIC, options->pool_sz, 0, 0,
305                         rte_socket_id());
306         if (ctx->crypto_op_pool == NULL)
307                 goto err;
308
309         ctx->res = rte_malloc(NULL, sizeof(struct cperf_op_result) *
310                         ctx->options->total_ops, 0);
311
312         if (ctx->res == NULL)
313                 goto err;
314
315         return ctx;
316 err:
317         cperf_latency_test_free(ctx, mbuf_idx);
318
319         return NULL;
320 }
321
/*
 * Worker entry point: enqueue/dequeue bursts of crypto ops and record a
 * start/end TSC pair for every enqueued operation so per-op latency can
 * be computed afterwards.
 *
 * @arg points to the cperf_latency_ctx built by the constructor.
 * Returns 0 on completion, -1 if op allocation from the pool fails.
 */
int
cperf_latency_test_runner(void *arg)
{
	struct cperf_latency_ctx *ctx = arg;
	struct cperf_op_result *pres;

	if (ctx == NULL)
		return 0;

	struct rte_crypto_op *ops[ctx->options->burst_sz];
	struct rte_crypto_op *ops_processed[ctx->options->burst_sz];
	uint64_t ops_enqd = 0, ops_deqd = 0;
	uint64_t m_idx = 0, b_idx = 0, i;

	uint64_t tsc_val, tsc_end, tsc_start;
	/* mins start at all-ones so the first sample always wins */
	uint64_t tsc_max = 0, tsc_min = ~0UL, tsc_tot = 0, tsc_idx = 0;
	uint64_t enqd_max = 0, enqd_min = ~0UL, enqd_tot = 0;
	uint64_t deqd_max = 0, deqd_min = ~0UL, deqd_tot = 0;

	uint32_t lcore = rte_lcore_id();

#ifdef CPERF_LINEARIZATION_ENABLE
	struct rte_cryptodev_info dev_info;
	int linearize = 0;

	/* Check if source mbufs require coalescing */
	if (ctx->options->segments_nb > 1) {
		rte_cryptodev_info_get(ctx->dev_id, &dev_info);
		if ((dev_info.feature_flags &
				RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
			linearize = 1;
	}
#endif /* CPERF_LINEARIZATION_ENABLE */

	ctx->lcore_id = lcore;

	/* Warm up the host CPU before starting the test */
	for (i = 0; i < ctx->options->total_ops; i++)
		rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

	while (enqd_tot < ctx->options->total_ops) {

		/* Trim the final burst so the run never exceeds total_ops. */
		uint16_t burst_size = ((enqd_tot + ctx->options->burst_sz)
				<= ctx->options->total_ops) ?
						ctx->options->burst_sz :
						ctx->options->total_ops -
						enqd_tot;

		/* Allocate crypto ops from pool */
		if (burst_size != rte_crypto_op_bulk_alloc(
				ctx->crypto_op_pool,
				RTE_CRYPTO_OP_TYPE_SYMMETRIC,
				ops, burst_size))
			return -1;

		/* Setup crypto op, attach mbuf etc */
		(ctx->populate_ops)(ops, &ctx->mbufs_in[m_idx],
				&ctx->mbufs_out[m_idx],
				burst_size, ctx->sess, ctx->options,
				ctx->test_vector);

		/* Timestamp taken just before the enqueue burst; shared by
		 * every op of this burst.
		 */
		tsc_start = rte_rdtsc_precise();

#ifdef CPERF_LINEARIZATION_ENABLE
		if (linearize) {
			/* PMD doesn't support scatter-gather and source buffer
			 * is segmented.
			 * We need to linearize it before enqueuing.
			 */
			for (i = 0; i < burst_size; i++)
				rte_pktmbuf_linearize(ops[i]->sym->m_src);
		}
#endif /* CPERF_LINEARIZATION_ENABLE */

		/* Enqueue burst of ops on crypto device */
		ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
				ops, burst_size);

		/* Dequeue processed burst of ops from crypto device */
		ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
				ops_processed, ctx->options->burst_sz);

		tsc_end = rte_rdtsc_precise();

		/* Hand each enqueued op a result slot via opaque_data; the
		 * dequeue path fills in tsc_end through this pointer.
		 */
		for (i = 0; i < ops_enqd; i++) {
			ctx->res[tsc_idx].tsc_start = tsc_start;
			ops[i]->opaque_data = (void *)&ctx->res[tsc_idx];
			tsc_idx++;
		}

		/* Free memory for not enqueued operations */
		for (i = ops_enqd; i < burst_size; i++)
			rte_crypto_op_free(ops[i]);

		if (likely(ops_deqd))  {
			/*
			 * free crypto ops so they can be reused. We don't free
			 * the mbufs here as we don't want to reuse them as
			 * the crypto operation will change the data and cause
			 * failures.
			 */
			for (i = 0; i < ops_deqd; i++) {
				pres = (struct cperf_op_result *)
						(ops_processed[i]->opaque_data);
				pres->status = ops_processed[i]->status;
				pres->tsc_end = tsc_end;

				rte_crypto_op_free(ops_processed[i]);
			}

			deqd_tot += ops_deqd;
			deqd_max = max(ops_deqd, deqd_max);
			deqd_min = min(ops_deqd, deqd_min);
		}

		enqd_tot += ops_enqd;
		enqd_max = max(ops_enqd, enqd_max);
		enqd_min = min(ops_enqd, enqd_min);

		/* Advance through the mbuf pool; wrap to 0 when another full
		 * burst would run past pool_sz.
		 */
		m_idx += ops_enqd;
		m_idx = m_idx + ctx->options->burst_sz > ctx->options->pool_sz ?
				0 : m_idx;
		b_idx++;
	}

	/* Dequeue any operations still in the crypto device */
	while (deqd_tot < ctx->options->total_ops) {
		/* Sending 0 length burst to flush sw crypto device */
		rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

		/* dequeue burst */
		ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
				ops_processed, ctx->options->burst_sz);

		tsc_end = rte_rdtsc_precise();

		if (ops_deqd != 0) {
			for (i = 0; i < ops_deqd; i++) {
				pres = (struct cperf_op_result *)
						(ops_processed[i]->opaque_data);
				pres->status = ops_processed[i]->status;
				pres->tsc_end = tsc_end;

				rte_crypto_op_free(ops_processed[i]);
			}

			deqd_tot += ops_deqd;
			deqd_max = max(ops_deqd, deqd_max);
			deqd_min = min(ops_deqd, deqd_min);
		}
	}

	/* Reduce per-op timestamp pairs into total/max/min cycle counts. */
	for (i = 0; i < tsc_idx; i++) {
		tsc_val = ctx->res[i].tsc_end - ctx->res[i].tsc_start;
		tsc_max = max(tsc_val, tsc_max);
		tsc_min = min(tsc_val, tsc_min);
		tsc_tot += tsc_val;
	}

	/* Publish the aggregates for the destructor to report. */
	ctx->results.enqd_tot = enqd_tot;
	ctx->results.enqd_max = enqd_max;
	ctx->results.enqd_min = enqd_min;

	ctx->results.deqd_tot = deqd_tot;
	ctx->results.deqd_max = deqd_max;
	ctx->results.deqd_min = deqd_min;

	ctx->results.cycles_tot = tsc_tot;
	ctx->results.cycles_max = tsc_max;
	ctx->results.cycles_min = tsc_min;

	ctx->results.burst_num = b_idx;
	ctx->results.num = tsc_idx;

	return 0;
}
498
/*
 * Test teardown: derive and print enqueue/dequeue/latency statistics from
 * the results gathered by the runner (CSV or human-readable form), then
 * release all test resources.
 */
void
cperf_latency_test_destructor(void *arg)
{
	struct cperf_latency_ctx *ctx = arg;
	uint64_t i;
	if (ctx == NULL)
		return;
	/* Emits the CSV header only once per process.
	 * NOTE(review): not atomic — racy if destructors run concurrently
	 * on several lcores; confirm callers serialize destruction.
	 */
	static int only_once;
	uint64_t etot, eavg, emax, emin;
	uint64_t dtot, davg, dmax, dmin;
	uint64_t ctot, cavg, cmax, cmin;
	double ttot, tavg, tmax, tmin;

	const uint64_t tunit = 1000000; /* us */
	const uint64_t tsc_hz = rte_get_tsc_hz();

	/* Enqueue stats: total, average per burst, max/min burst size.
	 * NOTE(review): divides by burst_num — assumes the runner completed
	 * at least one burst before teardown.
	 */
	etot = ctx->results.enqd_tot;
	eavg = ctx->results.enqd_tot / ctx->results.burst_num;
	emax = ctx->results.enqd_max;
	emin = ctx->results.enqd_min;

	/* Dequeue stats, same shape as above. */
	dtot = ctx->results.deqd_tot;
	davg = ctx->results.deqd_tot / ctx->results.burst_num;
	dmax = ctx->results.deqd_max;
	dmin = ctx->results.deqd_min;

	/* Latency in TSC cycles: total, average per measured op, max/min. */
	ctot = ctx->results.cycles_tot;
	cavg = ctx->results.cycles_tot / ctx->results.num;
	cmax = ctx->results.cycles_max;
	cmin = ctx->results.cycles_min;

	/* Convert cycle counts to microseconds via the TSC frequency. */
	ttot = tunit*(double)(ctot) / tsc_hz;
	tavg = tunit*(double)(cavg) / tsc_hz;
	tmax = tunit*(double)(cmax) / tsc_hz;
	tmin = tunit*(double)(cmin) / tsc_hz;

	if (ctx->options->csv) {
		/* CSV mode: one line per operation with its raw cycle count
		 * and the equivalent time in microseconds.
		 */
		if (!only_once)
			printf("\n# lcore, Pakt Seq #, Packet Size, cycles,"
					" time (us)");

		for (i = 0; i < ctx->options->total_ops; i++) {

			printf("\n%u;%"PRIu64";%"PRIu64";%.3f",
				ctx->lcore_id, i + 1,
				ctx->res[i].tsc_end - ctx->res[i].tsc_start,
				tunit * (double) (ctx->res[i].tsc_end
						- ctx->res[i].tsc_start)
					/ tsc_hz);

		}
		only_once = 1;
	} else {
		/* Human-readable summary table. */
		printf("\n# Device %d on lcore %u\n", ctx->dev_id,
			ctx->lcore_id);
		printf("\n# total operations: %u", ctx->options->total_ops);
		printf("\n#     burst number: %"PRIu64,
				ctx->results.burst_num);
		printf("\n#");
		printf("\n#          \t       Total\t   Average\t   Maximum\t "
				"  Minimum");
		printf("\n#  enqueued\t%12"PRIu64"\t%10"PRIu64"\t%10"PRIu64"\t"
				"%10"PRIu64, etot, eavg, emax, emin);
		printf("\n#  dequeued\t%12"PRIu64"\t%10"PRIu64"\t%10"PRIu64"\t"
				"%10"PRIu64, dtot, davg, dmax, dmin);
		printf("\n#    cycles\t%12"PRIu64"\t%10"PRIu64"\t%10"PRIu64"\t"
				"%10"PRIu64, ctot, cavg, cmax, cmin);
		printf("\n# time [us]\t%12.0f\t%10.3f\t%10.3f\t%10.3f", ttot,
			tavg, tmax, tmin);
		printf("\n\n");

	}
	cperf_latency_test_free(ctx, ctx->options->pool_sz);

}