app/crypto-perf: move verify as single test type
app/test-crypto-perf/cperf_test_latency.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_crypto.h>
#include <rte_cryptodev.h>

#include "cperf_test_latency.h"
#include "cperf_ops.h"


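/*
 * Latency statistics produced by one test run: enqueue/dequeue totals and
 * per-burst extremes, plus total/min/max cycle counts across measured ops.
 */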
struct cperf_latency_results {

        uint64_t ops_failed;

        uint64_t enqd_tot;
        uint64_t enqd_max;
        uint64_t enqd_min;

        uint64_t deqd_tot;
        uint64_t deqd_max;
        uint64_t deqd_min;

        uint64_t cycles_tot;
        uint64_t cycles_max;
        uint64_t cycles_min;

        uint64_t burst_num;
        uint64_t num;
};

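/*
 * Per-operation record: start/end timestamps and completion status. A pointer
 * to one of these travels with each crypto op in its opaque_data field.
 */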
struct cperf_op_result {
        uint64_t tsc_start;
        uint64_t tsc_end;
        enum rte_crypto_op_status status;
};

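/*
 * Per-lcore test context: device/queue identity, mbuf and crypto op pools,
 * the symmetric session under test, and storage for per-operation results.
 */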
struct cperf_latency_ctx {
        uint8_t dev_id;
        uint16_t qp_id;
        uint8_t lcore_id;

        struct rte_mempool *pkt_mbuf_pool_in;
        struct rte_mempool *pkt_mbuf_pool_out;
        struct rte_mbuf **mbufs_in;
        struct rte_mbuf **mbufs_out;

        struct rte_mempool *crypto_op_pool;

        struct rte_cryptodev_sym_session *sess;

        cperf_populate_ops_t populate_ops;

        const struct cperf_options *options;
        const struct cperf_test_vector *test_vector;
        struct cperf_op_result *res;
        struct cperf_latency_results results;
};

#define max(a, b) ((a) > (b) ? (uint64_t)(a) : (uint64_t)(b))
#define min(a, b) ((a) < (b) ? (uint64_t)(a) : (uint64_t)(b))

static void
cperf_latency_test_free(struct cperf_latency_ctx *ctx, uint32_t mbuf_nb)
{
        uint32_t i;

        if (ctx) {
                if (ctx->sess)
                        rte_cryptodev_sym_session_free(ctx->dev_id, ctx->sess);

                if (ctx->mbufs_in) {
                        for (i = 0; i < mbuf_nb; i++)
                                rte_pktmbuf_free(ctx->mbufs_in[i]);

                        rte_free(ctx->mbufs_in);
                }

                if (ctx->mbufs_out) {
                        for (i = 0; i < mbuf_nb; i++) {
                                if (ctx->mbufs_out[i] != NULL)
                                        rte_pktmbuf_free(ctx->mbufs_out[i]);
                        }

                        rte_free(ctx->mbufs_out);
                }

                if (ctx->pkt_mbuf_pool_in)
                        rte_mempool_free(ctx->pkt_mbuf_pool_in);

                if (ctx->pkt_mbuf_pool_out)
                        rte_mempool_free(ctx->pkt_mbuf_pool_out);

                if (ctx->crypto_op_pool)
                        rte_mempool_free(ctx->crypto_op_pool);

                rte_free(ctx->res);
                rte_free(ctx);
        }
}

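/*
 * Build a test mbuf of options->buffer_sz bytes spread over segments_nb
 * segments, filled with plaintext or ciphertext from the test vector, with
 * space appended for the digest and, for AEAD, AAD prepended to the data.
 */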
static struct rte_mbuf *
cperf_mbuf_create(struct rte_mempool *mempool,
                uint32_t segments_nb,
                const struct cperf_options *options,
                const struct cperf_test_vector *test_vector)
{
        struct rte_mbuf *mbuf;
        uint32_t segment_sz = options->buffer_sz / segments_nb;
        uint32_t last_sz = options->buffer_sz % segments_nb;
        uint8_t *mbuf_data;
        uint8_t *test_data =
                        (options->cipher_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) ?
                                        test_vector->plaintext.data :
                                        test_vector->ciphertext.data;

        mbuf = rte_pktmbuf_alloc(mempool);
        if (mbuf == NULL)
                goto error;

        mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
        if (mbuf_data == NULL)
                goto error;

        memcpy(mbuf_data, test_data, segment_sz);
        test_data += segment_sz;
        segments_nb--;

        while (segments_nb) {
                struct rte_mbuf *m;

                m = rte_pktmbuf_alloc(mempool);
                if (m == NULL)
                        goto error;

                rte_pktmbuf_chain(mbuf, m);

                mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
                if (mbuf_data == NULL)
                        goto error;

                memcpy(mbuf_data, test_data, segment_sz);
                test_data += segment_sz;
                segments_nb--;
        }

        if (last_sz) {
                mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, last_sz);
                if (mbuf_data == NULL)
                        goto error;

                memcpy(mbuf_data, test_data, last_sz);
        }

        mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf,
                        options->auth_digest_sz);
        if (mbuf_data == NULL)
                goto error;

        if (options->op_type == CPERF_AEAD) {
                uint8_t *aead = (uint8_t *)rte_pktmbuf_prepend(mbuf,
                        RTE_ALIGN_CEIL(options->auth_aad_sz, 16));

                if (aead == NULL)
                        goto error;

                memcpy(aead, test_vector->aad.data, test_vector->aad.length);
        }

        return mbuf;
error:
        if (mbuf != NULL)
                rte_pktmbuf_free(mbuf);

        return NULL;
}

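/*
 * Per device/queue-pair setup: create the session, the input (and optionally
 * out-of-place output) mbufs and pools, the crypto op pool and the result
 * array. On failure all partially allocated state is released.
 */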
void *
cperf_latency_test_constructor(uint8_t dev_id, uint16_t qp_id,
                const struct cperf_options *options,
                const struct cperf_test_vector *test_vector,
                const struct cperf_op_fns *op_fns)
{
        struct cperf_latency_ctx *ctx = NULL;
        unsigned int mbuf_idx = 0;
        char pool_name[32] = "";

        ctx = rte_malloc(NULL, sizeof(struct cperf_latency_ctx), 0);
        if (ctx == NULL)
                goto err;

        ctx->dev_id = dev_id;
        ctx->qp_id = qp_id;

        ctx->populate_ops = op_fns->populate_ops;
        ctx->options = options;
        ctx->test_vector = test_vector;

        ctx->sess = op_fns->sess_create(dev_id, options, test_vector);
        if (ctx->sess == NULL)
                goto err;

        snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d",
                                dev_id);

        ctx->pkt_mbuf_pool_in = rte_pktmbuf_pool_create(pool_name,
                        options->pool_sz * options->segments_nb, 0, 0,
                        RTE_PKTMBUF_HEADROOM +
                        RTE_CACHE_LINE_ROUNDUP(
                                (options->buffer_sz / options->segments_nb) +
                                (options->buffer_sz % options->segments_nb) +
                                        options->auth_digest_sz),
                        rte_socket_id());

        if (ctx->pkt_mbuf_pool_in == NULL)
                goto err;

        /* Generate mbufs_in with plaintext populated for test */
        if (ctx->options->pool_sz % ctx->options->burst_sz)
                goto err;

        ctx->mbufs_in = rte_malloc(NULL,
                        (sizeof(struct rte_mbuf *) *
                        ctx->options->pool_sz), 0);
        if (ctx->mbufs_in == NULL)
                goto err;

        for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
                ctx->mbufs_in[mbuf_idx] = cperf_mbuf_create(
                                ctx->pkt_mbuf_pool_in, options->segments_nb,
                                options, test_vector);
                if (ctx->mbufs_in[mbuf_idx] == NULL)
                        goto err;
        }

        if (options->out_of_place == 1) {

                snprintf(pool_name, sizeof(pool_name),
                                "cperf_pool_out_cdev_%d",
                                dev_id);

                ctx->pkt_mbuf_pool_out = rte_pktmbuf_pool_create(
                                pool_name, options->pool_sz, 0, 0,
                                RTE_PKTMBUF_HEADROOM +
                                RTE_CACHE_LINE_ROUNDUP(
                                        options->buffer_sz +
                                        options->auth_digest_sz),
                                rte_socket_id());

                if (ctx->pkt_mbuf_pool_out == NULL)
                        goto err;
        }

        ctx->mbufs_out = rte_malloc(NULL,
                        (sizeof(struct rte_mbuf *) *
                        ctx->options->pool_sz), 0);
        if (ctx->mbufs_out == NULL)
                goto err;

        for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
                if (options->out_of_place == 1) {
                        ctx->mbufs_out[mbuf_idx] = cperf_mbuf_create(
                                        ctx->pkt_mbuf_pool_out, 1,
                                        options, test_vector);
                        if (ctx->mbufs_out[mbuf_idx] == NULL)
                                goto err;
                } else {
                        ctx->mbufs_out[mbuf_idx] = NULL;
                }
        }

        snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d",
                        dev_id);

        ctx->crypto_op_pool = rte_crypto_op_pool_create(pool_name,
                        RTE_CRYPTO_OP_TYPE_SYMMETRIC, options->pool_sz, 0, 0,
                        rte_socket_id());
        if (ctx->crypto_op_pool == NULL)
                goto err;

        ctx->res = rte_malloc(NULL, sizeof(struct cperf_op_result) *
                        ctx->options->total_ops, 0);

        if (ctx->res == NULL)
                goto err;

        return ctx;
err:
        cperf_latency_test_free(ctx, mbuf_idx);

        return NULL;
}

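/*
 * Worker loop: enqueue ops in bursts, timestamping each burst around the
 * enqueue/dequeue calls with rte_rdtsc_precise(), drain the device once all
 * ops have been submitted, then aggregate per-op latencies into ctx->results.
 */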
int
cperf_latency_test_runner(void *arg)
{
        struct cperf_latency_ctx *ctx = arg;
        struct cperf_op_result *pres;

        if (ctx == NULL)
                return 0;

        struct rte_crypto_op *ops[ctx->options->burst_sz];
        struct rte_crypto_op *ops_processed[ctx->options->burst_sz];
        uint64_t ops_enqd = 0, ops_deqd = 0;
        uint64_t m_idx = 0, b_idx = 0, i;

        uint64_t tsc_val, tsc_end, tsc_start;
        uint64_t tsc_max = 0, tsc_min = ~0UL, tsc_tot = 0, tsc_idx = 0;
        uint64_t enqd_max = 0, enqd_min = ~0UL, enqd_tot = 0;
        uint64_t deqd_max = 0, deqd_min = ~0UL, deqd_tot = 0;

        uint32_t lcore = rte_lcore_id();

#ifdef CPERF_LINEARIZATION_ENABLE
        struct rte_cryptodev_info dev_info;
        int linearize = 0;

        /* Check if source mbufs require coalescing */
        if (ctx->options->segments_nb > 1) {
                rte_cryptodev_info_get(ctx->dev_id, &dev_info);
                if ((dev_info.feature_flags &
                                RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
                        linearize = 1;
        }
#endif /* CPERF_LINEARIZATION_ENABLE */

        ctx->lcore_id = lcore;

        /* Warm up the host CPU before starting the test */
        for (i = 0; i < ctx->options->total_ops; i++)
                rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

        while (enqd_tot < ctx->options->total_ops) {

                uint16_t burst_size = ((enqd_tot + ctx->options->burst_sz)
                                <= ctx->options->total_ops) ?
                                                ctx->options->burst_sz :
                                                ctx->options->total_ops -
                                                enqd_tot;

                /* Allocate crypto ops from pool */
                if (burst_size != rte_crypto_op_bulk_alloc(
                                ctx->crypto_op_pool,
                                RTE_CRYPTO_OP_TYPE_SYMMETRIC,
                                ops, burst_size))
                        return -1;

                /* Setup crypto op, attach mbuf etc */
                (ctx->populate_ops)(ops, &ctx->mbufs_in[m_idx],
                                &ctx->mbufs_out[m_idx],
                                burst_size, ctx->sess, ctx->options,
                                ctx->test_vector);

                tsc_start = rte_rdtsc_precise();

#ifdef CPERF_LINEARIZATION_ENABLE
                if (linearize) {
                        /* PMD doesn't support scatter-gather and source buffer
                         * is segmented.
                         * We need to linearize it before enqueuing.
                         */
                        for (i = 0; i < burst_size; i++)
                                rte_pktmbuf_linearize(ops[i]->sym->m_src);
                }
#endif /* CPERF_LINEARIZATION_ENABLE */

                /* Enqueue burst of ops on crypto device */
                ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
                                ops, burst_size);

                /* Dequeue processed burst of ops from crypto device */
                ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
                                ops_processed, ctx->options->burst_sz);

                tsc_end = rte_rdtsc_precise();

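                /*
                 * Stamp each op that was actually enqueued with the burst
                 * start time and point its opaque_data at a result slot so
                 * the matching end time can be stored when it is dequeued.
                 */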
                for (i = 0; i < ops_enqd; i++) {
                        ctx->res[tsc_idx].tsc_start = tsc_start;
                        ops[i]->opaque_data = (void *)&ctx->res[tsc_idx];
                        tsc_idx++;
                }

                /* Free memory for not enqueued operations */
                for (i = ops_enqd; i < burst_size; i++)
                        rte_crypto_op_free(ops[i]);

                if (likely(ops_deqd)) {
                        /*
                         * free crypto ops so they can be reused. We don't free
                         * the mbufs here as we don't want to reuse them as
                         * the crypto operation will change the data and cause
                         * failures.
                         */
                        for (i = 0; i < ops_deqd; i++) {
                                pres = (struct cperf_op_result *)
                                                (ops_processed[i]->opaque_data);
                                pres->status = ops_processed[i]->status;
                                pres->tsc_end = tsc_end;

                                rte_crypto_op_free(ops_processed[i]);
                        }

                        deqd_tot += ops_deqd;
                        deqd_max = max(ops_deqd, deqd_max);
                        deqd_min = min(ops_deqd, deqd_min);
                }

                enqd_tot += ops_enqd;
                enqd_max = max(ops_enqd, enqd_max);
                enqd_min = min(ops_enqd, enqd_min);

                m_idx += ops_enqd;
                m_idx = m_idx + ctx->options->burst_sz > ctx->options->pool_sz ?
                                0 : m_idx;
                b_idx++;
        }

        /* Dequeue any operations still in the crypto device */
        while (deqd_tot < ctx->options->total_ops) {
                /* Sending 0 length burst to flush sw crypto device */
                rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

                /* dequeue burst */
                ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
                                ops_processed, ctx->options->burst_sz);

                tsc_end = rte_rdtsc_precise();

                if (ops_deqd != 0) {
                        for (i = 0; i < ops_deqd; i++) {
                                pres = (struct cperf_op_result *)
                                                (ops_processed[i]->opaque_data);
                                pres->status = ops_processed[i]->status;
                                pres->tsc_end = tsc_end;

                                rte_crypto_op_free(ops_processed[i]);
                        }

                        deqd_tot += ops_deqd;
                        deqd_max = max(ops_deqd, deqd_max);
                        deqd_min = min(ops_deqd, deqd_min);
                }
        }

        for (i = 0; i < tsc_idx; i++) {
                tsc_val = ctx->res[i].tsc_end - ctx->res[i].tsc_start;
                tsc_max = max(tsc_val, tsc_max);
                tsc_min = min(tsc_val, tsc_min);
                tsc_tot += tsc_val;
        }

        ctx->results.enqd_tot = enqd_tot;
        ctx->results.enqd_max = enqd_max;
        ctx->results.enqd_min = enqd_min;

        ctx->results.deqd_tot = deqd_tot;
        ctx->results.deqd_max = deqd_max;
        ctx->results.deqd_min = deqd_min;

        ctx->results.cycles_tot = tsc_tot;
        ctx->results.cycles_max = tsc_max;
        ctx->results.cycles_min = tsc_min;

        ctx->results.burst_num = b_idx;
        ctx->results.num = tsc_idx;

        return 0;
}

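/*
 * Report the collected statistics (CSV or human-readable) and release all
 * resources owned by the context.
 */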
void
cperf_latency_test_destructor(void *arg)
{
        struct cperf_latency_ctx *ctx = arg;
        static int only_once;
        uint64_t i;
        uint64_t etot, eavg, emax, emin;
        uint64_t dtot, davg, dmax, dmin;
        uint64_t ctot, cavg, cmax, cmin;
        double ttot, tavg, tmax, tmin;

        const uint64_t tunit = 1000000; /* us */
        const uint64_t tsc_hz = rte_get_tsc_hz();

        if (ctx == NULL)
                return;

        etot = ctx->results.enqd_tot;
        eavg = ctx->results.enqd_tot / ctx->results.burst_num;
        emax = ctx->results.enqd_max;
        emin = ctx->results.enqd_min;

        dtot = ctx->results.deqd_tot;
        davg = ctx->results.deqd_tot / ctx->results.burst_num;
        dmax = ctx->results.deqd_max;
        dmin = ctx->results.deqd_min;

        ctot = ctx->results.cycles_tot;
        cavg = ctx->results.cycles_tot / ctx->results.num;
        cmax = ctx->results.cycles_max;
        cmin = ctx->results.cycles_min;

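        /* Convert cycle counts to wall-clock microseconds using the TSC rate. */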
        ttot = tunit * (double)(ctot) / tsc_hz;
        tavg = tunit * (double)(cavg) / tsc_hz;
        tmax = tunit * (double)(cmax) / tsc_hz;
        tmin = tunit * (double)(cmin) / tsc_hz;

        if (ctx->options->csv) {
                if (!only_once)
                        printf("\n# lcore; Pkt Seq #; cycles; time (us)");

                for (i = 0; i < ctx->options->total_ops; i++) {

                        printf("\n%u;%"PRIu64";%"PRIu64";%.3f",
                                ctx->lcore_id, i + 1,
                                ctx->res[i].tsc_end - ctx->res[i].tsc_start,
                                tunit * (double) (ctx->res[i].tsc_end
                                                - ctx->res[i].tsc_start)
                                        / tsc_hz);

                }
                only_once = 1;
        } else {
                printf("\n# Device %d on lcore %u\n", ctx->dev_id,
                        ctx->lcore_id);
                printf("\n# total operations: %u", ctx->options->total_ops);
                printf("\n#     burst number: %"PRIu64,
                                ctx->results.burst_num);
                printf("\n#");
                printf("\n#          \t       Total\t   Average\t   Maximum\t "
                                "  Minimum");
                printf("\n#  enqueued\t%12"PRIu64"\t%10"PRIu64"\t%10"PRIu64"\t"
                                "%10"PRIu64, etot, eavg, emax, emin);
                printf("\n#  dequeued\t%12"PRIu64"\t%10"PRIu64"\t%10"PRIu64"\t"
                                "%10"PRIu64, dtot, davg, dmax, dmin);
                printf("\n#    cycles\t%12"PRIu64"\t%10"PRIu64"\t%10"PRIu64"\t"
                                "%10"PRIu64, ctot, cavg, cmax, cmin);
                printf("\n# time [us]\t%12.0f\t%10.3f\t%10.3f\t%10.3f", ttot,
                        tavg, tmax, tmin);
                printf("\n\n");

        }

        cperf_latency_test_free(ctx, ctx->options->pool_sz);
}