X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;ds=sidebyside;f=app%2Ftest-crypto-perf%2Fcperf_test_throughput.c;h=61b27ea5f3b72ed60bdec4fd675307035b9966e0;hb=ad674b4d4a354e64b5fca9a4f14f0bd3dfe7db04;hp=92176d711917fc625ca6f79a65449ccc47ee3ef2;hpb=df52cb3b6e132603b00e55cbaf8071811bd8b20e;p=dpdk.git diff --git a/app/test-crypto-perf/cperf_test_throughput.c b/app/test-crypto-perf/cperf_test_throughput.c index 92176d7119..61b27ea5f3 100644 --- a/app/test-crypto-perf/cperf_test_throughput.c +++ b/app/test-crypto-perf/cperf_test_throughput.c @@ -38,18 +38,6 @@ #include "cperf_test_throughput.h" #include "cperf_ops.h" -struct cperf_throughput_results { - uint64_t ops_enqueued; - uint64_t ops_dequeued; - - uint64_t ops_enqueued_failed; - uint64_t ops_dequeued_failed; - - double ops_per_second; - double throughput_gbps; - double cycles_per_byte; -}; - struct cperf_throughput_ctx { uint8_t dev_id; uint16_t qp_id; @@ -68,8 +56,6 @@ struct cperf_throughput_ctx { const struct cperf_options *options; const struct cperf_test_vector *test_vector; - struct cperf_throughput_results results; - }; static void @@ -117,8 +103,8 @@ cperf_mbuf_create(struct rte_mempool *mempool, const struct cperf_test_vector *test_vector) { struct rte_mbuf *mbuf; - uint32_t segment_sz = options->buffer_sz / segments_nb; - uint32_t last_sz = options->buffer_sz % segments_nb; + uint32_t segment_sz = options->max_buffer_size / segments_nb; + uint32_t last_sz = options->max_buffer_size % segments_nb; uint8_t *mbuf_data; uint8_t *test_data = (options->cipher_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) ? @@ -163,10 +149,12 @@ cperf_mbuf_create(struct rte_mempool *mempool, memcpy(mbuf_data, test_data, last_sz); } - mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, - options->auth_digest_sz); - if (mbuf_data == NULL) - goto error; + if (options->op_type != CPERF_CIPHER_ONLY) { + mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, + options->auth_digest_sz); + if (mbuf_data == NULL) + goto error; + } if (options->op_type == CPERF_AEAD) { uint8_t *aead = (uint8_t *)rte_pktmbuf_prepend(mbuf, @@ -218,8 +206,8 @@ cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id, options->pool_sz * options->segments_nb, 0, 0, RTE_PKTMBUF_HEADROOM + RTE_CACHE_LINE_ROUNDUP( - (options->buffer_sz / options->segments_nb) + - (options->buffer_sz % options->segments_nb) + + (options->max_buffer_size / options->segments_nb) + + (options->max_buffer_size % options->segments_nb) + options->auth_digest_sz), rte_socket_id()); @@ -227,9 +215,6 @@ cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id, goto err; /* Generate mbufs_in with plaintext populated for test */ - if (ctx->options->pool_sz % ctx->options->burst_sz) - goto err; - ctx->mbufs_in = rte_malloc(NULL, (sizeof(struct rte_mbuf *) * ctx->options->pool_sz), 0); @@ -250,7 +235,7 @@ cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id, pool_name, options->pool_sz, 0, 0, RTE_PKTMBUF_HEADROOM + RTE_CACHE_LINE_ROUNDUP( - options->buffer_sz + + options->max_buffer_size + options->auth_digest_sz), rte_socket_id()); @@ -294,16 +279,14 @@ int cperf_throughput_test_runner(void *test_ctx) { struct cperf_throughput_ctx *ctx = test_ctx; + uint16_t test_burst_size; + uint8_t burst_size_idx = 0; - uint64_t ops_enqd = 0, ops_enqd_total = 0, ops_enqd_failed = 0; - uint64_t ops_deqd = 0, ops_deqd_total = 0, ops_deqd_failed = 0; - - uint64_t i, m_idx = 0, tsc_start, tsc_end, tsc_duration; - - uint16_t ops_unused = 0; + static int only_once; - struct rte_crypto_op *ops[ctx->options->burst_sz]; - struct rte_crypto_op *ops_processed[ctx->options->burst_sz]; + struct rte_crypto_op *ops[ctx->options->max_burst_size]; + struct rte_crypto_op *ops_processed[ctx->options->max_burst_size]; + uint64_t i; uint32_t lcore = rte_lcore_id(); @@ -322,131 +305,202 @@ cperf_throughput_test_runner(void *test_ctx) ctx->lcore_id = lcore; - if (!ctx->options->csv) - printf("\n# Running throughput test on device: %u, lcore: %u\n", - ctx->dev_id, lcore); - /* Warm up the host CPU before starting the test */ for (i = 0; i < ctx->options->total_ops; i++) rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0); - tsc_start = rte_rdtsc_precise(); + /* Get first size from range or list */ + if (ctx->options->inc_burst_size != 0) + test_burst_size = ctx->options->min_burst_size; + else + test_burst_size = ctx->options->burst_size_list[0]; - while (ops_enqd_total < ctx->options->total_ops) { + while (test_burst_size <= ctx->options->max_burst_size) { + uint64_t ops_enqd = 0, ops_enqd_total = 0, ops_enqd_failed = 0; + uint64_t ops_deqd = 0, ops_deqd_total = 0, ops_deqd_failed = 0; - uint16_t burst_size = ((ops_enqd_total + ctx->options->burst_sz) - <= ctx->options->total_ops) ? - ctx->options->burst_sz : - ctx->options->total_ops - - ops_enqd_total; + uint64_t m_idx = 0, tsc_start, tsc_end, tsc_duration; - uint16_t ops_needed = burst_size - ops_unused; + uint16_t ops_unused = 0; - /* Allocate crypto ops from pool */ - if (ops_needed != rte_crypto_op_bulk_alloc( - ctx->crypto_op_pool, - RTE_CRYPTO_OP_TYPE_SYMMETRIC, - ops, ops_needed)) - return -1; + tsc_start = rte_rdtsc_precise(); - /* Setup crypto op, attach mbuf etc */ - (ctx->populate_ops)(ops, &ctx->mbufs_in[m_idx], - &ctx->mbufs_out[m_idx], - ops_needed, ctx->sess, ctx->options, - ctx->test_vector); + while (ops_enqd_total < ctx->options->total_ops) { -#ifdef CPERF_LINEARIZATION_ENABLE - if (linearize) { - /* PMD doesn't support scatter-gather and source buffer - * is segmented. - * We need to linearize it before enqueuing. - */ - for (i = 0; i < burst_size; i++) - rte_pktmbuf_linearize(ops[i]->sym->m_src); - } -#endif /* CPERF_LINEARIZATION_ENABLE */ + uint16_t burst_size = ((ops_enqd_total + test_burst_size) + <= ctx->options->total_ops) ? + test_burst_size : + ctx->options->total_ops - + ops_enqd_total; - /* Enqueue burst of ops on crypto device */ - ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, - ops, burst_size); - if (ops_enqd < burst_size) - ops_enqd_failed++; - - /** - * Calculate number of ops not enqueued (mainly for hw - * accelerators whose ingress queue can fill up). - */ - ops_unused = burst_size - ops_enqd; - ops_enqd_total += ops_enqd; - - - /* Dequeue processed burst of ops from crypto device */ - ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id, - ops_processed, ctx->options->burst_sz); - - if (likely(ops_deqd)) { - /* free crypto ops so they can be reused. We don't free - * the mbufs here as we don't want to reuse them as - * the crypto operation will change the data and cause - * failures. - */ - for (i = 0; i < ops_deqd; i++) - rte_crypto_op_free(ops_processed[i]); + uint16_t ops_needed = burst_size - ops_unused; + + /* Allocate crypto ops from pool */ + if (ops_needed != rte_crypto_op_bulk_alloc( + ctx->crypto_op_pool, + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + ops, ops_needed)) + return -1; + + /* Setup crypto op, attach mbuf etc */ + (ctx->populate_ops)(ops, &ctx->mbufs_in[m_idx], + &ctx->mbufs_out[m_idx], + ops_needed, ctx->sess, ctx->options, + ctx->test_vector); - ops_deqd_total += ops_deqd; - } else { /** - * Count dequeue polls which didn't return any - * processed operations. This statistic is mainly - * relevant to hw accelerators. + * When ops_needed is smaller than ops_enqd, the + * unused ops need to be moved to the front for + * next round use. */ - ops_deqd_failed++; - } + if (unlikely(ops_enqd > ops_needed)) { + size_t nb_b_to_mov = ops_unused * sizeof( + struct rte_crypto_op *); - m_idx += ops_needed; - m_idx = m_idx + ctx->options->burst_sz > ctx->options->pool_sz ? - 0 : m_idx; - } + memmove(&ops[ops_needed], &ops[ops_enqd], + nb_b_to_mov); + } - /* Dequeue any operations still in the crypto device */ +#ifdef CPERF_LINEARIZATION_ENABLE + if (linearize) { + /* PMD doesn't support scatter-gather and source buffer + * is segmented. + * We need to linearize it before enqueuing. + */ + for (i = 0; i < burst_size; i++) + rte_pktmbuf_linearize(ops[i]->sym->m_src); + } +#endif /* CPERF_LINEARIZATION_ENABLE */ - while (ops_deqd_total < ctx->options->total_ops) { - /* Sending 0 length burst to flush sw crypto device */ - rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0); + /* Enqueue burst of ops on crypto device */ + ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, + ops, burst_size); + if (ops_enqd < burst_size) + ops_enqd_failed++; - /* dequeue burst */ - ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id, - ops_processed, ctx->options->burst_sz); - if (ops_deqd == 0) - ops_deqd_failed++; - else { - for (i = 0; i < ops_deqd; i++) - rte_crypto_op_free(ops_processed[i]); + /** + * Calculate number of ops not enqueued (mainly for hw + * accelerators whose ingress queue can fill up). + */ + ops_unused = burst_size - ops_enqd; + ops_enqd_total += ops_enqd; + + + /* Dequeue processed burst of ops from crypto device */ + ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id, + ops_processed, test_burst_size); + + if (likely(ops_deqd)) { + /* free crypto ops so they can be reused. We don't free + * the mbufs here as we don't want to reuse them as + * the crypto operation will change the data and cause + * failures. + */ + for (i = 0; i < ops_deqd; i++) + rte_crypto_op_free(ops_processed[i]); + + ops_deqd_total += ops_deqd; + } else { + /** + * Count dequeue polls which didn't return any + * processed operations. This statistic is mainly + * relevant to hw accelerators. + */ + ops_deqd_failed++; + } - ops_deqd_total += ops_deqd; + m_idx += ops_needed; + m_idx = m_idx + test_burst_size > ctx->options->pool_sz ? + 0 : m_idx; } - } - tsc_end = rte_rdtsc_precise(); - tsc_duration = (tsc_end - tsc_start); + /* Dequeue any operations still in the crypto device */ - /* Calculate average operations processed per second */ - ctx->results.ops_per_second = ((double)ctx->options->total_ops / - tsc_duration) * rte_get_tsc_hz(); + while (ops_deqd_total < ctx->options->total_ops) { + /* Sending 0 length burst to flush sw crypto device */ + rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0); - /* Calculate average throughput (Gbps) in bits per second */ - ctx->results.throughput_gbps = ((ctx->results.ops_per_second * - ctx->options->buffer_sz * 8) / 1000000000); + /* dequeue burst */ + ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id, + ops_processed, test_burst_size); + if (ops_deqd == 0) + ops_deqd_failed++; + else { + for (i = 0; i < ops_deqd; i++) + rte_crypto_op_free(ops_processed[i]); - /* Calculate average cycles per byte */ - ctx->results.cycles_per_byte = ((double)tsc_duration / - ctx->options->total_ops) / ctx->options->buffer_sz; + ops_deqd_total += ops_deqd; + } + } - ctx->results.ops_enqueued = ops_enqd_total; - ctx->results.ops_dequeued = ops_deqd_total; + tsc_end = rte_rdtsc_precise(); + tsc_duration = (tsc_end - tsc_start); + + /* Calculate average operations processed per second */ + double ops_per_second = ((double)ctx->options->total_ops / + tsc_duration) * rte_get_tsc_hz(); + + /* Calculate average throughput (Gbps) in bits per second */ + double throughput_gbps = ((ops_per_second * + ctx->options->test_buffer_size * 8) / 1000000000); + + /* Calculate average cycles per packet */ + double cycles_per_packet = ((double)tsc_duration / + ctx->options->total_ops); + + if (!ctx->options->csv) { + if (!only_once) + printf("%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s\n\n", + "lcore id", "Buf Size", "Burst Size", + "Enqueued", "Dequeued", "Failed Enq", + "Failed Deq", "MOps", "Gbps", + "Cycles/Buf"); + only_once = 1; + + printf("%12u%12u%12u%12"PRIu64"%12"PRIu64"%12"PRIu64 + "%12"PRIu64"%12.4f%12.4f%12.2f\n", + ctx->lcore_id, + ctx->options->test_buffer_size, + test_burst_size, + ops_enqd_total, + ops_deqd_total, + ops_enqd_failed, + ops_deqd_failed, + ops_per_second/1000000, + throughput_gbps, + cycles_per_packet); + } else { + if (!only_once) + printf("# lcore id, Buffer Size(B)," + "Burst Size,Enqueued,Dequeued,Failed Enq," + "Failed Deq,Ops(Millions),Throughput(Gbps)," + "Cycles/Buf\n\n"); + only_once = 1; + + printf("%10u;%10u;%u;%"PRIu64";%"PRIu64";%"PRIu64";%"PRIu64";" + "%.f3;%.f3;%.f3\n", + ctx->lcore_id, + ctx->options->test_buffer_size, + test_burst_size, + ops_enqd_total, + ops_deqd_total, + ops_enqd_failed, + ops_deqd_failed, + ops_per_second/1000000, + throughput_gbps, + cycles_per_packet); + } - ctx->results.ops_enqueued_failed = ops_enqd_failed; - ctx->results.ops_dequeued_failed = ops_deqd_failed; + /* Get next size from range or list */ + if (ctx->options->inc_burst_size != 0) + test_burst_size += ctx->options->inc_burst_size; + else { + if (++burst_size_idx == ctx->options->burst_size_count) + break; + test_burst_size = ctx->options->burst_size_list[burst_size_idx]; + } + + } return 0; } @@ -456,50 +510,9 @@ void cperf_throughput_test_destructor(void *arg) { struct cperf_throughput_ctx *ctx = arg; - struct cperf_throughput_results *results = &ctx->results; - static int only_once; if (ctx == NULL) return; - if (!ctx->options->csv) { - printf("\n# Device %d on lcore %u\n", - ctx->dev_id, ctx->lcore_id); - printf("# Buffer Size(B)\t Enqueued\t Dequeued\tFailed Enq" - "\tFailed Deq\tOps(Millions)\tThroughput(Gbps)" - "\tCycles Per Byte\n"); - - printf("\n%16u\t%10"PRIu64"\t%10"PRIu64"\t%10"PRIu64"\t" - "%10"PRIu64"\t%16.4f\t%16.4f\t%15.2f\n", - ctx->options->buffer_sz, - results->ops_enqueued, - results->ops_dequeued, - results->ops_enqueued_failed, - results->ops_dequeued_failed, - results->ops_per_second/1000000, - results->throughput_gbps, - results->cycles_per_byte); - } else { - if (!only_once) - printf("\n# CPU lcore id, Burst Size(B), " - "Buffer Size(B),Enqueued,Dequeued,Failed Enq," - "Failed Deq,Ops(Millions),Throughput(Gbps)," - "Cycles Per Byte\n"); - only_once = 1; - - printf("%u;%u;%u;%"PRIu64";%"PRIu64";%"PRIu64";%"PRIu64";" - "%.f3;%.f3;%.f3\n", - ctx->lcore_id, - ctx->options->burst_sz, - ctx->options->buffer_sz, - results->ops_enqueued, - results->ops_dequeued, - results->ops_enqueued_failed, - results->ops_dequeued_failed, - results->ops_per_second/1000000, - results->throughput_gbps, - results->cycles_per_byte); - } - cperf_throughput_test_free(ctx, ctx->options->pool_sz); }