From 2369f73329f87a0c90f948fc32ef0811a94fb058 Mon Sep 17 00:00:00 2001
From: Jerin Jacob
Date: Tue, 4 Jul 2017 10:23:20 +0530
Subject: [PATCH] app/testeventdev: add perf queue worker functions

Signed-off-by: Jerin Jacob
Acked-by: Harry van Haaren
---
 app/test-eventdev/test_perf_common.h |  60 ++++++++++++
 app/test-eventdev/test_perf_queue.c  | 136 +++++++++++++++++++++++++++
 2 files changed, 196 insertions(+)

diff --git a/app/test-eventdev/test_perf_common.h b/app/test-eventdev/test_perf_common.h
index f8246953a4..9888e50788 100644
--- a/app/test-eventdev/test_perf_common.h
+++ b/app/test-eventdev/test_perf_common.h
@@ -86,6 +86,66 @@ struct perf_elt {
 	uint64_t timestamp;
 } __rte_cache_aligned;
 
+#define BURST_SIZE 16
+
+#define PERF_WORKER_INIT\
+	struct worker_data *w = arg;\
+	struct test_perf *t = w->t;\
+	struct evt_options *opt = t->opt;\
+	const uint8_t dev = w->dev_id;\
+	const uint8_t port = w->port_id;\
+	uint8_t *const sched_type_list = &t->sched_type_list[0];\
+	struct rte_mempool *const pool = t->pool;\
+	const uint8_t nb_stages = t->opt->nb_stages;\
+	const uint8_t laststage = nb_stages - 1;\
+	uint8_t cnt = 0;\
+	void *bufs[16] __rte_cache_aligned;\
+	int const sz = RTE_DIM(bufs);\
+	if (opt->verbose_level > 1)\
+		printf("%s(): lcore %d dev_id %d port=%d\n", __func__,\
+			rte_lcore_id(), dev, port)
+
+static inline __attribute__((always_inline)) int
+perf_process_last_stage(struct rte_mempool *const pool,
+		struct rte_event *const ev, struct worker_data *const w,
+		void *bufs[], int const buf_sz, uint8_t count)
+{
+	bufs[count++] = ev->event_ptr;
+	w->processed_pkts++;
+	rte_smp_wmb();
+
+	if (unlikely(count == buf_sz)) {
+		count = 0;
+		rte_mempool_put_bulk(pool, bufs, buf_sz);
+	}
+	return count;
+}
+
+static inline __attribute__((always_inline)) uint8_t
+perf_process_last_stage_latency(struct rte_mempool *const pool,
+		struct rte_event *const ev, struct worker_data *const w,
+		void *bufs[], int const buf_sz, uint8_t count)
+{
+	uint64_t latency;
+	struct perf_elt *const m = ev->event_ptr;
+
+	bufs[count++] = ev->event_ptr;
+	w->processed_pkts++;
+
+	if (unlikely(count == buf_sz)) {
+		count = 0;
+		latency = rte_get_timer_cycles() - m->timestamp;
+		rte_mempool_put_bulk(pool, bufs, buf_sz);
+	} else {
+		latency = rte_get_timer_cycles() - m->timestamp;
+	}
+
+	w->latency += latency;
+	rte_smp_wmb();
+	return count;
+}
+
+
 static inline int
 perf_nb_event_ports(struct evt_options *opt)
 {
diff --git a/app/test-eventdev/test_perf_queue.c b/app/test-eventdev/test_perf_queue.c
index 1ac8231092..323d15f0e0 100644
--- a/app/test-eventdev/test_perf_queue.c
+++ b/app/test-eventdev/test_perf_queue.c
@@ -41,6 +41,141 @@ perf_queue_nb_event_queues(struct evt_options *opt)
 	return evt_nr_active_lcores(opt->plcores) * opt->nb_stages;
 }
 
+static inline __attribute__((always_inline)) void
+mark_fwd_latency(struct rte_event *const ev,
+		const uint8_t nb_stages)
+{
+	if (unlikely((ev->queue_id % nb_stages) == 0)) {
+		struct perf_elt *const m = ev->event_ptr;
+
+		m->timestamp = rte_get_timer_cycles();
+	}
+}
+
+static inline __attribute__((always_inline)) void
+fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
+		const uint8_t nb_stages)
+{
+	ev->queue_id++;
+	ev->sched_type = sched_type_list[ev->queue_id % nb_stages];
+	ev->op = RTE_EVENT_OP_FORWARD;
+	ev->event_type = RTE_EVENT_TYPE_CPU;
+}
+
+static int
+perf_queue_worker(void *arg, const int enable_fwd_latency)
+{
+	PERF_WORKER_INIT;
+	struct rte_event ev;
+
+	while (t->done == false) {
+		uint16_t event = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
+
+		if (!event) {
+			rte_pause();
+			continue;
+		}
+		if (enable_fwd_latency)
+		/* first q in pipeline, mark timestamp to compute fwd latency */
+			mark_fwd_latency(&ev, nb_stages);
+
+		/* last stage in pipeline */
+		if (unlikely((ev.queue_id % nb_stages) == laststage)) {
+			if (enable_fwd_latency)
+				cnt = perf_process_last_stage_latency(pool,
+					&ev, w, bufs, sz, cnt);
+			else
+				cnt = perf_process_last_stage(pool,
+					&ev, w, bufs, sz, cnt);
+		} else {
+			fwd_event(&ev, sched_type_list, nb_stages);
+			while (rte_event_enqueue_burst(dev, port, &ev, 1) != 1)
+				rte_pause();
+		}
+	}
+	return 0;
+}
+
+static int
+perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
+{
+	PERF_WORKER_INIT;
+	uint16_t i;
+	/* +1 to avoid prefetch out of array check */
+	struct rte_event ev[BURST_SIZE + 1];
+
+	while (t->done == false) {
+		uint16_t const nb_rx = rte_event_dequeue_burst(dev, port, ev,
+				BURST_SIZE, 0);
+
+		if (!nb_rx) {
+			rte_pause();
+			continue;
+		}
+
+		for (i = 0; i < nb_rx; i++) {
+			if (enable_fwd_latency) {
+				rte_prefetch0(ev[i+1].event_ptr);
+				/* first queue in pipeline.
+				 * mark time stamp to compute fwd latency
+				 */
+				mark_fwd_latency(&ev[i], nb_stages);
+			}
+			/* last stage in pipeline */
+			if (unlikely((ev[i].queue_id % nb_stages) ==
+						 laststage)) {
+				if (enable_fwd_latency)
+					cnt = perf_process_last_stage_latency(
+						pool, &ev[i], w, bufs, sz, cnt);
+				else
+					cnt = perf_process_last_stage(pool,
+						&ev[i], w, bufs, sz, cnt);
+
+				ev[i].op = RTE_EVENT_OP_RELEASE;
+			} else {
+				fwd_event(&ev[i], sched_type_list, nb_stages);
+			}
+		}
+
+		uint16_t enq;
+
+		enq = rte_event_enqueue_burst(dev, port, ev, nb_rx);
+		while (enq < nb_rx) {
+			enq += rte_event_enqueue_burst(dev, port,
+							ev + enq, nb_rx - enq);
+		}
+	}
+	return 0;
+}
+
+static int
+worker_wrapper(void *arg)
+{
+	struct worker_data *w = arg;
+	struct evt_options *opt = w->t->opt;
+
+	const bool burst = evt_has_burst_mode(w->dev_id);
+	const int fwd_latency = opt->fwd_latency;
+
+	/* allow compiler to optimize */
+	if (!burst && !fwd_latency)
+		return perf_queue_worker(arg, 0);
+	else if (!burst && fwd_latency)
+		return perf_queue_worker(arg, 1);
+	else if (burst && !fwd_latency)
+		return perf_queue_worker_burst(arg, 0);
+	else if (burst && fwd_latency)
+		return perf_queue_worker_burst(arg, 1);
+
+	rte_panic("invalid worker\n");
+}
+
+static int
+perf_queue_launch_lcores(struct evt_test *test, struct evt_options *opt)
+{
+	return perf_launch_lcores(test, opt, worker_wrapper);
+}
+
 static int
 perf_queue_eventdev_setup(struct evt_test *test, struct evt_options *opt)
 {
@@ -143,6 +143,7 @@ static const struct evt_test_ops perf_queue = {
 	.test_setup = perf_test_setup,
 	.mempool_setup = perf_mempool_setup,
 	.eventdev_setup = perf_queue_eventdev_setup,
+	.launch_lcores = perf_queue_launch_lcores,
 	.eventdev_destroy = perf_eventdev_destroy,
 	.mempool_destroy = perf_mempool_destroy,
 	.test_result = perf_test_result,
-- 
2.20.1
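
A note on the last-stage helpers in test_perf_common.h: perf_process_last_stage() and perf_process_last_stage_latency() do not free each completed event individually. They stash ev->event_ptr in a small per-worker array and return the whole batch with a single rte_mempool_put_bulk() once BURST_SIZE (16) pointers have accumulated, amortizing the mempool's per-call cost across the batch. The sketch below shows the same batching pattern in isolation; put_bulk() and the object array are stand-ins for the DPDK mempool API, not code from this patch.

#include <stdio.h>

#define BUF_SZ 16	/* mirrors BURST_SIZE in the patch */

/* Stand-in for rte_mempool_put_bulk(): return many objects at once
 * so the per-call overhead is paid once per batch, not per object. */
static void
put_bulk(void *objs[], int n)
{
	(void)objs;
	printf("returned %d objects in one call\n", n);
}

/* Same shape as perf_process_last_stage(): buffer the finished
 * object, flush only when the local array fills, return new count. */
static int
process_last_stage(void *obj, void *bufs[], int buf_sz, int count)
{
	bufs[count++] = obj;
	if (count == buf_sz) {
		count = 0;
		put_bulk(bufs, buf_sz);
	}
	return count;
}

int main(void)
{
	static int objs[64];
	void *bufs[BUF_SZ];
	int cnt = 0;

	for (int i = 0; i < 64; i++)
		cnt = process_last_stage(&objs[i], bufs, BUF_SZ, cnt);
	return 0;	/* flushes 64 / 16 = 4 batches */
}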
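On the "/* allow compiler to optimize */" comment in worker_wrapper(): enable_fwd_latency reaches the workers only as a literal 0 or 1 from the four call sites, and the hot-path helpers are __attribute__((always_inline)), so after inlining the compiler can constant-fold the flag and emit specialized loops with no per-event branch. Here is a minimal standalone sketch of that pattern under the same assumptions; run_loop(), run_plain(), and run_latency() are hypothetical names, not part of the patch.

#include <stdio.h>

/* Hot loop parameterized by a const flag. Forced inlining plus a
 * literal argument at each call site lets the compiler constant-fold
 * the 'measure' test, so each wrapper compiles down to a loop
 * without the per-iteration branch. */
static inline __attribute__((always_inline)) int
run_loop(int iterations, const int measure)
{
	int acc = 0;

	for (int i = 0; i < iterations; i++) {
		acc += i;
		if (measure)	/* folded away when 'measure' is a literal */
			acc ^= i << 1;
	}
	return acc;
}

/* The patch's four-way dispatch picks one specialization once per
 * worker launch instead of re-testing the flag for every event. */
static int run_plain(int n)   { return run_loop(n, 0); }
static int run_latency(int n) { return run_loop(n, 1); }

int main(void)
{
	printf("%d %d\n", run_plain(1000), run_latency(1000));
	return 0;
}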