From 73e6b8c93dca77d24130833f3ee6e4239381b0f6 Mon Sep 17 00:00:00 2001 From: Jerin Jacob Date: Wed, 14 Jun 2017 10:27:32 +0530 Subject: [PATCH] eventdev: introduce burst mode capability Introducing the burst mode capability flag to express that the event device is capable of operating in burst mode for enqueue(forward, release) and dequeue operation. If the device is not capable, then the application still uses the rte_event_dequeue_burst() and rte_event_enqueue_burst() but the PMD accepts only one event at a time, which is in any case transparent with the current rte_event_*_burst API semantics. It serves two purposes: 1) Fix a performance regression on PMDs which support only nonburst mode; this issue is two-fold. Typically the burst_worker main loop consists of the following pseudo code: while(1) { uint16_t nb_rx = rte_event_dequeue_burst(ev,..); for (i=0; i < nb_rx; i++) { process(ev[i]); if (is_release_required(ev[i])) release_the_event(ev); } uint16_t nb_tx = rte_event_enqueue_burst(dev_id, port_id, events, nb_rx); while (nb_tx < nb_rx) nb_tx += rte_event_enqueue_burst(dev_id, port_id, events + nb_tx, nb_rx - nb_tx); } Typically the non_burst_worker main loop consists of the following pseudo code: while(1) { uint16_t nb_rx = rte_event_dequeue_burst(&ev, , 1); if (!nb_rx) continue; process(ev); while (rte_event_enqueue_burst(dev, port, &ev, 1) != 1); } The following overhead has been seen on nonburst mode capable PMDs with the burst mode version - Extra explicit release (the PMD does the release implicitly on the next dequeue); avoiding it saves the cost of the additional driver function overhead. - Extra "for" loop for event processing which the compiler cannot detect at runtime 2) Simplify the application configuration by avoiding the need for the application to find the correct enqueue and dequeue depth across different PMDs. If burst mode is not supported, then the PMD can ignore the depth field. 
This will enable writing portable applications and makes the RFC eventdev_pipeline application work on the OCTEONTX PMD http://dpdk.org/dev/patchwork/patch/23799/ If an application wishes to get the maximum performance on a nonburst capable PMD then the application can write the code in a way that keeps the packet processing functions as inline functions and launches the workers based on the capability. The generic burst based worker still works on those PMDs without any code change, but this scheme is needed only when the application wants to get the maximum performance out of nonburst capable PMDs. This patch is based on the real world test cases http://dpdk.org/dev/patchwork/patch/24832/, where without this scheme a 20.9% performance drop was observed per core. See the worker_wrapper(), perf_queue_worker(), perf_queue_worker_burst() functions for how to use this scheme in a portable way without losing performance on both sets of PMDs while achieving portability. http://dpdk.org/dev/patchwork/patch/24832/ Signed-off-by: Jerin Jacob Acked-by: Harry van Haaren --- lib/librte_eventdev/rte_eventdev.c | 10 ++++++---- lib/librte_eventdev/rte_eventdev.h | 20 ++++++++++++++++---- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c index a246965e63..cf651dccaa 100644 --- a/lib/librte_eventdev/rte_eventdev.c +++ b/lib/librte_eventdev/rte_eventdev.c @@ -426,8 +426,9 @@ rte_event_dev_configure(uint8_t dev_id, dev_id); return -EINVAL; } - if (dev_conf->nb_event_port_dequeue_depth > - info.max_event_port_dequeue_depth) { + if ((info.event_dev_cap & RTE_EVENT_DEV_CAP_BURST_MODE) && + (dev_conf->nb_event_port_dequeue_depth > + info.max_event_port_dequeue_depth)) { RTE_EDEV_LOG_ERR("dev%d nb_dq_depth=%d > max_dq_depth=%d", dev_id, dev_conf->nb_event_port_dequeue_depth, info.max_event_port_dequeue_depth); @@ -440,8 +441,9 @@ rte_event_dev_configure(uint8_t dev_id, dev_id); return -EINVAL; } - if 
(dev_conf->nb_event_port_enqueue_depth > - info.max_event_port_enqueue_depth) { + if ((info.event_dev_cap & RTE_EVENT_DEV_CAP_BURST_MODE) && + (dev_conf->nb_event_port_enqueue_depth > + info.max_event_port_enqueue_depth)) { RTE_EDEV_LOG_ERR("dev%d nb_enq_depth=%d > max_enq_depth=%d", dev_id, dev_conf->nb_event_port_enqueue_depth, info.max_event_port_enqueue_depth); diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h index e4a62b4b31..b20a6c688b 100644 --- a/lib/librte_eventdev/rte_eventdev.h +++ b/lib/librte_eventdev/rte_eventdev.h @@ -274,6 +274,14 @@ struct rte_mbuf; /* we just use mbuf pointers; no need to include rte_mbuf.h */ * * @see RTE_EVENT_QUEUE_CFG_* values */ +#define RTE_EVENT_DEV_CAP_BURST_MODE (1ULL << 4) +/**< Event device is capable of operating in burst mode for enqueue(forward, + * release) and dequeue operation. If this capability is not set, application + * still uses the rte_event_dequeue_burst() and rte_event_enqueue_burst() but + * PMD accepts only one event at a time. + * + * @see rte_event_dequeue_burst() rte_event_enqueue_burst() + */ /* Event device priority levels */ #define RTE_EVENT_DEV_PRIORITY_HIGHEST 0 @@ -433,14 +441,16 @@ struct rte_event_dev_config { /**< Maximum number of events can be dequeued at a time from an * event port by this device. * This value cannot exceed the *max_event_port_dequeue_depth* - * which previously provided in rte_event_dev_info_get() + * which previously provided in rte_event_dev_info_get(). + * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable. * @see rte_event_port_setup() */ uint32_t nb_event_port_enqueue_depth; /**< Maximum number of events can be enqueued at a time from an * event port by this device. * This value cannot exceed the *max_event_port_enqueue_depth* - * which previously provided in rte_event_dev_info_get() + * which previously provided in rte_event_dev_info_get(). + * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable. 
* @see rte_event_port_setup() */ uint32_t event_dev_cfg; @@ -642,12 +652,14 @@ struct rte_event_port_conf { uint16_t dequeue_depth; /**< Configure number of bulk dequeues for this event port. * This value cannot exceed the *nb_event_port_dequeue_depth* - * which previously supplied to rte_event_dev_configure() + * which previously supplied to rte_event_dev_configure(). + * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable. */ uint16_t enqueue_depth; /**< Configure number of bulk enqueues for this event port. * This value cannot exceed the *nb_event_port_enqueue_depth* - * which previously supplied to rte_event_dev_configure() + * which previously supplied to rte_event_dev_configure(). + * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable. */ }; -- 2.20.1