From: Ilia Kurakin Date: Thu, 19 Jul 2018 12:21:42 +0000 (+0300) Subject: ethdev: change vtune profiling approach X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=2c1bbab7f09d42210ad4c9e120c159c3bf038c8f;p=dpdk.git ethdev: change vtune profiling approach The patch changes rx_burst profiling approach: 1. VTune's instrumentation is removed 2. empty hook callback for profiling is added This way all VTune-specific logic moves to the VTune side. Hook is enabled only when CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE option is turned on. VTune uses this hook to attach to the polling cycle. It is not possible to attach to the rx_burst directly, as it is inline. Signed-off-by: Ilia Kurakin Acked-by: Keith Wiles Acked-by: Ferruh Yigit --- diff --git a/config/common_base b/config/common_base index 4bcbaf923f..155c7d40ea 100644 --- a/config/common_base +++ b/config/common_base @@ -128,7 +128,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024 CONFIG_RTE_LIBRTE_IEEE1588=n CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y -CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS=n +CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE=n # # Turn off Tx preparation stage diff --git a/doc/guides/prog_guide/profile_app.rst b/doc/guides/prog_guide/profile_app.rst index 1106216a09..02f05614a2 100644 --- a/doc/guides/prog_guide/profile_app.rst +++ b/doc/guides/prog_guide/profile_app.rst @@ -33,38 +33,12 @@ Refer to the for details about application profiling. -Empty cycles tracing +Profiling with VTune ~~~~~~~~~~~~~~~~~~~~ -Iterations that yielded no RX packets (empty cycles, wasted iterations) can -be analyzed using VTune Amplifier. This profiling employs the -`Instrumentation and Tracing Technology (ITT) API -`_ -feature of VTune Amplifier and requires only reconfiguring the DPDK library, -no changes in a DPDK application are needed. 
- -To trace wasted iterations on RX queues, first reconfigure DPDK with -``CONFIG_RTE_ETHDEV_RXTX_CALLBACKS`` and -``CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS`` enabled. - -Then rebuild DPDK, specifying paths to the ITT header and library, which can -be found in any VTune Amplifier distribution in the *include* and *lib* -directories respectively: - -.. code-block:: console - - make EXTRA_CFLAGS=-I<path to ittnotify.h> \ - EXTRA_LDLIBS="-L<path to libittnotify.a> -littnotify" - -Finally, to see wasted iterations in your performance analysis results, -select the *"Analyze user tasks, events, and counters"* checkbox in the -*"Analysis Type"* tab when configuring analysis via VTune Amplifier GUI. -Alternatively, when running VTune Amplifier via command line, specify -``-knob enable-user-tasks=true`` option. - -Collected regions of wasted iterations will be marked on VTune Amplifier's -timeline as ITT tasks. These ITT tasks have predefined names, containing -Ethernet device and RX queue identifiers. +To allow VTune attaching to the DPDK application, reconfigure and recompile +the DPDK with ``CONFIG_RTE_ETHDEV_RXTX_CALLBACKS`` and +``CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE`` enabled. Profiling on ARM64 diff --git a/lib/librte_ethdev/ethdev_profile.c b/lib/librte_ethdev/ethdev_profile.c index 0d1dcda36d..a3c303f62c 100644 --- a/lib/librte_ethdev/ethdev_profile.c +++ b/lib/librte_ethdev/ethdev_profile.c @@ -1,87 +1,33 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2017 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ #include "ethdev_profile.h" /** - * This conditional block enables RX queues profiling by tracking wasted - * iterations, i.e. iterations which yielded no RX packets. Profiling is - * performed using the Instrumentation and Tracing Technology (ITT) API, - * employed by the Intel (R) VTune (TM) Amplifier. + * This conditional block enables Ethernet device profiling with + * Intel (R) VTune (TM) Amplifier. 
*/ -#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS - -#include <ittnotify.h> - -#define ITT_MAX_NAME_LEN (100) - -/** - * Auxiliary ITT structure belonging to Ethernet device and using to: - * - track RX queue state to determine whether it is wasting loop iterations - * - begin or end ITT task using task domain and task name (handle) - */ -struct itt_profile_rx_data { - /** - * ITT domains for each queue. - */ - __itt_domain *domains[RTE_MAX_QUEUES_PER_PORT]; - /** - * ITT task names for each queue. - */ - __itt_string_handle *handles[RTE_MAX_QUEUES_PER_PORT]; - /** - * Flags indicating the queues state. Possible values: - * 1 - queue is wasting iterations, - * 0 - otherwise. - */ - uint8_t queue_state[RTE_MAX_QUEUES_PER_PORT]; -}; - -/** - * The pool of *itt_profile_rx_data* structures. - */ -struct itt_profile_rx_data itt_rx_data[RTE_MAX_ETHPORTS]; - +#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE /** - * This callback function manages ITT tasks collection on given port and queue. - * It must be registered with rte_eth_add_rx_callback() to be called from - * rte_eth_rx_burst(). To find more comments see rte_rx_callback_fn function - * type declaration. + * Hook callback to trace rte_eth_rx_burst() calls. 
*/ -static uint16_t -collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id, +uint16_t +profile_hook_rx_burst_cb( + __rte_unused uint16_t port_id, __rte_unused uint16_t queue_id, __rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts, __rte_unused uint16_t max_pkts, __rte_unused void *user_param) { - if (unlikely(nb_pkts == 0)) { - if (!itt_rx_data[port_id].queue_state[queue_id]) { - __itt_task_begin( - itt_rx_data[port_id].domains[queue_id], - __itt_null, __itt_null, - itt_rx_data[port_id].handles[queue_id]); - itt_rx_data[port_id].queue_state[queue_id] = 1; - } - } else { - if (unlikely(itt_rx_data[port_id].queue_state[queue_id])) { - __itt_task_end( - itt_rx_data[port_id].domains[queue_id]); - itt_rx_data[port_id].queue_state[queue_id] = 0; - } - } return nb_pkts; } /** - * Initialization of itt_profile_rx_data for a given Ethernet device. + * Setting profiling rx callback for a given Ethernet device. * This function must be invoked when ethernet device is being configured. - * Result will be stored in the global array *itt_rx_data*. * * @param port_id * The port identifier of the Ethernet device. - * @param port_name - * The name of the Ethernet device. * @param rx_queue_num * The number of RX queues on specified port. * @@ -90,46 +36,27 @@ collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id, * - On failure, a negative value. 
*/ static inline int -itt_profile_rx_init(uint16_t port_id, char *port_name, uint8_t rx_queue_num) +vtune_profile_rx_init(uint16_t port_id, uint8_t rx_queue_num) { uint16_t q_id; for (q_id = 0; q_id < rx_queue_num; ++q_id) { - char domain_name[ITT_MAX_NAME_LEN]; - - snprintf(domain_name, sizeof(domain_name), - "RXBurst.WastedIterations.Port_%s.Queue_%d", - port_name, q_id); - itt_rx_data[port_id].domains[q_id] - = __itt_domain_create(domain_name); - - char task_name[ITT_MAX_NAME_LEN]; - - snprintf(task_name, sizeof(task_name), - "port id: %d; queue id: %d", - port_id, q_id); - itt_rx_data[port_id].handles[q_id] - = __itt_string_handle_create(task_name); - - itt_rx_data[port_id].queue_state[q_id] = 0; - if (!rte_eth_add_rx_callback( - port_id, q_id, collect_itt_rx_burst_cb, NULL)) { + port_id, q_id, profile_hook_rx_burst_cb, NULL)) { return -rte_errno; } } return 0; } -#endif /* RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS */ +#endif /* RTE_ETHDEV_PROFILE_WITH_VTUNE */ int -__rte_eth_profile_rx_init(__rte_unused uint16_t port_id, +__rte_eth_dev_profile_init(__rte_unused uint16_t port_id, __rte_unused struct rte_eth_dev *dev) { -#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS - return itt_profile_rx_init( - port_id, dev->data->name, dev->data->nb_rx_queues); +#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE + return vtune_profile_rx_init(port_id, dev->data->nb_rx_queues); #endif return 0; } diff --git a/lib/librte_ethdev/ethdev_profile.h b/lib/librte_ethdev/ethdev_profile.h index e5ea368249..65031e6f3f 100644 --- a/lib/librte_ethdev/ethdev_profile.h +++ b/lib/librte_ethdev/ethdev_profile.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2017 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ #ifndef _RTE_ETHDEV_PROFILE_H_ @@ -8,7 +8,7 @@ #include "rte_ethdev.h" /** - * Initialization of profiling RX queues for the Ethernet device. + * Initialization of the Ethernet device profiling. 
* Implementation of this function depends on chosen profiling method, * defined in configs. * @@ -22,6 +22,6 @@ * - On failure, a negative value. */ int -__rte_eth_profile_rx_init(uint16_t port_id, struct rte_eth_dev *dev); +__rte_eth_dev_profile_init(uint16_t port_id, struct rte_eth_dev *dev); #endif diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c index 4c32025058..f32722f361 100644 --- a/lib/librte_ethdev/rte_ethdev.c +++ b/lib/librte_ethdev/rte_ethdev.c @@ -1191,9 +1191,9 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, } /* Initialize Rx profiling if enabled at compilation time. */ - diag = __rte_eth_profile_rx_init(port_id, dev); + diag = __rte_eth_dev_profile_init(port_id, dev); if (diag != 0) { - RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_profile_rx_init = %d\n", + RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_dev_profile_init = %d\n", port_id, diag); rte_eth_dev_rx_queue_config(dev, 0); rte_eth_dev_tx_queue_config(dev, 0);