for details about application profiling.
-Empty cycles tracing
+Profiling with VTune
~~~~~~~~~~~~~~~~~~~~
-Iterations that yielded no RX packets (empty cycles, wasted iterations) can
-be analyzed using VTune Amplifier. This profiling employs the
-`Instrumentation and Tracing Technology (ITT) API
-<https://software.intel.com/en-us/node/544195>`_
-feature of VTune Amplifier and requires only reconfiguring the DPDK library,
-no changes in a DPDK application are needed.
-
-To trace wasted iterations on RX queues, first reconfigure DPDK with
-``CONFIG_RTE_ETHDEV_RXTX_CALLBACKS`` and
-``CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS`` enabled.
-
-Then rebuild DPDK, specifying paths to the ITT header and library, which can
-be found in any VTune Amplifier distribution in the *include* and *lib*
-directories respectively:
-
-.. code-block:: console
-
- make EXTRA_CFLAGS=-I<path to ittnotify.h> \
- EXTRA_LDLIBS="-L<path to libittnotify.a> -littnotify"
-
-Finally, to see wasted iterations in your performance analysis results,
-select the *"Analyze user tasks, events, and counters"* checkbox in the
-*"Analysis Type"* tab when configuring analysis via VTune Amplifier GUI.
-Alternatively, when running VTune Amplifier via command line, specify
-``-knob enable-user-tasks=true`` option.
-
-Collected regions of wasted iterations will be marked on VTune Amplifier's
-timeline as ITT tasks. These ITT tasks have predefined names, containing
-Ethernet device and RX queue identifiers.
+To allow VTune to attach to the DPDK application, reconfigure and recompile
+DPDK with ``CONFIG_RTE_ETHDEV_RXTX_CALLBACKS`` and
+``CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE`` enabled.
Profiling on ARM64
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2017 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
*/
#include "ethdev_profile.h"
/**
- * This conditional block enables RX queues profiling by tracking wasted
- * iterations, i.e. iterations which yielded no RX packets. Profiling is
- * performed using the Instrumentation and Tracing Technology (ITT) API,
- * employed by the Intel (R) VTune (TM) Amplifier.
+ * This conditional block enables Ethernet device profiling with
+ * Intel (R) VTune (TM) Amplifier.
*/
-#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
-
-#include <ittnotify.h>
-
-#define ITT_MAX_NAME_LEN (100)
-
-/**
- * Auxiliary ITT structure belonging to Ethernet device and using to:
- * - track RX queue state to determine whether it is wasting loop iterations
- * - begin or end ITT task using task domain and task name (handle)
- */
-struct itt_profile_rx_data {
- /**
- * ITT domains for each queue.
- */
- __itt_domain *domains[RTE_MAX_QUEUES_PER_PORT];
- /**
- * ITT task names for each queue.
- */
- __itt_string_handle *handles[RTE_MAX_QUEUES_PER_PORT];
- /**
- * Flags indicating the queues state. Possible values:
- * 1 - queue is wasting iterations,
- * 0 - otherwise.
- */
- uint8_t queue_state[RTE_MAX_QUEUES_PER_PORT];
-};
-
-/**
- * The pool of *itt_profile_rx_data* structures.
- */
-struct itt_profile_rx_data itt_rx_data[RTE_MAX_ETHPORTS];
-
+#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE
/**
- * This callback function manages ITT tasks collection on given port and queue.
- * It must be registered with rte_eth_add_rx_callback() to be called from
- * rte_eth_rx_burst(). To find more comments see rte_rx_callback_fn function
- * type declaration.
+ * Hook callback to trace rte_eth_rx_burst() calls; returns nb_pkts unchanged.
*/
-static uint16_t
-collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id,
+uint16_t
+profile_hook_rx_burst_cb(
+ __rte_unused uint16_t port_id, __rte_unused uint16_t queue_id,
__rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts,
__rte_unused uint16_t max_pkts, __rte_unused void *user_param)
{
- if (unlikely(nb_pkts == 0)) {
- if (!itt_rx_data[port_id].queue_state[queue_id]) {
- __itt_task_begin(
- itt_rx_data[port_id].domains[queue_id],
- __itt_null, __itt_null,
- itt_rx_data[port_id].handles[queue_id]);
- itt_rx_data[port_id].queue_state[queue_id] = 1;
- }
- } else {
- if (unlikely(itt_rx_data[port_id].queue_state[queue_id])) {
- __itt_task_end(
- itt_rx_data[port_id].domains[queue_id]);
- itt_rx_data[port_id].queue_state[queue_id] = 0;
- }
- }
return nb_pkts;
}
/**
- * Initialization of itt_profile_rx_data for a given Ethernet device.
+ * Register the profiling RX callback on each RX queue of an Ethernet device.
* This function must be invoked when ethernet device is being configured.
- * Result will be stored in the global array *itt_rx_data*.
*
* @param port_id
* The port identifier of the Ethernet device.
- * @param port_name
- * The name of the Ethernet device.
* @param rx_queue_num
* The number of RX queues on specified port.
*
* - On failure, a negative value.
*/
static inline int
-itt_profile_rx_init(uint16_t port_id, char *port_name, uint8_t rx_queue_num)
+vtune_profile_rx_init(uint16_t port_id, uint8_t rx_queue_num)
{
uint16_t q_id;
for (q_id = 0; q_id < rx_queue_num; ++q_id) {
- char domain_name[ITT_MAX_NAME_LEN];
-
- snprintf(domain_name, sizeof(domain_name),
- "RXBurst.WastedIterations.Port_%s.Queue_%d",
- port_name, q_id);
- itt_rx_data[port_id].domains[q_id]
- = __itt_domain_create(domain_name);
-
- char task_name[ITT_MAX_NAME_LEN];
-
- snprintf(task_name, sizeof(task_name),
- "port id: %d; queue id: %d",
- port_id, q_id);
- itt_rx_data[port_id].handles[q_id]
- = __itt_string_handle_create(task_name);
-
- itt_rx_data[port_id].queue_state[q_id] = 0;
-
if (!rte_eth_add_rx_callback(
- port_id, q_id, collect_itt_rx_burst_cb, NULL)) {
+ port_id, q_id, profile_hook_rx_burst_cb, NULL)) {
return -rte_errno;
}
}
return 0;
}
-#endif /* RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS */
+#endif /* RTE_ETHDEV_PROFILE_WITH_VTUNE */
int
-__rte_eth_profile_rx_init(__rte_unused uint16_t port_id,
+__rte_eth_dev_profile_init(__rte_unused uint16_t port_id,
__rte_unused struct rte_eth_dev *dev)
{
-#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
- return itt_profile_rx_init(
- port_id, dev->data->name, dev->data->nb_rx_queues);
+#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE
+ return vtune_profile_rx_init(port_id, dev->data->nb_rx_queues);
#endif
return 0;
}