ethdev: change vtune profiling approach
author Ilia Kurakin <ilia.kurakin@intel.com>
Thu, 19 Jul 2018 12:21:42 +0000 (15:21 +0300)
committer Ferruh Yigit <ferruh.yigit@intel.com>
Tue, 28 Aug 2018 13:27:39 +0000 (15:27 +0200)
This patch changes the rx_burst profiling approach:
1. VTune's instrumentation is removed
2. an empty hook callback for profiling is added
This way all VTune-specific logic moves to the VTune side.
The hook is enabled only when the CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE
option is turned on. VTune uses this hook to attach to the polling cycle.
It is not possible to attach to rx_burst directly, as it is inline.

Signed-off-by: Ilia Kurakin <ilia.kurakin@intel.com>
Acked-by: Keith Wiles <keith.wiles@intel.com>
Acked-by: Ferruh Yigit <ferruh.yigit@intel.com>
config/common_base
doc/guides/prog_guide/profile_app.rst
lib/librte_ethdev/ethdev_profile.c
lib/librte_ethdev/ethdev_profile.h
lib/librte_ethdev/rte_ethdev.c

index 4bcbaf9..155c7d4 100644 (file)
@@ -128,7 +128,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
-CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS=n
+CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE=n
 
 #
 # Turn off Tx preparation stage
index 1106216..02f0561 100644 (file)
@@ -33,38 +33,12 @@ Refer to the
 for details about application profiling.
 
 
-Empty cycles tracing
+Profiling with VTune
 ~~~~~~~~~~~~~~~~~~~~
 
-Iterations that yielded no RX packets (empty cycles, wasted iterations) can
-be analyzed using VTune Amplifier. This profiling employs the
-`Instrumentation and Tracing Technology (ITT) API
-<https://software.intel.com/en-us/node/544195>`_
-feature of VTune Amplifier and requires only reconfiguring the DPDK library,
-no changes in a DPDK application are needed.
-
-To trace wasted iterations on RX queues, first reconfigure DPDK with
-``CONFIG_RTE_ETHDEV_RXTX_CALLBACKS`` and
-``CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS`` enabled.
-
-Then rebuild DPDK, specifying paths to the ITT header and library, which can
-be found in any VTune Amplifier distribution in the *include* and *lib*
-directories respectively:
-
-.. code-block:: console
-
-    make EXTRA_CFLAGS=-I<path to ittnotify.h> \
-         EXTRA_LDLIBS="-L<path to libittnotify.a> -littnotify"
-
-Finally, to see wasted iterations in your performance analysis results,
-select the *"Analyze user tasks, events, and counters"* checkbox in the
-*"Analysis Type"* tab when configuring analysis via VTune Amplifier GUI.
-Alternatively, when running VTune Amplifier via command line, specify
-``-knob enable-user-tasks=true`` option.
-
-Collected regions of wasted iterations will be marked on VTune Amplifier's
-timeline as ITT tasks. These ITT tasks have predefined names, containing
-Ethernet device and RX queue identifiers.
+To allow VTune to attach to a DPDK application, reconfigure and recompile
+DPDK with ``CONFIG_RTE_ETHDEV_RXTX_CALLBACKS`` and
+``CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE`` enabled.
 
 
 Profiling on ARM64
index 0d1dcda..a3c303f 100644 (file)
@@ -1,87 +1,33 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2017 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
  */
 
 #include "ethdev_profile.h"
 
 /**
- * This conditional block enables RX queues profiling by tracking wasted
- * iterations, i.e. iterations which yielded no RX packets. Profiling is
- * performed using the Instrumentation and Tracing Technology (ITT) API,
- * employed by the Intel (R) VTune (TM) Amplifier.
+ * This conditional block enables Ethernet device profiling with
+ * Intel (R) VTune (TM) Amplifier.
  */
-#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
-
-#include <ittnotify.h>
-
-#define ITT_MAX_NAME_LEN (100)
-
-/**
- * Auxiliary ITT structure belonging to Ethernet device and using to:
- *   -  track RX queue state to determine whether it is wasting loop iterations
- *   -  begin or end ITT task using task domain and task name (handle)
- */
-struct itt_profile_rx_data {
-       /**
-        * ITT domains for each queue.
-        */
-       __itt_domain *domains[RTE_MAX_QUEUES_PER_PORT];
-       /**
-        * ITT task names for each queue.
-        */
-       __itt_string_handle *handles[RTE_MAX_QUEUES_PER_PORT];
-       /**
-        * Flags indicating the queues state. Possible values:
-        *   1 - queue is wasting iterations,
-        *   0 - otherwise.
-        */
-       uint8_t queue_state[RTE_MAX_QUEUES_PER_PORT];
-};
-
-/**
- * The pool of *itt_profile_rx_data* structures.
- */
-struct itt_profile_rx_data itt_rx_data[RTE_MAX_ETHPORTS];
-
+#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE
 
 /**
- * This callback function manages ITT tasks collection on given port and queue.
- * It must be registered with rte_eth_add_rx_callback() to be called from
- * rte_eth_rx_burst(). To find more comments see rte_rx_callback_fn function
- * type declaration.
+ * Hook callback to trace rte_eth_rx_burst() calls.
  */
-static uint16_t
-collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id,
+uint16_t
+profile_hook_rx_burst_cb(
+       __rte_unused uint16_t port_id, __rte_unused uint16_t queue_id,
        __rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts,
        __rte_unused uint16_t max_pkts, __rte_unused void *user_param)
 {
-       if (unlikely(nb_pkts == 0)) {
-               if (!itt_rx_data[port_id].queue_state[queue_id]) {
-                       __itt_task_begin(
-                               itt_rx_data[port_id].domains[queue_id],
-                               __itt_null, __itt_null,
-                               itt_rx_data[port_id].handles[queue_id]);
-                       itt_rx_data[port_id].queue_state[queue_id] = 1;
-               }
-       } else {
-               if (unlikely(itt_rx_data[port_id].queue_state[queue_id])) {
-                       __itt_task_end(
-                               itt_rx_data[port_id].domains[queue_id]);
-                       itt_rx_data[port_id].queue_state[queue_id] = 0;
-               }
-       }
        return nb_pkts;
 }
 
 /**
- * Initialization of itt_profile_rx_data for a given Ethernet device.
+ * Setting profiling rx callback for a given Ethernet device.
  * This function must be invoked when ethernet device is being configured.
- * Result will be stored in the global array *itt_rx_data*.
  *
  * @param port_id
  *  The port identifier of the Ethernet device.
- * @param port_name
- *  The name of the Ethernet device.
  * @param rx_queue_num
  *  The number of RX queues on specified port.
  *
@@ -90,46 +36,27 @@ collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id,
  *  - On failure, a negative value.
  */
 static inline int
-itt_profile_rx_init(uint16_t port_id, char *port_name, uint8_t rx_queue_num)
+vtune_profile_rx_init(uint16_t port_id, uint8_t rx_queue_num)
 {
        uint16_t q_id;
 
        for (q_id = 0; q_id < rx_queue_num; ++q_id) {
-               char domain_name[ITT_MAX_NAME_LEN];
-
-               snprintf(domain_name, sizeof(domain_name),
-                       "RXBurst.WastedIterations.Port_%s.Queue_%d",
-                       port_name, q_id);
-               itt_rx_data[port_id].domains[q_id]
-                       = __itt_domain_create(domain_name);
-
-               char task_name[ITT_MAX_NAME_LEN];
-
-               snprintf(task_name, sizeof(task_name),
-                       "port id: %d; queue id: %d",
-                       port_id, q_id);
-               itt_rx_data[port_id].handles[q_id]
-                       = __itt_string_handle_create(task_name);
-
-               itt_rx_data[port_id].queue_state[q_id] = 0;
-
                if (!rte_eth_add_rx_callback(
-                       port_id, q_id, collect_itt_rx_burst_cb, NULL)) {
+                       port_id, q_id, profile_hook_rx_burst_cb, NULL)) {
                        return -rte_errno;
                }
        }
 
        return 0;
 }
-#endif /* RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS */
+#endif /* RTE_ETHDEV_PROFILE_WITH_VTUNE */
 
 int
-__rte_eth_profile_rx_init(__rte_unused uint16_t port_id,
+__rte_eth_dev_profile_init(__rte_unused uint16_t port_id,
        __rte_unused struct rte_eth_dev *dev)
 {
-#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
-       return itt_profile_rx_init(
-               port_id, dev->data->name, dev->data->nb_rx_queues);
+#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE
+       return vtune_profile_rx_init(port_id, dev->data->nb_rx_queues);
 #endif
        return 0;
 }
index e5ea368..65031e6 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2017 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
  */
 
 #ifndef _RTE_ETHDEV_PROFILE_H_
@@ -8,7 +8,7 @@
 #include "rte_ethdev.h"
 
 /**
- * Initialization of profiling RX queues for the Ethernet device.
+ * Initialization of the Ethernet device profiling.
  * Implementation of this function depends on chosen profiling method,
  * defined in configs.
  *
@@ -22,6 +22,6 @@
  *  - On failure, a negative value.
  */
 int
-__rte_eth_profile_rx_init(uint16_t port_id, struct rte_eth_dev *dev);
+__rte_eth_dev_profile_init(uint16_t port_id, struct rte_eth_dev *dev);
 
 #endif
index 4c32025..f32722f 100644 (file)
@@ -1191,9 +1191,9 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
        }
 
        /* Initialize Rx profiling if enabled at compilation time. */
-       diag = __rte_eth_profile_rx_init(port_id, dev);
+       diag = __rte_eth_dev_profile_init(port_id, dev);
        if (diag != 0) {
-               RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_profile_rx_init = %d\n",
+               RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_dev_profile_init = %d\n",
                        port_id, diag);
                rte_eth_dev_rx_queue_config(dev, 0);
                rte_eth_dev_tx_queue_config(dev, 0);