----------------
Intel processors provide performance counters to monitor events.
-Some tools provided by Intel, such as VTune, can be used to profile and benchmark an application.
+Some tools provided by Intel, such as Intel® VTune™ Amplifier, can be used
+to profile and benchmark an application.
See the *VTune Performance Analyzer Essentials* publication from Intel Press for more information.
For a DPDK application, this can be done in a Linux* application environment only.
for details about application profiling.
+Empty cycles tracing
+~~~~~~~~~~~~~~~~~~~~
+
+Iterations that yielded no RX packets (empty cycles, wasted iterations) can
+be analyzed using VTune Amplifier. This profiling employs the
+`Instrumentation and Tracing Technology (ITT) API
+<https://software.intel.com/en-us/node/544195>`_
+feature of VTune Amplifier and requires only reconfiguring the DPDK library,
+no changes in a DPDK application are needed.
+
+To trace wasted iterations on RX queues, first reconfigure DPDK with
+``CONFIG_RTE_ETHDEV_RXTX_CALLBACKS`` and
+``CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS`` enabled.
+
+Then rebuild DPDK, specifying paths to the ITT header and library, which can
+be found in any VTune Amplifier distribution in the *include* and *lib*
+directories respectively:
+
+.. code-block:: console
+
+ make EXTRA_CFLAGS=-I<path to ittnotify.h> \
+ EXTRA_LDLIBS="-L<path to libittnotify.a> -littnotify"
+
+Finally, to see wasted iterations in your performance analysis results,
+select the *"Analyze user tasks, events, and counters"* checkbox in the
+*"Analysis Type"* tab when configuring analysis via VTune Amplifier GUI.
+Alternatively, when running VTune Amplifier via command line, specify
+``-knob enable-user-tasks=true`` option.
+
+Collected regions of wasted iterations will be marked on VTune Amplifier's
+timeline as ITT tasks. These ITT tasks have predefined names, containing
+Ethernet device and RX queue identifiers.
+
+
Profiling on ARM64
------------------
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_config.h>
+
+#include "ethdev_profile.h"
+
+/**
+ * This conditional block enables RX queues profiling by tracking wasted
+ * iterations, i.e. iterations which yielded no RX packets. Profiling is
+ * performed using the Instrumentation and Tracing Technology (ITT) API,
+ * employed by the Intel (R) VTune (TM) Amplifier.
+ */
+#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
+
+#include <ittnotify.h>
+
+#define ITT_MAX_NAME_LEN (100)
+
+/**
+ * Auxiliary ITT structure belonging to Ethernet device and using to:
+ * - track RX queue state to determine whether it is wasting loop iterations
+ * - begin or end ITT task using task domain and task name (handle)
+ */
+struct itt_profile_rx_data {
+ /**
+ * ITT domains for each queue.
+ */
+ __itt_domain *domains[RTE_MAX_QUEUES_PER_PORT];
+ /**
+ * ITT task names for each queue.
+ */
+ __itt_string_handle *handles[RTE_MAX_QUEUES_PER_PORT];
+ /**
+ * Flags indicating the queues state. Possible values:
+ * 1 - queue is wasting iterations,
+ * 0 - otherwise.
+ */
+ uint8_t queue_state[RTE_MAX_QUEUES_PER_PORT];
+};
+
+/**
+ * The pool of *itt_profile_rx_data* structures.
+ */
+struct itt_profile_rx_data itt_rx_data[RTE_MAX_ETHPORTS];
+
+
+/**
+ * This callback function manages ITT tasks collection on given port and queue.
+ * It must be registered with rte_eth_add_rx_callback() to be called from
+ * rte_eth_rx_burst(). To find more comments see rte_rx_callback_fn function
+ * type declaration.
+ */
+static uint16_t
+collect_itt_rx_burst_cb(uint8_t port_id, uint16_t queue_id,
+ __rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, __rte_unused void *user_param)
+{
+ if (unlikely(nb_pkts == 0)) {
+ if (!itt_rx_data[port_id].queue_state[queue_id]) {
+ __itt_task_begin(
+ itt_rx_data[port_id].domains[queue_id],
+ __itt_null, __itt_null,
+ itt_rx_data[port_id].handles[queue_id]);
+ itt_rx_data[port_id].queue_state[queue_id] = 1;
+ }
+ } else {
+ if (unlikely(itt_rx_data[port_id].queue_state[queue_id])) {
+ __itt_task_end(
+ itt_rx_data[port_id].domains[queue_id]);
+ itt_rx_data[port_id].queue_state[queue_id] = 0;
+ }
+ }
+ return nb_pkts;
+}
+
+/**
+ * Initialization of itt_profile_rx_data for a given Ethernet device.
+ * This function must be invoked when ethernet device is being configured.
+ * Result will be stored in the global array *itt_rx_data*.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param port_name
+ * The name of the Ethernet device.
+ * @param rx_queue_num
+ * The number of RX queues on specified port.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+static inline int
+itt_profile_rx_init(uint8_t port_id, char *port_name, uint8_t rx_queue_num)
+{
+ uint16_t q_id;
+
+ for (q_id = 0; q_id < rx_queue_num; ++q_id) {
+ char domain_name[ITT_MAX_NAME_LEN];
+
+ snprintf(domain_name, sizeof(domain_name),
+ "RXBurst.WastedIterations.Port_%s.Queue_%d",
+ port_name, q_id);
+ itt_rx_data[port_id].domains[q_id]
+ = __itt_domain_create(domain_name);
+
+ char task_name[ITT_MAX_NAME_LEN];
+
+ snprintf(task_name, sizeof(task_name),
+ "port id: %d; queue id: %d",
+ port_id, q_id);
+ itt_rx_data[port_id].handles[q_id]
+ = __itt_string_handle_create(task_name);
+
+ itt_rx_data[port_id].queue_state[q_id] = 0;
+
+ if (!rte_eth_add_rx_callback(
+ port_id, q_id, collect_itt_rx_burst_cb, NULL)) {
+ return -rte_errno;
+ }
+ }
+
+ return 0;
+}
+#endif /* RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS */
+
+int
+__rte_eth_profile_rx_init(__rte_unused uint8_t port_id,
+ __rte_unused struct rte_eth_dev *dev)
+{
+#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
+ return itt_profile_rx_init(
+ port_id, dev->data->name, dev->data->nb_rx_queues);
+#endif
+ return 0;
+}
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETHDEV_PROFILE_H_
+#define _RTE_ETHDEV_PROFILE_H_
+
+#include "rte_ethdev.h"
+
+/**
+ * Initialization of profiling RX queues for the Ethernet device.
+ * Implementation of this function depends on chosen profiling method,
+ * defined in configs.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param dev
+ * Pointer to struct rte_eth_dev corresponding to given port_id.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+__rte_eth_profile_rx_init(uint8_t port_id, struct rte_eth_dev *dev);
+
+#endif