net/enic: use 64B completion queue entries if available
author    Hyong Youb Kim <hyonkim@cisco.com>
          Thu, 7 Jan 2021 14:01:54 +0000 (06:01 -0800)
committer Ferruh Yigit <ferruh.yigit@intel.com>
          Tue, 19 Jan 2021 02:30:32 +0000 (03:30 +0100)
The latest VIC adapters support 64B CQ (completion queue) entries, in
addition to the 16B entries available on all VIC models. 64B entries
can greatly reduce cache contention (CPU stall cycles) between DMA
writes (Rx packet descriptors) and the polling CPU. The effect is very
noticeable on Intel platforms with DDIO. As most UCS servers are based
on Intel platforms, enable and use 64B CQ entries by default, when
available. Also add the devarg 'cq64' so the user can explicitly
disable 64B CQ entries.
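
For illustration only (not part of this commit), a minimal sketch of
passing the new devarg at EAL initialization; the PCI address 12:00.0
and the argument list are assumptions, matching the documentation
example added below:

    #include <rte_common.h>
    #include <rte_eal.h>

    int
    main(int argc __rte_unused, char **argv)
    {
            /* Illustrative: allowlist the VIC port and force 16B CQ
             * entries with cq64=0; omit cq64 (or pass cq64=1) to use
             * 64B entries when the adapter supports them.
             */
            char *eal_args[] = {
                    argv[0], "-a", "12:00.0,cq64=0",
                    "--log-level=enic,debug",
            };

            if (rte_eal_init(RTE_DIM(eal_args), eal_args) < 0)
                    return -1;
            /* ... usual ethdev configuration follows ... */
            return 0;
    }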

Signed-off-by: Hyong Youb Kim <hyonkim@cisco.com>
Reviewed-by: John Daley <johndale@cisco.com>
doc/guides/nics/enic.rst
doc/guides/rel_notes/release_21_02.rst
drivers/net/enic/base/cq_enet_desc.h
drivers/net/enic/base/vnic_dev.c
drivers/net/enic/base/vnic_dev.h
drivers/net/enic/base/vnic_devcmd.h
drivers/net/enic/enic.h
drivers/net/enic/enic_ethdev.c
drivers/net/enic/enic_main.c
drivers/net/enic/enic_res.c
drivers/net/enic/enic_rxtx.c

doc/guides/nics/enic.rst
index 5d1cc9f..4e7629c 100644
@@ -388,6 +388,31 @@ vectorized handler is selected, enable debug logging
 
     enic_use_vector_rx_handler use the non-scatter avx2 Rx handler
 
+64B Completion Queue Entry
+--------------------------
+
+Recent VIC adapters support 64B completion queue entries, as well as
+16B entries that are available on all adapter models. ENIC PMD enables
+and uses 64B entries by default, if available. 64B entries generally
+lower CPU cycles per Rx packet, as they avoid partial DMA writes and
+reduce cache contention between DMA and polling CPU. The effect is
+most pronounced when multiple Rx queues are used on Intel platforms
+with Data Direct I/O Technology (DDIO).
+
+If 64B entries are not available, the PMD uses 16B entries. The user
+may explicitly disable 64B entries and use 16B entries by setting the
+``devargs`` parameter ``cq64=0``. For example::
+
+    -a 12:00.0,cq64=0
+
+To verify the selected entry size, enable debug logging
+(``--log-level=enic,debug``) and check the following messages.
+
+.. code-block:: console
+
+    PMD: rte_enic_pmd: Supported CQ entry sizes: 16 32
+    PMD: rte_enic_pmd: Using 16B CQ entry size
+
 .. _enic_limitations:
 
 Limitations
doc/guides/rel_notes/release_21_02.rst
index 5a9317a..55aba13 100644
@@ -70,6 +70,10 @@ New Features
 
   * Added support for Stingray2 device.
 
+* **Updated Cisco enic driver.**
+
+  * Added support for 64B completion queue entries.
+
 * **Updated Mellanox mlx5 driver.**
 
   Updated the Mellanox mlx5 driver with new features and improvements, including:
drivers/net/enic/base/cq_enet_desc.h
index 602ac22..a34a4f5 100644
@@ -58,6 +58,19 @@ struct cq_enet_rq_clsf_desc {
        uint8_t type_color;
 };
 
+/* Completion queue descriptor: Ethernet receive queue, 64B */
+struct cq_enet_rq_desc_64 {
+       uint16_t completed_index_flags;
+       uint16_t q_number_rss_type_flags;
+       uint32_t rss_hash;
+       uint16_t bytes_written_flags;
+       uint16_t vlan;
+       uint16_t checksum_fcoe;
+       uint8_t flags;
+       uint8_t unused[48];
+       uint8_t type_color;
+};
+
 #define CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT          (0x1 << 12)
 #define CQ_ENET_RQ_DESC_FLAGS_FCOE                  (0x1 << 13)
 #define CQ_ENET_RQ_DESC_FLAGS_EOP                   (0x1 << 14)
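
As a hedged aside (not in the patch): the Rx path changed later in this
commit relies on two layout properties of the new 64B descriptor, which
could be expressed as compile-time checks, assuming cq_enet_desc.h is
in scope:

    #include <stddef.h>

    /* The 64B descriptor must be exactly 64 bytes, and type_color
     * (the ownership/color byte the polling loop reads before
     * touching the rest of the entry) must be its last byte.
     */
    _Static_assert(sizeof(struct cq_enet_rq_desc_64) == 64,
                   "64B CQ descriptor must be 64 bytes");
    _Static_assert(offsetof(struct cq_enet_rq_desc_64, type_color) == 63,
                   "type_color must be the last byte");
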
drivers/net/enic/base/vnic_dev.c
index aaca07c..526273c 100644
@@ -1320,3 +1320,27 @@ int vnic_dev_capable_geneve(struct vnic_dev *vdev)
        ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, &a1, wait);
        return ret == 0 && (a1 & FEATURE_GENEVE_OPTIONS);
 }
+
+uint64_t vnic_dev_capable_cq_entry_size(struct vnic_dev *vdev)
+{
+       uint64_t a0 = CMD_CQ_ENTRY_SIZE_SET;
+       uint64_t a1 = 0;
+       int wait = 1000;
+       int ret;
+
+       ret = vnic_dev_cmd(vdev, CMD_CAPABILITY, &a0, &a1, wait);
+       /* All models support 16B CQ entry by default */
+       if (!(ret == 0 && a0 == 0))
+               a1 = VNIC_RQ_CQ_ENTRY_SIZE_16_CAPABLE;
+       return a1;
+}
+
+int vnic_dev_set_cq_entry_size(struct vnic_dev *vdev, uint32_t rq_idx,
+                              uint32_t size_flag)
+{
+       uint64_t a0 = rq_idx;
+       uint64_t a1 = size_flag;
+       int wait = 1000;
+
+       return vnic_dev_cmd(vdev, CMD_CQ_ENTRY_SIZE_SET, &a0, &a1, wait);
+}
drivers/net/enic/base/vnic_dev.h
index 30ba57b..4b9f75b 100644
@@ -196,4 +196,8 @@ int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, uint8_t overlay,
        uint16_t vxlan_udp_port_number);
 int vnic_dev_capable_vxlan(struct vnic_dev *vdev);
 int vnic_dev_capable_geneve(struct vnic_dev *vdev);
+uint64_t vnic_dev_capable_cq_entry_size(struct vnic_dev *vdev);
+int vnic_dev_set_cq_entry_size(struct vnic_dev *vdev, uint32_t rq_idx,
+                              uint32_t size_flag);
+
 #endif /* _VNIC_DEV_H_ */
drivers/net/enic/base/vnic_devcmd.h
index a2f577f..96a7c22 100644
@@ -628,6 +628,24 @@ enum vnic_devcmd_cmd {
         * initialized to 0 to allow for better driver forward compatibility.
         */
        CMD_FLOW_MANAGER_OP = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 88),
+
+       /*
+        * Set extended CQ field in MREGS of RQ (or all RQs)
+        * for given vNIC
+        * in: (u64) a0 = RQ selection (VNIC_RQ_ALL for all RQs)
+        *     (u32) a1 = CQ entry size
+        *         VNIC_RQ_CQ_ENTRY_SIZE_16 --> 16 bytes
+        *         VNIC_RQ_CQ_ENTRY_SIZE_32 --> 32 bytes
+        *         VNIC_RQ_CQ_ENTRY_SIZE_64 --> 64 bytes
+        *
+        * Capability query:
+        * out: (u32) a0 = errno, 0:valid cmd
+        *      (u32) a1 = value consisting of supported entries
+        *         bit 0: 16 bytes
+        *         bit 1: 32 bytes
+        *         bit 2: 64 bytes
+        */
+       CMD_CQ_ENTRY_SIZE_SET = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 90),
 };
 
 /* Modes for exchanging advanced filter capabilities. The modes supported by
@@ -1163,4 +1181,17 @@ typedef enum {
        GRPINTR_UPD_VECT,
 } grpintr_subcmd_t;
 
+/*
+ * Defines and Capabilities for CMD_CQ_ENTRY_SIZE_SET
+ */
+#define VNIC_RQ_ALL                    (~0ULL)
+
+#define VNIC_RQ_CQ_ENTRY_SIZE_16       0
+#define VNIC_RQ_CQ_ENTRY_SIZE_32       1
+#define VNIC_RQ_CQ_ENTRY_SIZE_64       2
+
+#define VNIC_RQ_CQ_ENTRY_SIZE_16_CAPABLE       (1 << VNIC_RQ_CQ_ENTRY_SIZE_16)
+#define VNIC_RQ_CQ_ENTRY_SIZE_32_CAPABLE       (1 << VNIC_RQ_CQ_ENTRY_SIZE_32)
+#define VNIC_RQ_CQ_ENTRY_SIZE_64_CAPABLE       (1 << VNIC_RQ_CQ_ENTRY_SIZE_64)
+
 #endif /* _VNIC_DEVCMD_H_ */
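
To make the capability encoding above concrete, a hedged sketch (not in
the patch) of testing the bitmask returned in a1 by the capability
query; the helper name is illustrative:

    #include <stdbool.h>
    #include <stdint.h>

    /* cap_bits is the a1 value from the CMD_CQ_ENTRY_SIZE_SET
     * capability query (bit 0: 16B, bit 1: 32B, bit 2: 64B). An
     * adapter supporting 16B and 64B entries would report 0x5.
     */
    static bool
    cq_entry_size_supported(uint64_t cap_bits, unsigned int size_flag)
    {
            /* size_flag is one of VNIC_RQ_CQ_ENTRY_SIZE_{16,32,64} */
            return (cap_bits & (1ULL << size_flag)) != 0;
    }
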
drivers/net/enic/enic.h
index 4ee7525..cd66348 100644
@@ -117,7 +117,10 @@ struct enic {
        uint8_t adv_filters;
        uint32_t flow_filter_mode;
        uint8_t filter_actions; /* HW supported actions */
+       uint64_t cq_entry_sizes; /* supported CQ entry sizes */
        bool vxlan;
+       bool cq64;            /* actually using 64B CQ entry */
+       bool cq64_request;    /* devargs cq64=1 */
        bool disable_overlay; /* devargs disable_overlay=1 */
        uint8_t enable_avx2_rx;  /* devargs enable-avx2-rx=1 */
        uint8_t geneve_opt_avail;    /* Geneve with options offload available */
@@ -419,6 +422,8 @@ void enic_free_consistent(void *priv, size_t size, void *vaddr,
                          dma_addr_t dma_handle);
 uint16_t enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                        uint16_t nb_pkts);
+uint16_t enic_recv_pkts_64(void *rx_queue, struct rte_mbuf **rx_pkts,
+                          uint16_t nb_pkts);
 uint16_t enic_noscatter_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                                  uint16_t nb_pkts);
 uint16_t enic_dummy_recv_pkts(void *rx_queue,
drivers/net/enic/enic_ethdev.c
index d041a6b..1c8d60e 100644
@@ -64,6 +64,7 @@ static const struct vic_speed_capa {
        { 0, 0 }, /* End marker */
 };
 
+#define ENIC_DEVARG_CQ64 "cq64"
 #define ENIC_DEVARG_DISABLE_OVERLAY "disable-overlay"
 #define ENIC_DEVARG_ENABLE_AVX2_RX "enable-avx2-rx"
 #define ENIC_DEVARG_GENEVE_OPT "geneve-opt"
@@ -933,6 +934,8 @@ static int enicpmd_dev_rx_burst_mode_get(struct rte_eth_dev *dev,
                info_str = "Scalar No Scatter";
        else if (pkt_burst == enic_recv_pkts)
                info_str = "Scalar";
+       else if (pkt_burst == enic_recv_pkts_64)
+               info_str = "Scalar 64B Completion";
        if (info_str) {
                strlcpy(mode->info, info_str, sizeof(mode->info));
                ret = 0;
@@ -1145,6 +1148,8 @@ static int enic_parse_zero_one(const char *key,
                        ": expected=0|1 given=%s\n", key, value);
                return -EINVAL;
        }
+       if (strcmp(key, ENIC_DEVARG_CQ64) == 0)
+               enic->cq64_request = b;
        if (strcmp(key, ENIC_DEVARG_DISABLE_OVERLAY) == 0)
                enic->disable_overlay = b;
        if (strcmp(key, ENIC_DEVARG_ENABLE_AVX2_RX) == 0)
@@ -1190,6 +1195,7 @@ static int enic_parse_ig_vlan_rewrite(__rte_unused const char *key,
 static int enic_check_devargs(struct rte_eth_dev *dev)
 {
        static const char *const valid_keys[] = {
+               ENIC_DEVARG_CQ64,
                ENIC_DEVARG_DISABLE_OVERLAY,
                ENIC_DEVARG_ENABLE_AVX2_RX,
                ENIC_DEVARG_GENEVE_OPT,
@@ -1201,6 +1207,7 @@ static int enic_check_devargs(struct rte_eth_dev *dev)
 
        ENICPMD_FUNC_TRACE();
 
+       enic->cq64_request = true; /* Use 64B entry if available */
        enic->disable_overlay = false;
        enic->enable_avx2_rx = false;
        enic->geneve_opt_request = false;
@@ -1210,7 +1217,9 @@ static int enic_check_devargs(struct rte_eth_dev *dev)
        kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys);
        if (!kvlist)
                return -EINVAL;
-       if (rte_kvargs_process(kvlist, ENIC_DEVARG_DISABLE_OVERLAY,
+       if (rte_kvargs_process(kvlist, ENIC_DEVARG_CQ64,
+                              enic_parse_zero_one, enic) < 0 ||
+           rte_kvargs_process(kvlist, ENIC_DEVARG_DISABLE_OVERLAY,
                               enic_parse_zero_one, enic) < 0 ||
            rte_kvargs_process(kvlist, ENIC_DEVARG_ENABLE_AVX2_RX,
                               enic_parse_zero_one, enic) < 0 ||
@@ -1382,6 +1391,7 @@ RTE_PMD_REGISTER_PCI(net_enic, rte_enic_pmd);
 RTE_PMD_REGISTER_PCI_TABLE(net_enic, pci_id_enic_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_enic, "* igb_uio | uio_pci_generic | vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(net_enic,
+       ENIC_DEVARG_CQ64 "=0|1 "
        ENIC_DEVARG_DISABLE_OVERLAY "=0|1 "
        ENIC_DEVARG_ENABLE_AVX2_RX "=0|1 "
        ENIC_DEVARG_GENEVE_OPT "=0|1 "
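
A hedged usage sketch (not part of the patch): with the burst mode
string added above, an application can confirm at runtime which Rx
handler the PMD selected through the standard ethdev query:

    #include <stdio.h>
    #include <rte_ethdev.h>

    /* Prints e.g. "Scalar 64B Completion" when 64B CQ entries are in
     * use, or one of the other handler names otherwise.
     */
    static void
    print_rx_burst_mode(uint16_t port_id, uint16_t queue_id)
    {
            struct rte_eth_burst_mode mode;

            if (rte_eth_rx_burst_mode_get(port_id, queue_id, &mode) == 0)
                    printf("port %u rxq %u Rx burst mode: %s\n",
                           port_id, queue_id, mode.info);
    }
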
drivers/net/enic/enic_main.c
index 755c0bf..805eb5e 100644
@@ -534,6 +534,11 @@ void enic_pick_rx_handler(struct rte_eth_dev *eth_dev)
 {
        struct enic *enic = pmd_priv(eth_dev);
 
+       if (enic->cq64) {
+               ENICPMD_LOG(DEBUG, " use the normal Rx handler for 64B CQ entry");
+               eth_dev->rx_pkt_burst = &enic_recv_pkts_64;
+               return;
+       }
        /*
         * Preference order:
         * 1. The vectorized handler if possible and requested.
@@ -951,8 +956,22 @@ int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
                }
                nb_data_desc = rq_data->ring.desc_count;
        }
+       /* Enable 64B CQ entry if requested */
+       if (enic->cq64 && vnic_dev_set_cq_entry_size(enic->vdev,
+                               sop_queue_idx, VNIC_RQ_CQ_ENTRY_SIZE_64)) {
+               dev_err(enic, "failed to enable 64B CQ entry on sop rq\n");
+               goto err_free_rq_data;
+       }
+       if (rq_data->in_use && enic->cq64 &&
+           vnic_dev_set_cq_entry_size(enic->vdev, data_queue_idx,
+               VNIC_RQ_CQ_ENTRY_SIZE_64)) {
+               dev_err(enic, "failed to enable 64B CQ entry on data rq\n");
+               goto err_free_rq_data;
+       }
+
        rc = vnic_cq_alloc(enic->vdev, &enic->cq[cq_idx], cq_idx,
                           socket_id, nb_sop_desc + nb_data_desc,
+                          enic->cq64 ? sizeof(struct cq_enet_rq_desc_64) :
                           sizeof(struct cq_enet_rq_desc));
        if (rc) {
                dev_err(enic, "error in allocation of cq for rq\n");
drivers/net/enic/enic_res.c
index 20888eb..1405db1 100644
@@ -25,6 +25,7 @@ int enic_get_vnic_config(struct enic *enic)
 {
        struct vnic_enet_config *c = &enic->config;
        int err;
+       uint64_t sizes;
 
        err = vnic_dev_get_mac_addr(enic->vdev, enic->mac_addr);
        if (err) {
@@ -182,6 +183,18 @@ int enic_get_vnic_config(struct enic *enic)
                dev_info(NULL, "Geneve with options offload available\n");
                enic->geneve_opt_avail = 1;
        }
+       /* Supported CQ entry sizes */
+       enic->cq_entry_sizes = vnic_dev_capable_cq_entry_size(enic->vdev);
+       sizes = enic->cq_entry_sizes;
+       dev_debug(NULL, "Supported CQ entry sizes:%s%s%s\n",
+                 (sizes & VNIC_RQ_CQ_ENTRY_SIZE_16_CAPABLE) ? " 16" : "",
+                 (sizes & VNIC_RQ_CQ_ENTRY_SIZE_32_CAPABLE) ? " 32" : "",
+                 (sizes & VNIC_RQ_CQ_ENTRY_SIZE_64_CAPABLE) ? " 64" : "");
+       /* Use 64B entry if requested and available */
+       enic->cq64 = enic->cq64_request &&
+               (sizes & VNIC_RQ_CQ_ENTRY_SIZE_64_CAPABLE);
+       dev_debug(NULL, "Using %sB CQ entry size\n", enic->cq64 ? "64" : "16");
+
        /*
         * Default hardware capabilities. enic_dev_init() may add additional
         * flags if it enables overlay offloads.
drivers/net/enic/enic_rxtx.c
index 6a8718c..4b6b97e 100644
@@ -42,9 +42,9 @@ enic_dummy_recv_pkts(__rte_unused void *rx_queue,
        return 0;
 }
 
-uint16_t
-enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
-              uint16_t nb_pkts)
+static inline uint16_t
+enic_recv_pkts_common(void *rx_queue, struct rte_mbuf **rx_pkts,
+                     uint16_t nb_pkts, const bool use_64b_desc)
 {
        struct vnic_rq *sop_rq = rx_queue;
        struct vnic_rq *data_rq;
@@ -62,10 +62,15 @@ enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
        uint16_t seg_length;
        struct rte_mbuf *first_seg = sop_rq->pkt_first_seg;
        struct rte_mbuf *last_seg = sop_rq->pkt_last_seg;
+       const int desc_size = use_64b_desc ?
+               sizeof(struct cq_enet_rq_desc_64) :
+               sizeof(struct cq_enet_rq_desc);
+       RTE_BUILD_BUG_ON(sizeof(struct cq_enet_rq_desc_64) != 64);
 
        cq = &enic->cq[enic_cq_rq(enic, sop_rq->index)];
        cq_idx = cq->to_clean;          /* index of cqd, rqd, mbuf_table */
-       cqd_ptr = (struct cq_desc *)(cq->ring.descs) + cq_idx;
+       cqd_ptr = (struct cq_desc *)((uintptr_t)(cq->ring.descs) +
+                                    cq_idx * desc_size);
        color = cq->last_color;
 
        data_rq = &enic->rq[sop_rq->data_queue_idx];
@@ -78,15 +83,26 @@ enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                struct cq_desc cqd;
                uint8_t packet_error;
                uint16_t ciflags;
+               uint8_t tc;
 
                max_rx--;
 
+               tc = *(volatile uint8_t *)((uintptr_t)cqd_ptr + desc_size - 1);
                /* Check for pkts available */
-               if ((cqd_ptr->type_color & CQ_DESC_COLOR_MASK_NOSHIFT) == color)
+               if ((tc & CQ_DESC_COLOR_MASK_NOSHIFT) == color)
                        break;
 
                /* Get the cq descriptor and extract rq info from it */
                cqd = *cqd_ptr;
+               /*
+                * The first 16B of 64B descriptor is identical to the
+                * 16B descriptor, except type_color. Copy type_color
+                * from the 64B descriptor into the 16B descriptor's
+                * field, so the code below can assume the 16B
+                * descriptor format.
+                */
+               if (use_64b_desc)
+                       cqd.type_color = tc;
                rq_num = cqd.q_number & CQ_DESC_Q_NUM_MASK;
                rq_idx = cqd.completed_index & CQ_DESC_COMP_NDX_MASK;
 
@@ -109,7 +125,8 @@ enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                cq_idx++;
 
                /* Prefetch next mbuf & desc while processing current one */
-               cqd_ptr = (struct cq_desc *)(cq->ring.descs) + cq_idx;
+               cqd_ptr = (struct cq_desc *)((uintptr_t)(cq->ring.descs) +
+                                            cq_idx * desc_size);
                rte_enic_prefetch(cqd_ptr);
 
                ciflags = enic_cq_rx_desc_ciflags(
@@ -215,6 +232,18 @@ enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
        return nb_rx;
 }
 
+uint16_t
+enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+       return enic_recv_pkts_common(rx_queue, rx_pkts, nb_pkts, false);
+}
+
+uint16_t
+enic_recv_pkts_64(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+       return enic_recv_pkts_common(rx_queue, rx_pkts, nb_pkts, true);
+}
+
 uint16_t
 enic_noscatter_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                         uint16_t nb_pkts)