mlx4: avoid looking up work request id to improve Rx performance
[dpdk.git] / drivers / net / mlx4 / mlx4.c
index fde23e1..08b1b81 100644 (file)
@@ -200,6 +200,7 @@ struct rxq {
        struct ibv_exp_flow *allmulti_flow; /* Multicast flow. */
        unsigned int port_id; /* Port ID for incoming packets. */
        unsigned int elts_n; /* (*elts)[] length. */
+       unsigned int elts_head; /* Current index in (*elts)[]. */
        union {
                struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
                struct rxq_elt (*no_sp)[]; /* RX elements. */
@@ -337,9 +338,11 @@ priv_unlock(struct priv *priv)
 static int
 priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
 {
-       int ret = -1;
        DIR *dir;
        struct dirent *dent;
+       unsigned int dev_type = 0;
+       unsigned int dev_port_prev = ~0u;
+       char match[IF_NAMESIZE] = "";
 
        {
                MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path);
@@ -351,7 +354,7 @@ priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
        while ((dent = readdir(dir)) != NULL) {
                char *name = dent->d_name;
                FILE *file;
-               unsigned int dev_id;
+               unsigned int dev_port;
                int r;
 
                if ((name[0] == '.') &&
@@ -359,22 +362,47 @@ priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
                     ((name[1] == '.') && (name[2] == '\0'))))
                        continue;
 
-               MKSTR(path, "%s/device/net/%s/dev_id",
-                     priv->ctx->device->ibdev_path, name);
+               MKSTR(path, "%s/device/net/%s/%s",
+                     priv->ctx->device->ibdev_path, name,
+                     (dev_type ? "dev_id" : "dev_port"));
 
                file = fopen(path, "rb");
-               if (file == NULL)
+               if (file == NULL) {
+                       if (errno != ENOENT)
+                               continue;
+                       /*
+                        * Switch to dev_id when dev_port does not exist as
+                        * is the case with Linux kernel versions < 3.15.
+                        */
+try_dev_id:
+                       match[0] = '\0';
+                       if (dev_type)
+                               break;
+                       dev_type = 1;
+                       dev_port_prev = ~0u;
+                       rewinddir(dir);
                        continue;
-               r = fscanf(file, "%x", &dev_id);
-               fclose(file);
-               if ((r == 1) && (dev_id == (priv->port - 1u))) {
-                       snprintf(*ifname, sizeof(*ifname), "%s", name);
-                       ret = 0;
-                       break;
                }
+               r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
+               fclose(file);
+               if (r != 1)
+                       continue;
+               /*
+                * Switch to dev_id when dev_port returns the same value for
+                * all ports. May happen when using a MOFED release older than
+                * 3.0 with a Linux kernel >= 3.15.
+                */
+               if (dev_port == dev_port_prev)
+                       goto try_dev_id;
+               dev_port_prev = dev_port;
+               if (dev_port == (priv->port - 1u))
+                       snprintf(match, sizeof(match), "%s", name);
        }
        closedir(dir);
-       return ret;
+       if (match[0] == '\0')
+               return -1;
+       strncpy(*ifname, match, sizeof(*ifname));
+       return 0;
 }
 
 /**
@@ -1104,10 +1132,10 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        linearize = 1;
                }
                /* Set WR fields. */
-               assert(((uintptr_t)rte_pktmbuf_mtod(buf, char *) -
+               assert((rte_pktmbuf_mtod(buf, uintptr_t) -
                        (uintptr_t)buf) <= 0xffff);
                WR_ID(wr->wr_id).offset =
-                       ((uintptr_t)rte_pktmbuf_mtod(buf, char *) -
+                       (rte_pktmbuf_mtod(buf, uintptr_t) -
                         (uintptr_t)buf);
                wr->num_sge = segs;
                /* Register segments as SGEs. */
@@ -1142,7 +1170,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        assert(sge->length == 0);
                        assert(sge->lkey == 0);
                        /* Update SGE. */
-                       sge->addr = (uintptr_t)rte_pktmbuf_mtod(buf, char *);
+                       sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
                        if (txq->priv->vf)
                                rte_prefetch0((volatile void *)
                                        (uintptr_t)sge->addr);
@@ -1593,8 +1621,7 @@ rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
                        assert(sizeof(sge->addr) >= sizeof(uintptr_t));
                        if (j == 0) {
                                /* The first SGE keeps its headroom. */
-                               sge->addr = (uintptr_t)rte_pktmbuf_mtod(buf,
-                                                                       char *);
+                               sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
                                sge->length = (buf->buf_len -
                                               RTE_PKTMBUF_HEADROOM);
                        } else {
@@ -1614,6 +1641,7 @@ rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
        DEBUG("%p: allocated and configured %u WRs (%zu segments)",
              (void *)rxq, elts_n, (elts_n * elemof((*elts)[0].sges)));
        rxq->elts_n = elts_n;
+       rxq->elts_head = 0;
        rxq->elts.sp = elts;
        assert(ret == 0);
        return 0;
@@ -1759,6 +1787,7 @@ rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
        DEBUG("%p: allocated and configured %u single-segment WRs",
              (void *)rxq, elts_n);
        rxq->elts_n = elts_n;
+       rxq->elts_head = 0;
        rxq->elts.no_sp = elts;
        assert(ret == 0);
        return 0;
@@ -2294,6 +2323,8 @@ mlx4_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
        struct rxq *rxq = (struct rxq *)dpdk_rxq;
        struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp;
+       const unsigned int elts_n = rxq->elts_n;
+       unsigned int elts_head = rxq->elts_head;
        struct ibv_wc wcs[pkts_n];
        struct ibv_recv_wr head;
        struct ibv_recv_wr **next = &head.next;
@@ -2320,7 +2351,7 @@ mlx4_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                struct ibv_wc *wc = &wcs[i];
                uint64_t wr_id = wc->wr_id;
                uint32_t len = wc->byte_len;
-               struct rxq_elt_sp *elt = &(*elts)[wr_id];
+               struct rxq_elt_sp *elt = &(*elts)[elts_head];
                struct ibv_recv_wr *wr = &elt->wr;
                struct rte_mbuf *pkt_buf = NULL; /* Buffer returned in pkts. */
                struct rte_mbuf **pkt_buf_next = &pkt_buf;
@@ -2328,10 +2359,15 @@ mlx4_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                unsigned int j = 0;
 
                /* Sanity checks. */
+#ifdef NDEBUG
+               (void)wr_id;
+#endif
                assert(wr_id < rxq->elts_n);
                assert(wr_id == wr->wr_id);
                assert(wr->sg_list == elt->sges);
                assert(wr->num_sge == elemof(elt->sges));
+               assert(elts_head < rxq->elts_n);
+               assert(rxq->elts_head < rxq->elts_n);
                /* Link completed WRs together for repost. */
                *next = wr;
                next = &wr->next;
@@ -2371,8 +2407,10 @@ mlx4_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                DEBUG("rxq=%p, wr_id=%" PRIu64 ":"
                                      " can't allocate a new mbuf",
                                      (void *)rxq, wr_id);
-                               if (pkt_buf != NULL)
+                               if (pkt_buf != NULL) {
+                                       *pkt_buf_next = NULL;
                                        rte_pktmbuf_free(pkt_buf);
+                               }
                                /* Increase out of memory counters. */
                                ++rxq->stats.rx_nombuf;
                                ++rxq->priv->dev->data->rx_mbuf_alloc_failed;
@@ -2440,6 +2478,8 @@ mlx4_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                rxq->stats.ibytes += wc->byte_len;
 #endif
 repost:
+               if (++elts_head >= elts_n)
+                       elts_head = 0;
                continue;
        }
        *next = NULL;
@@ -2457,6 +2497,7 @@ repost:
                      strerror(i));
                abort();
        }
+       rxq->elts_head = elts_head;
 #ifdef MLX4_PMD_SOFT_COUNTERS
        /* Increase packets counter. */
        rxq->stats.ipackets += ret;
@@ -2486,6 +2527,8 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
        struct rxq *rxq = (struct rxq *)dpdk_rxq;
        struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp;
+       const unsigned int elts_n = rxq->elts_n;
+       unsigned int elts_head = rxq->elts_head;
        struct ibv_wc wcs[pkts_n];
        struct ibv_recv_wr head;
        struct ibv_recv_wr **next = &head.next;
@@ -2510,7 +2553,7 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                struct ibv_wc *wc = &wcs[i];
                uint64_t wr_id = wc->wr_id;
                uint32_t len = wc->byte_len;
-               struct rxq_elt *elt = &(*elts)[WR_ID(wr_id).id];
+               struct rxq_elt *elt = &(*elts)[elts_head];
                struct ibv_recv_wr *wr = &elt->wr;
                struct rte_mbuf *seg = (void *)((uintptr_t)elt->sge.addr -
                        WR_ID(wr_id).offset);
@@ -2521,6 +2564,8 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                assert(wr_id == wr->wr_id);
                assert(wr->sg_list == &elt->sge);
                assert(wr->num_sge == 1);
+               assert(elts_head < rxq->elts_n);
+               assert(rxq->elts_head < rxq->elts_n);
                /* Link completed WRs together for repost. */
                *next = wr;
                next = &wr->next;
@@ -2581,6 +2626,8 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                rxq->stats.ibytes += wc->byte_len;
 #endif
 repost:
+               if (++elts_head >= elts_n)
+                       elts_head = 0;
                continue;
        }
        *next = NULL;
@@ -2598,6 +2645,7 @@ repost:
                      strerror(i));
                abort();
        }
+       rxq->elts_head = elts_head;
 #ifdef MLX4_PMD_SOFT_COUNTERS
        /* Increase packets counter. */
        rxq->stats.ipackets += ret;
@@ -4430,17 +4478,18 @@ mlx4_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                struct ibv_pd *pd = NULL;
                struct priv *priv = NULL;
                struct rte_eth_dev *eth_dev;
-#if defined(INLINE_RECV) || defined(RSS_SUPPORT)
+#ifdef HAVE_EXP_QUERY_DEVICE
                struct ibv_exp_device_attr exp_device_attr;
-#endif
+#endif /* HAVE_EXP_QUERY_DEVICE */
                struct ether_addr mac;
                union ibv_gid temp_gid;
 
+#ifdef HAVE_EXP_QUERY_DEVICE
+               exp_device_attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS;
 #ifdef RSS_SUPPORT
-               exp_device_attr.comp_mask =
-                       (IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS |
-                        IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ);
+               exp_device_attr.comp_mask |= IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ;
 #endif /* RSS_SUPPORT */
+#endif /* HAVE_EXP_QUERY_DEVICE */
 
                DEBUG("using port %u (%08" PRIx32 ")", port, test);
 
@@ -4485,11 +4534,12 @@ mlx4_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                priv->port = port;
                priv->pd = pd;
                priv->mtu = ETHER_MTU;
-#ifdef RSS_SUPPORT
+#ifdef HAVE_EXP_QUERY_DEVICE
                if (ibv_exp_query_device(ctx, &exp_device_attr)) {
-                       INFO("experimental ibv_exp_query_device");
+                       ERROR("ibv_exp_query_device() failed");
                        goto port_error;
                }
+#ifdef RSS_SUPPORT
                if ((exp_device_attr.exp_device_cap_flags &
                     IBV_EXP_DEVICE_QPG) &&
                    (exp_device_attr.exp_device_cap_flags &
@@ -4541,6 +4591,7 @@ mlx4_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                             priv->inl_recv_size);
                }
 #endif /* INLINE_RECV */
+#endif /* HAVE_EXP_QUERY_DEVICE */
 
                (void)mlx4_getenv_int;
                priv->vf = vf;