net/mlx5: handle LRO packets in regular Rx queue
[dpdk.git] / drivers / net / mlx5 / mlx5_rxq.c
index b87eecc..3705d07 100644 (file)
@@ -93,7 +93,6 @@ mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq)
 
 /**
  * Check whether Multi-Packet RQ is enabled for the device.
- * MPRQ can be enabled explicitly, or implicitly by enabling LRO.
  *
  * @param dev
  *   Pointer to Ethernet device.
@@ -573,8 +572,10 @@ rxq_release_rq_resources(struct mlx5_rxq_ctrl *rxq_ctrl)
                rte_free((void *)(uintptr_t)rxq_ctrl->rxq.wqes);
                rxq_ctrl->rxq.wqes = NULL;
        }
-       if (rxq_ctrl->wq_umem)
+       if (rxq_ctrl->wq_umem) {
                mlx5_glue->devx_umem_dereg(rxq_ctrl->wq_umem);
+               rxq_ctrl->wq_umem = NULL;
+       }
 }
 
 /**
@@ -1088,7 +1089,7 @@ mlx5_devx_rq_new(struct rte_eth_dev *dev, uint16_t idx, uint32_t cqn)
        struct mlx5_rxq_ctrl *rxq_ctrl =
                container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
        struct mlx5_devx_create_rq_attr rq_attr;
-       uint32_t wqe_n = 1 << rxq_data->elts_n;
+       uint32_t wqe_n = 1 << (rxq_data->elts_n - rxq_data->sges_n);
        uint32_t wq_size = 0;
        uint32_t wqe_size = 0;
        uint32_t log_wqe_size = 0;
@@ -1116,21 +1117,16 @@ mlx5_devx_rq_new(struct rte_eth_dev *dev, uint16_t idx, uint32_t cqn)
                                MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES;
                wqe_size = sizeof(struct mlx5_wqe_mprq);
        } else {
-               int max_sge = 0;
-               int num_scatter = 0;
-
                rq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
-               max_sge = 1 << rxq_data->sges_n;
-               num_scatter = RTE_MAX(max_sge, 1);
-               wqe_size = sizeof(struct mlx5_wqe_data_seg) * num_scatter;
+               wqe_size = sizeof(struct mlx5_wqe_data_seg);
        }
-       log_wqe_size = log2above(wqe_size);
+       log_wqe_size = log2above(wqe_size) + rxq_data->sges_n;
        rq_attr.wq_attr.log_wq_stride = log_wqe_size;
-       rq_attr.wq_attr.log_wq_sz = rxq_data->elts_n;
+       rq_attr.wq_attr.log_wq_sz = rxq_data->elts_n - rxq_data->sges_n;
        /* Calculate and allocate WQ memory space. */
        wqe_size = 1 << log_wqe_size; /* round up power of two.*/
        wq_size = wqe_n * wqe_size;
-       buf = rte_calloc_socket(__func__, 1, wq_size, RTE_CACHE_LINE_SIZE,
+       buf = rte_calloc_socket(__func__, 1, wq_size, MLX5_WQE_BUF_ALIGNMENT,
                                rxq_ctrl->socket);
        if (!buf)
                return NULL;
@@ -1242,6 +1238,7 @@ mlx5_rxq_obj_new(struct rte_eth_dev *dev, uint16_t idx,
                        goto error;
                rxq_ctrl->dbr_offset = dbr_offset;
                rxq_ctrl->dbr_umem_id = dbr_page->umem->umem_id;
+               rxq_ctrl->dbr_umem_id_valid = 1;
                rxq_data->rq_db = (uint32_t *)((uintptr_t)dbr_page->dbrs +
                                               (uintptr_t)rxq_ctrl->dbr_offset);
        }
@@ -1358,14 +1355,22 @@ mlx5_rxq_obj_verify(struct rte_eth_dev *dev)
  * Callback function to initialize mbufs for Multi-Packet RQ.
  */
 static inline void
-mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg __rte_unused,
+mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg,
                    void *_m, unsigned int i __rte_unused)
 {
        struct mlx5_mprq_buf *buf = _m;
+       struct rte_mbuf_ext_shared_info *shinfo;
+       unsigned int strd_n = (unsigned int)(uintptr_t)opaque_arg;
+       unsigned int j;
 
        memset(_m, 0, sizeof(*buf));
        buf->mp = mp;
        rte_atomic16_set(&buf->refcnt, 1);
+       for (j = 0; j != strd_n; ++j) {
+               shinfo = &buf->shinfos[j];
+               shinfo->free_cb = mlx5_mprq_buf_free_cb;
+               shinfo->fcb_opaque = buf;
+       }
 }
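
Setting free_cb and fcb_opaque once per stride at pool-init time means the datapath only has to arm the per-stride refcount before handing a stride out as an external buffer. A hedged sketch of the consuming side (the real code lives in the Rx burst path, outside this diff; the stride address/length variables here are hypothetical):

        /* Sketch only: attach stride j of buf to mbuf pkt as an external buffer. */
        struct rte_mbuf_ext_shared_info *shinfo = &buf->shinfos[j];
        rte_mbuf_ext_refcnt_set(shinfo, 1); /* free_cb/fcb_opaque already set */
        rte_pktmbuf_attach_extbuf(pkt, stride_addr, stride_iova, stride_len, shinfo);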
 
 /**
@@ -1460,7 +1465,8 @@ mlx5_mprq_alloc_mp(struct rte_eth_dev *dev)
        }
        assert(strd_num_n && strd_sz_n);
        buf_len = (1 << strd_num_n) * (1 << strd_sz_n);
-       obj_size = buf_len + sizeof(struct mlx5_mprq_buf);
+       obj_size = sizeof(struct mlx5_mprq_buf) + buf_len + (1 << strd_num_n) *
+               sizeof(struct rte_mbuf_ext_shared_info) + RTE_PKTMBUF_HEADROOM;
        /*
         * Received packets can be either memcpy'd or externally referenced. In
         * case that the packet is attached to an mbuf as an external buffer, as
@@ -1505,7 +1511,8 @@ mlx5_mprq_alloc_mp(struct rte_eth_dev *dev)
        }
        snprintf(name, sizeof(name), "port-%u-mprq", dev->data->port_id);
        mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ,
-                               0, NULL, NULL, mlx5_mprq_buf_init, NULL,
+                               0, NULL, NULL, mlx5_mprq_buf_init,
+                               (void *)(uintptr_t)(1 << strd_num_n),
                                dev->device->numa_node, 0);
        if (mp == NULL) {
                DRV_LOG(ERR,
@@ -1530,6 +1537,42 @@ exit:
        return 0;
 }
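
The mempool object therefore grows to carry the per-stride shared infos and a headroom reserve alongside the stride data; a breakdown of the new obj_size (the exact field ordering is defined by struct mlx5_mprq_buf in the header, not shown in this diff):

        /*
         * obj_size after this change:
         *   sizeof(struct mlx5_mprq_buf)            fixed header (mp, refcnt, ...)
         * + buf_len                                 (1 << strd_num_n) * (1 << strd_sz_n)
         *                                           bytes of stride data
         * + (1 << strd_num_n) *
         *   sizeof(struct rte_mbuf_ext_shared_info) one shinfo per stride
         * + RTE_PKTMBUF_HEADROOM                    reserve for the case where the
         *                                           stride itself carries no headroom
         */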
 
+#define MLX5_MAX_LRO_SIZE (UINT8_MAX * 256u)
+#define MLX5_MAX_TCP_HDR_OFFSET ((unsigned int)(sizeof(struct rte_ether_hdr) + \
+                                       sizeof(struct rte_vlan_hdr) * 2 + \
+                                       sizeof(struct rte_ipv6_hdr)))
+#define MAX_TCP_OPTION_SIZE 40u
+#define MLX5_MAX_LRO_HEADER_FIX ((unsigned int)(MLX5_MAX_TCP_HDR_OFFSET + \
+                                sizeof(struct rte_tcp_hdr) + \
+                                MAX_TCP_OPTION_SIZE))
+
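With the standard DPDK header sizes these bounds work out as follows (14-byte Ethernet header, two 4-byte VLAN tags, 40-byte IPv6 header, 20-byte TCP header plus up to 40 bytes of options):

        /* MLX5_MAX_TCP_HDR_OFFSET  = 14 + 2 * 4 + 40 =  62 bytes    */
        /* MLX5_MAX_LRO_HEADER_FIX  = 62 + 20 + 40    = 122 bytes    */
        /* MLX5_MAX_LRO_SIZE        = 255 * 256       = 65280 bytes  */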
+/**
+ * Adjust the maximum LRO message size.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param max_lro_size
+ *   The maximum size for LRO packet.
+ */
+static void
+mlx5_max_lro_msg_size_adjust(struct rte_eth_dev *dev, uint32_t max_lro_size)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+
+       if (priv->config.hca_attr.lro_max_msg_sz_mode ==
+           MLX5_LRO_MAX_MSG_SIZE_START_FROM_L4 && max_lro_size >
+           MLX5_MAX_TCP_HDR_OFFSET)
+               max_lro_size -= MLX5_MAX_TCP_HDR_OFFSET;
+       max_lro_size = RTE_MIN(max_lro_size, MLX5_MAX_LRO_SIZE);
+       assert(max_lro_size >= 256u);
+       max_lro_size /= 256u;
+       if (priv->max_lro_msg_size)
+               priv->max_lro_msg_size =
+                       RTE_MIN((uint32_t)priv->max_lro_msg_size, max_lro_size);
+       else
+               priv->max_lro_msg_size = max_lro_size;
+}
+
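The TIR expresses the maximum LRO message size in 256-byte granularity, which is why the function divides at the end; a worked walk-through with an illustrative max_lro_size of 9216 and the START_FROM_L4 mode:

        /* Sketch only: the conversion performed by mlx5_max_lro_msg_size_adjust(). */
        uint32_t max_lro_size = 9216;            /* illustrative value */
        max_lro_size -= MLX5_MAX_TCP_HDR_OFFSET; /* 9216 - 62 = 9154 (L4 mode) */
        max_lro_size = RTE_MIN(max_lro_size, MLX5_MAX_LRO_SIZE); /* still 9154 */
        max_lro_size /= 256u;                    /* 35, in 256-byte units */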
 /**
  * Create a DPDK Rx queue.
  *
@@ -1555,6 +1598,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
        unsigned int mprq_stride_size;
        struct mlx5_dev_config *config = &priv->config;
+       unsigned int strd_headroom_en;
        /*
         * Always allocate extra slots, even if eventually
         * the vector Rx will not be used.
@@ -1564,7 +1608,22 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        uint64_t offloads = conf->offloads |
                           dev->data->dev_conf.rxmode.offloads;
        const int mprq_en = mlx5_check_mprq_support(dev) > 0;
-
+       unsigned int max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
+       unsigned int non_scatter_min_mbuf_size = max_rx_pkt_len +
+                                                       RTE_PKTMBUF_HEADROOM;
+       unsigned int max_lro_size = 0;
+       unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM;
+
+       if (non_scatter_min_mbuf_size > mb_len && !(offloads &
+                                                   DEV_RX_OFFLOAD_SCATTER)) {
+               DRV_LOG(ERR, "port %u Rx queue %u: Scatter offload is not"
+                       " configured and not enough mbuf space(%u) to contain "
+                       "the maximum RX packet length(%u) with head-room(%u)",
+                       dev->data->port_id, idx, mb_len, max_rx_pkt_len,
+                       RTE_PKTMBUF_HEADROOM);
+               rte_errno = ENOSPC;
+               return NULL;
+       }
        tmpl = rte_calloc_socket("RXQ", 1,
                                 sizeof(*tmpl) +
                                 desc_n * sizeof(struct rte_mbuf *),
@@ -1581,6 +1640,21 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        tmpl->socket = socket;
        if (dev->data->dev_conf.intr_conf.rxq)
                tmpl->irq = 1;
+       /*
+        * An LRO packet may consume all the stride memory, hence we cannot
+        * guarantee head-room near the packet memory in the stride.
+        * In this case scatter is surely enabled and an empty mbuf may be
+        * added at the start for the head-room.
+        */
+       if (mlx5_lro_on(dev) && RTE_PKTMBUF_HEADROOM > 0 &&
+           non_scatter_min_mbuf_size > mb_len) {
+               strd_headroom_en = 0;
+               mprq_stride_size = RTE_MIN(max_rx_pkt_len,
+                                       1u << config->mprq.max_stride_size_n);
+       } else {
+               strd_headroom_en = 1;
+               mprq_stride_size = non_scatter_min_mbuf_size;
+       }
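Concretely, the stride headroom is only dropped when LRO is enabled and the packet cannot fit a single mbuf anyway, so scatter is guaranteed to be active; an illustrative case (values are examples, not from this patch):

        /* mb_len = 2048, RTE_PKTMBUF_HEADROOM = 128, max_rx_pkt_len = 9000:  */
        /* non_scatter_min_mbuf_size = 9128 > 2048 and LRO is on, so          */
        /* strd_headroom_en = 0 and mprq_stride_size = min(9000, max stride); */
        /* the head-room is restored by prepending an empty mbuf in scatter.  */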
        /*
         * This Rx queue can be configured as a Multi-Packet RQ if all of the
         * following conditions are met:
@@ -1590,11 +1664,6 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
         *    stride.
         *  Otherwise, enable Rx scatter if necessary.
         */
-       assert(mb_len >= RTE_PKTMBUF_HEADROOM);
-       mprq_stride_size =
-               dev->data->dev_conf.rxmode.max_rx_pkt_len +
-               sizeof(struct rte_mbuf_ext_shared_info) +
-               RTE_PKTMBUF_HEADROOM;
        if (mprq_en &&
            desc > (1U << config->mprq.stride_num_n) &&
            mprq_stride_size <= (1U << config->mprq.max_stride_size_n)) {
@@ -1606,50 +1675,49 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                tmpl->rxq.strd_sz_n = RTE_MAX(log2above(mprq_stride_size),
                                              config->mprq.min_stride_size_n);
                tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
-               tmpl->rxq.mprq_max_memcpy_len =
-                       RTE_MIN(mb_len - RTE_PKTMBUF_HEADROOM,
+               tmpl->rxq.strd_headroom_en = strd_headroom_en;
+               tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size,
                                config->mprq.max_memcpy_len);
+               max_lro_size = RTE_MIN(max_rx_pkt_len,
+                                      (1u << tmpl->rxq.strd_num_n) *
+                                      (1u << tmpl->rxq.strd_sz_n));
                DRV_LOG(DEBUG,
                        "port %u Rx queue %u: Multi-Packet RQ is enabled"
                        " strd_num_n = %u, strd_sz_n = %u",
                        dev->data->port_id, idx,
                        tmpl->rxq.strd_num_n, tmpl->rxq.strd_sz_n);
-       } else if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
-                  (mb_len - RTE_PKTMBUF_HEADROOM)) {
+       } else if (max_rx_pkt_len <= first_mb_free_size) {
                tmpl->rxq.sges_n = 0;
+               max_lro_size = max_rx_pkt_len;
        } else if (offloads & DEV_RX_OFFLOAD_SCATTER) {
-               unsigned int size =
-                       RTE_PKTMBUF_HEADROOM +
-                       dev->data->dev_conf.rxmode.max_rx_pkt_len;
+               unsigned int size = non_scatter_min_mbuf_size;
                unsigned int sges_n;
 
+               if (mlx5_lro_on(dev) && first_mb_free_size <
+                   MLX5_MAX_LRO_HEADER_FIX) {
+                       DRV_LOG(ERR, "Not enough space in the first segment(%u)"
+                               " to include the max header size(%u) for LRO",
+                               first_mb_free_size, MLX5_MAX_LRO_HEADER_FIX);
+                       rte_errno = ENOTSUP;
+                       goto error;
+               }
                /*
                 * Determine the number of SGEs needed for a full packet
                 * and round it to the next power of two.
                 */
                sges_n = log2above((size / mb_len) + !!(size % mb_len));
-               tmpl->rxq.sges_n = sges_n;
-               /* Make sure rxq.sges_n did not overflow. */
-               size = mb_len * (1 << tmpl->rxq.sges_n);
-               size -= RTE_PKTMBUF_HEADROOM;
-               if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
+               if (sges_n > MLX5_MAX_LOG_RQ_SEGS) {
                        DRV_LOG(ERR,
                                "port %u too many SGEs (%u) needed to handle"
-                               " requested maximum packet size %u",
-                               dev->data->port_id,
-                               1 << sges_n,
-                               dev->data->dev_conf.rxmode.max_rx_pkt_len);
-                       rte_errno = EOVERFLOW;
+                               " requested maximum packet size %u, the maximum"
+                               " supported is %u", dev->data->port_id,
+                               1 << sges_n, max_rx_pkt_len,
+                               1u << MLX5_MAX_LOG_RQ_SEGS);
+                       rte_errno = ENOTSUP;
                        goto error;
                }
-       } else {
-               DRV_LOG(WARNING,
-                       "port %u the requested maximum Rx packet size (%u) is"
-                       " larger than a single mbuf (%u) and scattered mode has"
-                       " not been requested",
-                       dev->data->port_id,
-                       dev->data->dev_conf.rxmode.max_rx_pkt_len,
-                       mb_len - RTE_PKTMBUF_HEADROOM);
+               tmpl->rxq.sges_n = sges_n;
+               max_lro_size = max_rx_pkt_len;
        }
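
The SGE count rounds the per-packet segment count up to the next power of two and is now validated against MLX5_MAX_LOG_RQ_SEGS instead of being checked for overflow after the fact; a worked example with illustrative sizes:

        /* Sketch only: max_rx_pkt_len = 9000, mb_len = 2048, headroom = 128. */
        unsigned int size = 9000 + 128;                          /* 9128 */
        unsigned int sges_n = log2above(size / 2048 + !!(size % 2048));
        /* 9128 / 2048 = 4 remainder 936 -> log2above(5) = 3 -> 8 segments. */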
        if (mprq_en && !mlx5_rxq_mprq_enabled(&tmpl->rxq))
                DRV_LOG(WARNING,
@@ -1671,6 +1739,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                rte_errno = EINVAL;
                goto error;
        }
+       mlx5_max_lro_msg_size_adjust(dev, max_lro_size);
        /* Toggle RX checksum offload if hardware supports it. */
        tmpl->rxq.csum = !!(offloads & DEV_RX_OFFLOAD_CHECKSUM);
        tmpl->rxq.hw_timestamp = !!(offloads & DEV_RX_OFFLOAD_TIMESTAMP);
@@ -1780,8 +1849,9 @@ mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx)
        if (rxq_ctrl->obj && !mlx5_rxq_obj_release(rxq_ctrl->obj))
                rxq_ctrl->obj = NULL;
        if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
-               claim_zero(mlx5_release_dbr(dev, rxq_ctrl->dbr_umem_id,
-                                           rxq_ctrl->dbr_offset));
+               if (rxq_ctrl->dbr_umem_id_valid)
+                       claim_zero(mlx5_release_dbr(dev, rxq_ctrl->dbr_umem_id,
+                                                   rxq_ctrl->dbr_offset));
                mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
                LIST_REMOVE(rxq_ctrl, next);
                rte_free(rxq_ctrl);
@@ -2151,7 +2221,7 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
                if (lro) {
                        tir_attr.lro_timeout_period_usecs =
                                        priv->config.lro.timeout;
-                       tir_attr.lro_max_msg_sz = 0xff;
+                       tir_attr.lro_max_msg_sz = priv->max_lro_msg_size;
                        tir_attr.lro_enable_mask = lro;
                }
                tir = mlx5_devx_cmd_create_tir(priv->sh->ctx, &tir_attr);
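
Previously the TIR always advertised the hardware ceiling; the new value tracks the smallest per-queue limit instead:

        /* Old: tir_attr.lro_max_msg_sz = 0xff -> 255 * 256 = 65280 bytes.  */
        /* New: priv->max_lro_msg_size, the RTE_MIN over all configured Rx  */
        /* queues, already expressed in 256-byte units.                     */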