net/mlx5: add enhanced multi-packet send for ConnectX-5
[dpdk.git] / drivers / net / mlx5 / mlx5_txq.c
index 94ed3fc..bbfce75 100644 (file)
@@ -81,8 +81,10 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
 
        for (i = 0; (i != elts_n); ++i)
                (*txq_ctrl->txq.elts)[i] = NULL;
-       for (i = 0; (i != txq_ctrl->txq.wqe_n); ++i) {
-               volatile struct mlx5_wqe64 *wqe = &(*txq_ctrl->txq.wqes)[i];
+       for (i = 0; (i != (1u << txq_ctrl->txq.wqe_n)); ++i) {
+               volatile struct mlx5_wqe64 *wqe =
+                       (volatile struct mlx5_wqe64 *)
+                       txq_ctrl->txq.wqes + i;
 
                memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe));
        }
@@ -214,14 +216,10 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
        }
        tmpl->txq.cqe_n = log2above(ibcq->cqe);
        tmpl->txq.qp_num_8s = qp->ctrl_seg.qp_num << 8;
-       tmpl->txq.wqes =
-               (volatile struct mlx5_wqe64 (*)[])
-               (uintptr_t)qp->gen_data.sqstart;
-       tmpl->txq.wqe_n = qp->sq.wqe_cnt;
+       tmpl->txq.wqes = qp->gen_data.sqstart;
+       tmpl->txq.wqe_n = log2above(qp->sq.wqe_cnt);
        tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR];
        tmpl->txq.bf_reg = qp->gen_data.bf->reg;
-       tmpl->txq.bf_offset = qp->gen_data.bf->offset;
-       tmpl->txq.bf_buf_size = log2above(qp->gen_data.bf->buf_size);
        tmpl->txq.cq_db = cq->dbrec;
        tmpl->txq.cqes =
                (volatile struct mlx5_cqe (*)[])
@@ -268,6 +266,7 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
                struct ibv_exp_cq_attr cq_attr;
        } attr;
        enum ibv_exp_query_intf_status status;
+       unsigned int cqe_n;
        int ret = 0;
 
        if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
@@ -278,6 +277,8 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
        (void)conf; /* Thresholds configuration (ignored). */
        assert(desc > MLX5_TX_COMP_THRESH);
        tmpl.txq.elts_n = log2above(desc);
+       if (priv->mps == MLX5_MPW_ENHANCED)
+               tmpl.txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
        /* MRs will be registered in mp2mr[] later. */
        attr.rd = (struct ibv_exp_res_domain_init_attr){
                .comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
@@ -296,9 +297,12 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
                .comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
                .res_domain = tmpl.rd,
        };
+       cqe_n = ((desc / MLX5_TX_COMP_THRESH) - 1) ?
+               ((desc / MLX5_TX_COMP_THRESH) - 1) : 1;
+       if (priv->mps == MLX5_MPW_ENHANCED)
+               cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV;
        tmpl.cq = ibv_exp_create_cq(priv->ctx,
-                                   (((desc / MLX5_TX_COMP_THRESH) - 1) ?
-                                    ((desc / MLX5_TX_COMP_THRESH) - 1) : 1),
+                                   cqe_n,
                                    NULL, NULL, 0, &attr.cq);
        if (tmpl.cq == NULL) {
                ret = ENOMEM;
@@ -342,9 +346,39 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
                tmpl.txq.max_inline =
                        ((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
                         RTE_CACHE_LINE_SIZE);
-               attr.init.cap.max_inline_data =
-                       tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
+               tmpl.txq.inline_en = 1;
+               /* TSO and MPS can't be enabled concurrently. */
+               assert(!priv->tso || !priv->mps);
+               if (priv->mps == MLX5_MPW_ENHANCED) {
+                       tmpl.txq.inline_max_packet_sz =
+                               priv->inline_max_packet_sz;
+                       /* To minimize the size of data set, avoid requesting
+                        * too large WQ.
+                        */
+                       attr.init.cap.max_inline_data =
+                               ((RTE_MIN(priv->txq_inline,
+                                         priv->inline_max_packet_sz) +
+                                 (RTE_CACHE_LINE_SIZE - 1)) /
+                                RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE;
+               } else {
+                       attr.init.cap.max_inline_data =
+                               tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
+               }
+       }
+       if (priv->tso) {
+               uint16_t max_tso_inline = ((MLX5_MAX_TSO_HEADER +
+                                          (RTE_CACHE_LINE_SIZE - 1)) /
+                                           RTE_CACHE_LINE_SIZE);
+
+               attr.init.max_tso_header =
+                       max_tso_inline * RTE_CACHE_LINE_SIZE;
+               attr.init.comp_mask |= IBV_EXP_QP_INIT_ATTR_MAX_TSO_HEADER;
+               tmpl.txq.max_inline = RTE_MAX(tmpl.txq.max_inline,
+                                             max_tso_inline);
+               tmpl.txq.tso_en = 1;
        }
+       if (priv->tunnel_en)
+               tmpl.txq.tunnel_en = 1;
        tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init);
        if (tmpl.qp == NULL) {
                ret = (errno ? errno : EINVAL);
@@ -412,7 +446,7 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
                .obj = tmpl.qp,
                /* Enable multi-packet send if supported. */
                .family_flags =
-                       ((priv->mps && !priv->sriov) ?
+                       (priv->mps ?
                         IBV_EXP_QP_BURST_CREATE_ENABLE_MULTI_PACKET_SEND_WR :
                         0),
        };