return 0;
}
+/**
+ * Return the address of the WQE.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param ci
+ * WQE consumer index.
+ *
+ * @return
+ * WQE address.
+ */
+static inline uintptr_t *
+tx_mlx5_wqe(struct txq *txq, uint16_t ci)
+{
+ ci &= ((1 << txq->wqe_n) - 1);
+ return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
+}
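+
+/*
+ * The index is masked above, so any ci value wraps safely around the
+ * ring. Callers cast the returned pointer to the segment layout they
+ * need, for instance:
+ *
+ *   wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, txq->wqe_ci);
+ */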
+
static inline void
txq_complete(struct txq *txq) __attribute__((always_inline));
uint16_t elts_tail;
uint16_t cq_ci = txq->cq_ci;
volatile struct mlx5_cqe *cqe = NULL;
- volatile struct mlx5_wqe *wqe;
+ volatile struct mlx5_wqe_ctrl *ctrl;
do {
volatile struct mlx5_cqe *tmp;
} while (1);
if (unlikely(cqe == NULL))
return;
- wqe = &(*txq->wqes)[ntohs(cqe->wqe_counter) &
- ((1 << txq->wqe_n) - 1)].hdr;
- elts_tail = wqe->ctrl[3];
+ ctrl = (volatile struct mlx5_wqe_ctrl *)
+ tx_mlx5_wqe(txq, ntohs(cqe->wqe_counter));
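+ /*
+ * ctrl3 carries the elts_head value that the TX burst routine saves
+ * in the otherwise unused "immediate" field of the WQE when it
+ * requests a completion; buffers up to that index can be freed.
+ */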
+ elts_tail = ctrl->ctrl3;
assert(elts_tail < (1 << txq->wqe_n));
/* Free buffers. */
while (elts_free != elts_tail) {
rte_prefetch0(cqe);
}
-/**
- * Prefetch a WQE.
- *
- * @param txq
- * Pointer to TX queue structure.
- * @param wqe_ci
- * WQE consumer index.
- */
-static inline void
-tx_prefetch_wqe(struct txq *txq, uint16_t ci)
-{
- volatile struct mlx5_wqe64 *wqe;
-
- wqe = &(*txq->wqes)[ci & ((1 << txq->wqe_n) - 1)];
- rte_prefetch0(wqe);
-}
-
/**
* DPDK callback for TX.
*
--segs_n;
if (!segs_n)
--pkts_n;
- wqe = &(*txq->wqes)[txq->wqe_ci &
- ((1 << txq->wqe_n) - 1)].hdr;
- tx_prefetch_wqe(txq, txq->wqe_ci + 1);
+ wqe = (volatile struct mlx5_wqe *)
+ tx_mlx5_wqe(txq, txq->wqe_ci);
+ rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
if (pkts_n > 1)
rte_prefetch0(*pkts);
addr = rte_pktmbuf_mtod(buf, uintptr_t);
}
/* Inline if enough room. */
if (txq->max_inline != 0) {
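+ /* End of the WQE ring: one byte past the last WQE slot. */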
- uintptr_t end =
- (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n];
+ uintptr_t end = (uintptr_t)txq->wqes +
+ (1 << txq->wqe_n) * MLX5_WQE_SIZE;
uint16_t max_inline =
txq->max_inline * RTE_CACHE_LINE_SIZE;
uint16_t room;
*/
ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
if (length > 0) {
- dseg = (struct mlx5_wqe_data_seg *)
+ dseg = (volatile struct mlx5_wqe_data_seg *)
((uintptr_t)wqe +
(ds * MLX5_WQE_DWORD_SIZE));
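+ /* Wrap dseg back to the ring start if it ran past the end. */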
if ((uintptr_t)dseg >= end)
- dseg = (struct mlx5_wqe_data_seg *)
- ((uintptr_t)&(*txq->wqes)[0]);
+ dseg = (volatile struct mlx5_wqe_data_seg *)
+ txq->wqes;
goto use_dseg;
} else if (!segs_n) {
goto next_pkt;
* Ethernet header has been stored.
*/
wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE);
- dseg = (struct mlx5_wqe_data_seg *)
+ dseg = (volatile struct mlx5_wqe_data_seg *)
((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE));
ds = 3;
use_dseg:
/* Add the remaining packet as a simple ds. */
- *dseg = (struct mlx5_wqe_data_seg) {
+ *dseg = (volatile struct mlx5_wqe_data_seg) {
.addr = htonll(addr),
.byte_count = htonl(length),
.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
unsigned int n = (txq->wqe_ci + ((ds + 3) / 4)) &
((1 << txq->wqe_n) - 1);
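+ /*
+ * Four MLX5_WQE_DWORD_SIZE segments fit in one MLX5_WQE_SIZE slot,
+ * so (ds + 3) / 4 rounds the dword count up to whole slots; n is
+ * the ring slot where the next data segment starts.
+ */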
- dseg = (struct mlx5_wqe_data_seg *)
- ((uintptr_t)&(*txq->wqes)[n]);
- tx_prefetch_wqe(txq, n + 1);
+ dseg = (volatile struct mlx5_wqe_data_seg *)
+ tx_mlx5_wqe(txq, n);
+ rte_prefetch0(tx_mlx5_wqe(txq, n + 1));
} else {
++dseg;
}
total_length += length;
#endif
/* Store segment information. */
- *dseg = (struct mlx5_wqe_data_seg) {
+ *dseg = (volatile struct mlx5_wqe_data_seg) {
.addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
.byte_count = htonl(length),
.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
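+ /* Data segments of an MPW session start in the WQE slot following
+ * the one holding its control and Ethernet segments. */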
volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
(volatile struct mlx5_wqe_data_seg (*)[])
- (uintptr_t)&(*txq->wqes)[(idx + 1) & ((1 << txq->wqe_n) - 1)];
+ tx_mlx5_wqe(txq, idx + 1);
mpw->state = MLX5_MPW_STATE_OPENED;
mpw->pkts_n = 0;
mpw->len = length;
mpw->total_len = 0;
- mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr;
+ mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
mpw->wqe->eseg.mss = htons(length);
mpw->wqe->eseg.inline_hdr_sz = 0;
mpw->wqe->eseg.rsvd0 = 0;
++txq->wqe_ci;
else
txq->wqe_ci += 2;
- tx_prefetch_wqe(txq, txq->wqe_ci);
- tx_prefetch_wqe(txq, txq->wqe_ci + 1);
+ rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
+ rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
}
/**
return 0;
/* Prefetch first packet cacheline. */
tx_prefetch_cqe(txq, txq->cq_ci);
- tx_prefetch_wqe(txq, txq->wqe_ci);
- tx_prefetch_wqe(txq, txq->wqe_ci + 1);
+ rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
+ rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
/* Start processing. */
txq_complete(txq);
max = (elts_n - (elts_head - txq->elts_tail));
mpw->pkts_n = 0;
mpw->len = length;
mpw->total_len = 0;
- mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr;
+ mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
(txq->wqe_ci << 8) |
MLX5_OPCODE_TSO);
return 0;
/* Prefetch first packet cacheline. */
tx_prefetch_cqe(txq, txq->cq_ci);
- tx_prefetch_wqe(txq, txq->wqe_ci);
- tx_prefetch_wqe(txq, txq->wqe_ci + 1);
+ rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
+ rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
/* Start processing. */
txq_complete(txq);
max = (elts_n - (elts_head - txq->elts_tail));
addr = rte_pktmbuf_mtod(buf, uintptr_t);
(*txq->elts)[elts_head] = buf;
/* Maximum number of bytes before wrapping. */
- max = ((uintptr_t)&(*txq->wqes)[1 << txq->wqe_n] -
+ max = ((uintptr_t)txq->wqes +
+ (1 << txq->wqe_n) * MLX5_WQE_SIZE -
(uintptr_t)mpw.data.raw);
if (length > max) {
rte_memcpy((void *)(uintptr_t)mpw.data.raw,
(void *)addr,
max);
- mpw.data.raw =
- (volatile void *)&(*txq->wqes)[0];
+ mpw.data.raw = (volatile void *)txq->wqes;
rte_memcpy((void *)(uintptr_t)mpw.data.raw,
(void *)(addr + max),
length - max);
mpw.data.raw += length;
}
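+ /*
+ * Wrap the write pointer once it reaches the end of the ring.
+ * tx_mlx5_wqe() masks its index and can never return this
+ * one-past-the-end address, so compute the boundary directly.
+ */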
if ((uintptr_t)mpw.data.raw ==
- (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n])
- mpw.data.raw =
- (volatile void *)&(*txq->wqes)[0];
+ ((uintptr_t)txq->wqes +
+ (1 << txq->wqe_n) * MLX5_WQE_SIZE))
+ mpw.data.raw = (volatile void *)txq->wqes;
++mpw.pkts_n;
++j;
if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) {