net/mlx5: support extended statistics
[dpdk.git] / drivers / net / mlx5 / mlx5_rxtx.c
index 46fb252..56c7f78 100644 (file)
@@ -82,7 +82,8 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
        __attribute__((always_inline));
 
 static inline void
-mlx5_tx_dbrec(struct txq *txq) __attribute__((always_inline));
+mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe)
+       __attribute__((always_inline));
 
 static inline uint32_t
 rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
@@ -326,40 +327,20 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
  *
  * @param txq
  *   Pointer to TX queue structure.
+ * @param wqe
+ *   Pointer to the last WQE posted in the NIC.
  */
 static inline void
-mlx5_tx_dbrec(struct txq *txq)
+mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe)
 {
-       uint8_t *dst = (uint8_t *)((uintptr_t)txq->bf_reg + txq->bf_offset);
-       uint32_t data[4] = {
-               htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND),
-               htonl(txq->qp_num_8s),
-               0,
-               0,
-       };
+       uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
+       volatile uint64_t *src = ((volatile uint64_t *)wqe);
+
        rte_wmb();
        *txq->qp_db = htonl(txq->wqe_ci);
        /* Ensure ordering between DB record and BF copy. */
        rte_wmb();
-       memcpy(dst, (uint8_t *)data, 16);
-       txq->bf_offset ^= (1 << txq->bf_buf_size);
-}
-
-/**
- * Prefetch a CQE.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param cqe_ci
- *   CQE consumer index.
- */
-static inline void
-tx_prefetch_cqe(struct txq *txq, uint16_t ci)
-{
-       volatile struct mlx5_cqe *cqe;
-
-       cqe = &(*txq->cqes)[ci & ((1 << txq->cqe_n) - 1)];
-       rte_prefetch0(cqe);
+       *dst = *src;
 }
 
 /**
@@ -393,8 +374,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        if (unlikely(!pkts_n))
                return 0;
        /* Prefetch first packet cacheline. */
-       tx_prefetch_cqe(txq, txq->cq_ci);
-       tx_prefetch_cqe(txq, txq->cq_ci + 1);
        rte_prefetch0(*pkts);
        /* Start processing. */
        txq_complete(txq);
@@ -408,7 +387,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                uintptr_t addr;
                uint64_t naddr;
                uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
-               uint8_t ehdr[2];
+               uint16_t ehdr;
                uint8_t cs_flags = 0;
 #ifdef MLX5_PMD_SOFT_COUNTERS
                uint32_t total_length = 0;
@@ -435,8 +414,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        rte_prefetch0(*pkts);
                addr = rte_pktmbuf_mtod(buf, uintptr_t);
                length = DATA_LEN(buf);
-               ehdr[0] = ((uint8_t *)addr)[0];
-               ehdr[1] = ((uint8_t *)addr)[1];
+               ehdr = (((uint8_t *)addr)[1] << 8) |
+                      ((uint8_t *)addr)[0];
 #ifdef MLX5_PMD_SOFT_COUNTERS
                total_length = length;
 #endif
@@ -505,7 +484,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                assert(addr <= addr_end);
                        }
                        /*
-                        * 2 DWORDs consumed by the WQE header + 1 DSEG +
+                        * 2 DWORDs consumed by the WQE header + ETH segment +
                         * the size of the inline part of the packet.
                         */
                        ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
@@ -520,6 +499,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        } else if (!segs_n) {
                                goto next_pkt;
                        } else {
+                               /* dseg will be advance as part of next_seg */
+                               dseg = (volatile rte_v128u32_t *)
+                                       ((uintptr_t)wqe +
+                                        ((ds - 1) * MLX5_WQE_DWORD_SIZE));
                                goto next_seg;
                        }
                } else {
@@ -599,8 +582,7 @@ next_pkt:
                        0,
                        cs_flags,
                        0,
-                       (ehdr[1] << 24) | (ehdr[0] << 16) |
-                       htons(pkt_inline_sz),
+                       (ehdr << 16) | htons(pkt_inline_sz),
                };
                txq->wqe_ci += (ds + 3) / 4;
 #ifdef MLX5_PMD_SOFT_COUNTERS
@@ -630,7 +612,7 @@ next_pkt:
        txq->stats.opackets += i;
 #endif
        /* Ring QP doorbell. */
-       mlx5_tx_dbrec(txq);
+       mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)wqe);
        txq->elts_head = elts_head;
        return i;
 }
@@ -733,7 +715,6 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        if (unlikely(!pkts_n))
                return 0;
        /* Prefetch first packet cacheline. */
-       tx_prefetch_cqe(txq, txq->cq_ci);
        rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
        rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
        /* Start processing. */
@@ -838,7 +819,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        /* Ring QP doorbell. */
        if (mpw.state == MLX5_MPW_STATE_OPENED)
                mlx5_mpw_close(txq, &mpw);
-       mlx5_tx_dbrec(txq);
+       mlx5_tx_dbrec(txq, mpw.wqe);
        txq->elts_head = elts_head;
        return i;
 }
@@ -938,7 +919,6 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
        if (unlikely(!pkts_n))
                return 0;
        /* Prefetch first packet cacheline. */
-       tx_prefetch_cqe(txq, txq->cq_ci);
        rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
        rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
        /* Start processing. */
@@ -1107,7 +1087,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
                mlx5_mpw_inline_close(txq, &mpw);
        else if (mpw.state == MLX5_MPW_STATE_OPENED)
                mlx5_mpw_close(txq, &mpw);
-       mlx5_tx_dbrec(txq);
+       mlx5_tx_dbrec(txq, mpw.wqe);
        txq->elts_head = elts_head;
        return i;
 }
@@ -1133,24 +1113,24 @@ rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
                pkt_type =
                        TRANSPOSE(flags,
                                  MLX5_CQE_RX_OUTER_IPV4_PACKET,
-                                 RTE_PTYPE_L3_IPV4) |
+                                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN) |
                        TRANSPOSE(flags,
                                  MLX5_CQE_RX_OUTER_IPV6_PACKET,
-                                 RTE_PTYPE_L3_IPV6) |
+                                 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN) |
                        TRANSPOSE(flags,
                                  MLX5_CQE_RX_IPV4_PACKET,
-                                 RTE_PTYPE_INNER_L3_IPV4) |
+                                 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN) |
                        TRANSPOSE(flags,
                                  MLX5_CQE_RX_IPV6_PACKET,
-                                 RTE_PTYPE_INNER_L3_IPV6);
+                                 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN);
        else
                pkt_type =
                        TRANSPOSE(flags,
                                  MLX5_CQE_L3_HDR_TYPE_IPV6,
-                                 RTE_PTYPE_L3_IPV6) |
+                                 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN) |
                        TRANSPOSE(flags,
                                  MLX5_CQE_L3_HDR_TYPE_IPV4,
-                                 RTE_PTYPE_L3_IPV4);
+                                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN);
        return pkt_type;
 }