net/ice: add AVX2 offload Tx

author Wenzhuo Lu <wenzhuo.lu@intel.com>

Tue, 29 Jun 2021 02:29:20 +0000 (10:29 +0800)

committer Qi Zhang <qi.z.zhang@intel.com>

Tue, 6 Jul 2021 02:57:33 +0000 (04:57 +0200)
author Wenzhuo Lu <wenzhuo.lu@intel.com>
Tue, 29 Jun 2021 02:29:20 +0000 (10:29 +0800)
committer Qi Zhang <qi.z.zhang@intel.com>
Tue, 6 Jul 2021 02:57:33 +0000 (04:57 +0200)
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c

index 20352b07af7bc6c58d83c8877b819650c800b1f5..c6aa326e4f34b49b67d27036b5431d231a5b9679 100644 (file)
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -3288,7 +3288,7 @@ ice_set_tx_function(struct rte_eth_dev *dev)
  #ifdef RTE_ARCH_X86
         struct ice_tx_queue *txq;
         int i;
-       int tx_check_ret = 0;
+       int tx_check_ret = -1;
  
         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
                 ad->tx_use_avx2 = false;
@@ -3307,13 +3307,14 @@ ice_set_tx_function(struct rte_eth_dev *dev)
                         PMD_DRV_LOG(NOTICE,
                                 "AVX512 is not supported in build env");
  #endif
-                       if (!ad->tx_use_avx512 && tx_check_ret == ICE_VECTOR_PATH &&
-                       (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-                       rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
-                       rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
+                       if (!ad->tx_use_avx512 &&
+                               (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+                               rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+                               rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
                                 ad->tx_use_avx2 = true;
  
-                       if (!ad->tx_use_avx512 && tx_check_ret == ICE_VECTOR_OFFLOAD_PATH)
+                       if (!ad->tx_use_avx2 && !ad->tx_use_avx512 &&
+                               tx_check_ret == ICE_VECTOR_OFFLOAD_PATH)
                                 ad->tx_vec_allowed = false;
  
                         if (ad->tx_vec_allowed) {
@@ -3331,6 +3332,7 @@ ice_set_tx_function(struct rte_eth_dev *dev)
         }
  
         if (ad->tx_vec_allowed) {
+               dev->tx_pkt_prepare = NULL;
                 if (ad->tx_use_avx512) {
  #ifdef CC_AVX512_SUPPORT
                         if (tx_check_ret == ICE_VECTOR_OFFLOAD_PATH) {
@@ -3339,6 +3341,7 @@ ice_set_tx_function(struct rte_eth_dev *dev)
                                             dev->data->port_id);
                                 dev->tx_pkt_burst =
                                         ice_xmit_pkts_vec_avx512_offload;
+                               dev->tx_pkt_prepare = ice_prep_pkts;
                         } else {
                                 PMD_DRV_LOG(NOTICE,
                                             "Using AVX512 Vector Tx (port %d).",
@@ -3347,14 +3350,22 @@ ice_set_tx_function(struct rte_eth_dev *dev)
                         }
  #endif
                 } else {
-                       PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
-                                   ad->tx_use_avx2 ? "avx2 " : "",
-                                   dev->data->port_id);
-                       dev->tx_pkt_burst = ad->tx_use_avx2 ?
-                                           ice_xmit_pkts_vec_avx2 :
-                                           ice_xmit_pkts_vec;
+                       if (tx_check_ret == ICE_VECTOR_OFFLOAD_PATH) {
+                               PMD_DRV_LOG(NOTICE,
+                                           "Using AVX2 OFFLOAD Vector Tx (port %d).",
+                                           dev->data->port_id);
+                               dev->tx_pkt_burst =
+                                       ice_xmit_pkts_vec_avx2_offload;
+                               dev->tx_pkt_prepare = ice_prep_pkts;
+                       } else {
+                               PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
+                                           ad->tx_use_avx2 ? "avx2 " : "",
+                                           dev->data->port_id);
+                               dev->tx_pkt_burst = ad->tx_use_avx2 ?
+                                                   ice_xmit_pkts_vec_avx2 :
+                                                   ice_xmit_pkts_vec;
+                       }
                 }
-               dev->tx_pkt_prepare = NULL;
  
                 return;
         }
diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h

index 86b6f3dcc0c9f9eb99b5bdbf1693101d0c14fefd..f0536f7d9cb46d48594bded722e2fc02cd87c8eb 100644 (file)
--- a/drivers/net/ice/ice_rxtx.h
+++ b/drivers/net/ice/ice_rxtx.h
@@ -255,6 +255,8 @@ uint16_t ice_recv_scattered_pkts_vec_avx2(void *rx_queue,
                                           uint16_t nb_pkts);
  uint16_t ice_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
                                 uint16_t nb_pkts);
+uint16_t ice_xmit_pkts_vec_avx2_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
+                                       uint16_t nb_pkts);
  uint16_t ice_recv_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
                                   uint16_t nb_pkts);
  uint16_t ice_recv_pkts_vec_avx512_offload(void *rx_queue,
diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c

index 165bc1bb9d5ad24c9ea3014bfe04630d98c73242..b72946b1fa859e57869364884ee4a346bc1c7511 100644 (file)
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -769,30 +769,32 @@ ice_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
                                 rx_pkts + retval, nb_pkts);
  }
  
-static inline void
+static __rte_always_inline void
  ice_vtx1(volatile struct ice_tx_desc *txdp,
-        struct rte_mbuf *pkt, uint64_t flags)
+        struct rte_mbuf *pkt, uint64_t flags, bool offload)
  {
         uint64_t high_qw =
                 (ICE_TX_DESC_DTYPE_DATA |
                  ((uint64_t)flags  << ICE_TXD_QW1_CMD_S) |
                  ((uint64_t)pkt->data_len << ICE_TXD_QW1_TX_BUF_SZ_S));
+       if (offload)
+               ice_txd_enable_offload(pkt, &high_qw);
  
         __m128i descriptor = _mm_set_epi64x(high_qw,
                                 pkt->buf_iova + pkt->data_off);
         _mm_store_si128((__m128i *)txdp, descriptor);
  }
  
-static inline void
+static __rte_always_inline void
  ice_vtx(volatile struct ice_tx_desc *txdp,
-       struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
+       struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags, bool offload)
  {
         const uint64_t hi_qw_tmpl = (ICE_TX_DESC_DTYPE_DATA |
                         ((uint64_t)flags  << ICE_TXD_QW1_CMD_S));
  
         /* if unaligned on 32-bit boundary, do one to align */
         if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
-               ice_vtx1(txdp, *pkt, flags);
+               ice_vtx1(txdp, *pkt, flags, offload);
                 nb_pkts--, txdp++, pkt++;
         }
  
@@ -802,18 +804,26 @@ ice_vtx(volatile struct ice_tx_desc *txdp,
                         hi_qw_tmpl |
                         ((uint64_t)pkt[3]->data_len <<
                          ICE_TXD_QW1_TX_BUF_SZ_S);
+               if (offload)
+                       ice_txd_enable_offload(pkt[3], &hi_qw3);
                 uint64_t hi_qw2 =
                         hi_qw_tmpl |
                         ((uint64_t)pkt[2]->data_len <<
                          ICE_TXD_QW1_TX_BUF_SZ_S);
+               if (offload)
+                       ice_txd_enable_offload(pkt[2], &hi_qw2);
                 uint64_t hi_qw1 =
                         hi_qw_tmpl |
                         ((uint64_t)pkt[1]->data_len <<
                          ICE_TXD_QW1_TX_BUF_SZ_S);
+               if (offload)
+                       ice_txd_enable_offload(pkt[1], &hi_qw1);
                 uint64_t hi_qw0 =
                         hi_qw_tmpl |
                         ((uint64_t)pkt[0]->data_len <<
                          ICE_TXD_QW1_TX_BUF_SZ_S);
+               if (offload)
+                       ice_txd_enable_offload(pkt[0], &hi_qw0);
  
                 __m256i desc2_3 =
                         _mm256_set_epi64x
@@ -833,14 +843,14 @@ ice_vtx(volatile struct ice_tx_desc *txdp,
  
         /* do any last ones */
         while (nb_pkts) {
-               ice_vtx1(txdp, *pkt, flags);
+               ice_vtx1(txdp, *pkt, flags, offload);
                 txdp++, pkt++, nb_pkts--;
         }
  }
  
-static inline uint16_t
+static __rte_always_inline uint16_t
  ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
-                             uint16_t nb_pkts)
+                             uint16_t nb_pkts, bool offload)
  {
         struct ice_tx_queue *txq = (struct ice_tx_queue *)tx_queue;
         volatile struct ice_tx_desc *txdp;
@@ -869,11 +879,11 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
         if (nb_commit >= n) {
                 ice_tx_backlog_entry(txep, tx_pkts, n);
  
-               ice_vtx(txdp, tx_pkts, n - 1, flags);
+               ice_vtx(txdp, tx_pkts, n - 1, flags, offload);
                 tx_pkts += (n - 1);
                 txdp += (n - 1);
  
-               ice_vtx1(txdp, *tx_pkts++, rs);
+               ice_vtx1(txdp, *tx_pkts++, rs, offload);
  
                 nb_commit = (uint16_t)(nb_commit - n);
  
@@ -887,7 +897,7 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
  
         ice_tx_backlog_entry(txep, tx_pkts, nb_commit);
  
-       ice_vtx(txdp, tx_pkts, nb_commit, flags);
+       ice_vtx(txdp, tx_pkts, nb_commit, flags, offload);
  
         tx_id = (uint16_t)(tx_id + nb_commit);
         if (tx_id > txq->tx_next_rs) {
@@ -905,9 +915,9 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
         return nb_pkts;
  }
  
-uint16_t
-ice_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
-                      uint16_t nb_pkts)
+static __rte_always_inline uint16_t
+ice_xmit_pkts_vec_avx2_common(void *tx_queue, struct rte_mbuf **tx_pkts,
+                             uint16_t nb_pkts, bool offload)
  {
         uint16_t nb_tx = 0;
         struct ice_tx_queue *txq = (struct ice_tx_queue *)tx_queue;
@@ -917,7 +927,7 @@ ice_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
  
                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
                 ret = ice_xmit_fixed_burst_vec_avx2(tx_queue, &tx_pkts[nb_tx],
-                                                   num);
+                                                   num, offload);
                 nb_tx += ret;
                 nb_pkts -= ret;
                 if (ret < num)
@@ -926,3 +936,17 @@ ice_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
  
         return nb_tx;
  }
+
+uint16_t
+ice_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
+                      uint16_t nb_pkts)
+{
+       return ice_xmit_pkts_vec_avx2_common(tx_queue, tx_pkts, nb_pkts, false);
+}
+
+uint16_t
+ice_xmit_pkts_vec_avx2_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
+                              uint16_t nb_pkts)
+{
+       return ice_xmit_pkts_vec_avx2_common(tx_queue, tx_pkts, nb_pkts, true);
+}
author	Wenzhuo Lu <wenzhuo.lu@intel.com>
	Tue, 29 Jun 2021 02:29:20 +0000 (10:29 +0800)
committer	Qi Zhang <qi.z.zhang@intel.com>
	Tue, 6 Jul 2021 02:57:33 +0000 (04:57 +0200)
drivers/net/ice/ice_rxtx.c		patch | blob | history
drivers/net/ice/ice_rxtx.h		patch | blob | history
drivers/net/ice/ice_rxtx_vec_avx2.c		patch | blob | history