ethdev: change queue release callback
[dpdk.git] / drivers / net / iavf / iavf_rxtx.c
index caf14a2..88661e5 100644 (file)
@@ -57,6 +57,18 @@ iavf_proto_xtr_type_to_rxdid(uint8_t flex_type)
                                rxdid_map[flex_type] : IAVF_RXDID_COMMS_OVS_1;
 }
 
+static int
+iavf_monitor_callback(const uint64_t value,
+               const uint64_t arg[RTE_POWER_MONITOR_OPAQUE_SZ] __rte_unused)
+{
+       /*
+        * DD is expected to read as 1 once the descriptor has been written
+        * to: yield -1 when the bit is set in value, 0 while it is clear.
+        */
+       const uint64_t dd = rte_cpu_to_le_64(1 << IAVF_RX_DESC_STATUS_DD_SHIFT);
+       return (value & dd) != dd ? 0 : -1;
+}
+
 int
 iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
@@ -69,12 +81,8 @@ iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
        /* watch for changes in status bit */
        pmc->addr = &rxdp->wb.qword1.status_error_len;
 
-       /*
-        * we expect the DD bit to be set to 1 if this descriptor was already
-        * written to.
-        */
-       pmc->val = rte_cpu_to_le_64(1 << IAVF_RX_DESC_STATUS_DD_SHIFT);
-       pmc->mask = rte_cpu_to_le_64(1 << IAVF_RX_DESC_STATUS_DD_SHIFT);
+       /* comparison callback */
+       pmc->fn = iavf_monitor_callback;
 
        /* registers are 64-bit */
        pmc->size = sizeof(uint64_t);
@@ -160,7 +168,7 @@ check_rx_vec_allow(struct iavf_rx_queue *rxq)
 static inline bool
 check_tx_vec_allow(struct iavf_tx_queue *txq)
 {
-       if (!(txq->offloads & IAVF_NO_VECTOR_FLAGS) &&
+       if (!(txq->offloads & IAVF_TX_NO_VECTOR_FLAGS) &&
            txq->rs_thresh >= IAVF_VPMD_TX_MAX_BURST &&
            txq->rs_thresh <= IAVF_VPMD_TX_MAX_FREE_BUF) {
                PMD_INIT_LOG(DEBUG, "Vector tx can be enabled on this txq.");
@@ -217,6 +225,10 @@ reset_rx_queue(struct iavf_rx_queue *rxq)
 
        rxq->rx_tail = 0;
        rxq->nb_rx_hold = 0;
+
+       if (rxq->pkt_first_seg != NULL)
+               rte_pktmbuf_free(rxq->pkt_first_seg);
+
        rxq->pkt_first_seg = NULL;
        rxq->pkt_last_seg = NULL;
        rxq->rxrearm_nb = 0;
@@ -266,11 +278,15 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
        volatile union iavf_rx_desc *rxd;
        struct rte_mbuf *mbuf = NULL;
        uint64_t dma_addr;
-       uint16_t i;
+       uint16_t i, j;
 
        for (i = 0; i < rxq->nb_rx_desc; i++) {
                mbuf = rte_mbuf_raw_alloc(rxq->mp);
                if (unlikely(!mbuf)) {
+                       for (j = 0; j < i; j++) {
+                               rte_pktmbuf_free_seg(rxq->sw_ring[j]);
+                               rxq->sw_ring[j] = NULL;
+                       }
                        PMD_DRV_LOG(ERR, "Failed to allocate mbuf for RX");
                        return -ENOMEM;
                }
@@ -523,9 +539,12 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
        uint8_t proto_xtr;
        uint16_t len;
        uint16_t rx_free_thresh;
+       uint64_t offloads;
 
        PMD_INIT_FUNC_TRACE();
 
+       offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
+
        if (nb_desc % IAVF_ALIGN_RING_DESC != 0 ||
            nb_desc > IAVF_MAX_RING_DESC ||
            nb_desc < IAVF_MIN_RING_DESC) {
@@ -543,7 +562,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 
        /* Free memory if needed */
        if (dev->data->rx_queues[queue_idx]) {
-               iavf_dev_rx_queue_release(dev->data->rx_queues[queue_idx]);
+               iavf_dev_rx_queue_release(dev, queue_idx);
                dev->data->rx_queues[queue_idx] = NULL;
        }
 
@@ -596,6 +615,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
        rxq->rx_deferred_start = rx_conf->rx_deferred_start;
        rxq->rx_hdr_len = 0;
        rxq->vsi = vsi;
+       rxq->offloads = offloads;
 
        if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
                rxq->crc_len = RTE_ETHER_CRC_LEN;
@@ -603,7 +623,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
                rxq->crc_len = 0;
 
        len = rte_pktmbuf_data_room_size(rxq->mp) - RTE_PKTMBUF_HEADROOM;
-       rxq->rx_buf_len = RTE_ALIGN(len, (1 << IAVF_RXQ_CTX_DBUFF_SHIFT));
+       rxq->rx_buf_len = RTE_ALIGN_FLOOR(len, (1 << IAVF_RXQ_CTX_DBUFF_SHIFT));
 
        /* Allocate the software ring. */
        len = nb_desc + IAVF_RX_MAX_BURST;
@@ -696,11 +716,12 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
                tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
        tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
                tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
-       check_tx_thresh(nb_desc, tx_rs_thresh, tx_rs_thresh);
+       if (check_tx_thresh(nb_desc, tx_rs_thresh, tx_free_thresh) != 0)
+               return -EINVAL;
 
        /* Free memory if needed. */
        if (dev->data->tx_queues[queue_idx]) {
-               iavf_dev_tx_queue_release(dev->data->tx_queues[queue_idx]);
+               iavf_dev_tx_queue_release(dev, queue_idx);
                dev->data->tx_queues[queue_idx] = NULL;
        }
 
@@ -781,6 +802,22 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
                ad->tx_vec_allowed = false;
        }
 
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS &&
+           vf->tm_conf.committed) {
+               int tc;
+               for (tc = 0; tc < vf->qos_cap->num_elem; tc++) {
+                       if (txq->queue_id >= vf->qtc_map[tc].start_queue_id &&
+                           txq->queue_id < (vf->qtc_map[tc].start_queue_id +
+                           vf->qtc_map[tc].queue_count))
+                               break;
+               }
+               if (tc >= vf->qos_cap->num_elem) {
+                       PMD_INIT_LOG(ERR, "Queue TC mapping is not correct");
+                       return -EINVAL;
+               }
+               txq->tc = tc;
+       }
+
        return 0;
 }
 
@@ -819,12 +856,14 @@ iavf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
        else
                err = iavf_switch_queue_lv(adapter, rx_queue_id, true, true);
 
-       if (err)
+       if (err) {
+               release_rxq_mbufs(rxq);
                PMD_DRV_LOG(ERR, "Failed to switch RX queue %u on",
                            rx_queue_id);
-       else
+       } else {
                dev->data->rx_queue_state[rx_queue_id] =
                        RTE_ETH_QUEUE_STATE_STARTED;
+       }
 
        return err;
 }
@@ -923,9 +962,9 @@ iavf_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 }
 
 void
-iavf_dev_rx_queue_release(void *rxq)
+iavf_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 {
-       struct iavf_rx_queue *q = (struct iavf_rx_queue *)rxq;
+       struct iavf_rx_queue *q = dev->data->rx_queues[qid];
 
        if (!q)
                return;
@@ -937,9 +976,9 @@ iavf_dev_rx_queue_release(void *rxq)
 }
 
 void
-iavf_dev_tx_queue_release(void *txq)
+iavf_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 {
-       struct iavf_tx_queue *q = (struct iavf_tx_queue *)txq;
+       struct iavf_tx_queue *q = dev->data->tx_queues[qid];
 
        if (!q)
                return;
@@ -1157,7 +1196,7 @@ iavf_update_rx_tail(struct iavf_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
                           rxq->port_id, rxq->queue_id, rx_id, nb_hold);
                rx_id = (uint16_t)((rx_id == 0) ?
                        (rxq->nb_rx_desc - 1) : (rx_id - 1));
-               IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+               IAVF_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
                nb_hold = 0;
        }
        rxq->nb_rx_hold = nb_hold;
@@ -1214,6 +1253,7 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                rxd = *rxdp;
                nb_hold++;
                rxe = rxq->sw_ring[rx_id];
+               rxq->sw_ring[rx_id] = nmb;
                rx_id++;
                if (unlikely(rx_id == rxq->nb_rx_desc))
                        rx_id = 0;
@@ -1319,6 +1359,7 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
                rxd = *rxdp;
                nb_hold++;
                rxe = rxq->sw_ring[rx_id];
+               rxq->sw_ring[rx_id] = nmb;
                rx_id++;
                if (unlikely(rx_id == rxq->nb_rx_desc))
                        rx_id = 0;
@@ -1410,6 +1451,7 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
                rxd = *rxdp;
                nb_hold++;
                rxe = rxq->sw_ring[rx_id];
+               rxq->sw_ring[rx_id] = nmb;
                rx_id++;
                if (rx_id == rxq->nb_rx_desc)
                        rx_id = 0;
@@ -1563,6 +1605,7 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                rxd = *rxdp;
                nb_hold++;
                rxe = rxq->sw_ring[rx_id];
+               rxq->sw_ring[rx_id] = nmb;
                rx_id++;
                if (rx_id == rxq->nb_rx_desc)
                        rx_id = 0;
@@ -1902,7 +1945,7 @@ iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
 
        /* Update rx tail register */
        rte_wmb();
-       IAVF_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rxq->rx_free_trigger);
+       IAVF_PCI_REG_WC_WRITE_RELAXED(rxq->qrx_tail, rxq->rx_free_trigger);
 
        rxq->rx_free_trigger =
                (uint16_t)(rxq->rx_free_trigger + rxq->rx_free_thresh);
@@ -2004,9 +2047,9 @@ iavf_xmit_cleanup(struct iavf_tx_queue *txq)
        if ((txd[desc_to_clean_to].cmd_type_offset_bsz &
                        rte_cpu_to_le_64(IAVF_TXD_QW1_DTYPE_MASK)) !=
                        rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE)) {
-               PMD_TX_FREE_LOG(DEBUG, "TX descriptor %4u is not done "
-                               "(port=%d queue=%d)", desc_to_clean_to,
-                               txq->port_id, txq->queue_id);
+               PMD_TX_LOG(DEBUG, "TX descriptor %4u is not done "
+                          "(port=%d queue=%d)", desc_to_clean_to,
+                          txq->port_id, txq->queue_id);
                return -1;
        }
 
@@ -2241,6 +2284,11 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                                (volatile struct iavf_tx_context_desc *)
                                                        &txr[tx_id];
 
+                       /* clear QW0 or the previous writeback value
+                        * may impact next write
+                        */
+                       *(volatile uint64_t *)ctx_txd = 0;
+
                        txn = &sw_ring[txe->next_id];
                        RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
                        if (txe->mbuf) {
@@ -2323,12 +2371,33 @@ end_of_tx:
        PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
                   txq->port_id, txq->queue_id, tx_id, nb_tx);
 
-       IAVF_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
+       IAVF_PCI_REG_WC_WRITE_RELAXED(txq->qtx_tail, tx_id);
        txq->tx_tail = tx_id;
 
        return nb_tx;
 }
 
+/* Check if the packet with vlan user priority is transmitted in the
+ * correct queue: the priority taken from m->vlan_tci must be set in the
+ * tc_prio bits of the TC recorded in txq->tc. Returns 0 if so, else -1.
+ */
+static int
+iavf_check_vlan_up2tc(struct iavf_tx_queue *txq, struct rte_mbuf *m)
+{
+       struct rte_eth_dev *dev = &rte_eth_devices[txq->port_id];
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+       uint16_t up = m->vlan_tci >> IAVF_VLAN_TAG_PCP_OFFSET;
+
+       /* reject the packet when this queue's TC does not list priority up */
+       if (!(vf->qos_cap->cap[txq->tc].tc_prio & BIT(up))) {
+               PMD_TX_LOG(ERR, "packet with vlan pcp %u cannot transmit in queue %u",
+                       up, txq->queue_id);
+               return -1;
+       }
+
+       return 0;
+}
+
 /* TX prep functions */
 uint16_t
 iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
@@ -2337,6 +2406,9 @@ iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
        int i, ret;
        uint64_t ol_flags;
        struct rte_mbuf *m;
+       struct iavf_tx_queue *txq = tx_queue;
+       struct rte_eth_dev *dev = &rte_eth_devices[txq->port_id];
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 
        for (i = 0; i < nb_pkts; i++) {
                m = tx_pkts[i];
@@ -2360,7 +2432,7 @@ iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
                        return i;
                }
 
-#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+#ifdef RTE_ETHDEV_DEBUG_TX
                ret = rte_validate_tx_offload(m);
                if (ret != 0) {
                        rte_errno = -ret;
@@ -2372,6 +2444,15 @@ iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
                        rte_errno = -ret;
                        return i;
                }
+
+               if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS &&
+                   ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN)) {
+                       ret = iavf_check_vlan_up2tc(txq, m);
+                       if (ret != 0) {
+                               rte_errno = -ret;
+                               return i;
+                       }
+               }
        }
 
        return i;
@@ -2388,22 +2469,19 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 #ifdef RTE_ARCH_X86
        struct iavf_rx_queue *rxq;
        int i;
+       int check_ret;
        bool use_avx2 = false;
-#ifdef CC_AVX512_SUPPORT
        bool use_avx512 = false;
-#endif
-
-       if (!iavf_rx_vec_dev_check(dev) &&
-                       rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
-               for (i = 0; i < dev->data->nb_rx_queues; i++) {
-                       rxq = dev->data->rx_queues[i];
-                       (void)iavf_rxq_vec_setup(rxq);
-               }
+       bool use_flex = false;
 
+       check_ret = iavf_rx_vec_dev_check(dev);
+       if (check_ret >= 0 &&
+           rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
                if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
                     rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
-                               rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
+                   rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
                        use_avx2 = true;
+
 #ifdef CC_AVX512_SUPPORT
                if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
                    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1 &&
@@ -2411,61 +2489,110 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
                        use_avx512 = true;
 #endif
 
+               if (vf->vf_res->vf_cap_flags &
+                       VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+                       use_flex = true;
+
+               for (i = 0; i < dev->data->nb_rx_queues; i++) {
+                       rxq = dev->data->rx_queues[i];
+                       (void)iavf_rxq_vec_setup(rxq);
+               }
+
                if (dev->data->scattered_rx) {
-                       PMD_DRV_LOG(DEBUG,
-                                   "Using %sVector Scattered Rx (port %d).",
-                                   use_avx2 ? "avx2 " : "",
-                                   dev->data->port_id);
-                       if (vf->vf_res->vf_cap_flags &
-                               VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
+                       if (!use_avx512) {
+                               PMD_DRV_LOG(DEBUG,
+                                           "Using %sVector Scattered Rx (port %d).",
+                                           use_avx2 ? "avx2 " : "",
+                                           dev->data->port_id);
+                       } else {
+                               if (check_ret == IAVF_VECTOR_PATH)
+                                       PMD_DRV_LOG(DEBUG,
+                                                   "Using AVX512 Vector Scattered Rx (port %d).",
+                                                   dev->data->port_id);
+                               else
+                                       PMD_DRV_LOG(DEBUG,
+                                                   "Using AVX512 OFFLOAD Vector Scattered Rx (port %d).",
+                                                   dev->data->port_id);
+                       }
+                       if (use_flex) {
                                dev->rx_pkt_burst = use_avx2 ?
                                        iavf_recv_scattered_pkts_vec_avx2_flex_rxd :
                                        iavf_recv_scattered_pkts_vec_flex_rxd;
 #ifdef CC_AVX512_SUPPORT
-                               if (use_avx512)
-                                       dev->rx_pkt_burst =
-                                               iavf_recv_scattered_pkts_vec_avx512_flex_rxd;
+                               if (use_avx512) {
+                                       if (check_ret == IAVF_VECTOR_PATH)
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_scattered_pkts_vec_avx512_flex_rxd;
+                                       else
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_scattered_pkts_vec_avx512_flex_rxd_offload;
+                               }
 #endif
                        } else {
                                dev->rx_pkt_burst = use_avx2 ?
                                        iavf_recv_scattered_pkts_vec_avx2 :
                                        iavf_recv_scattered_pkts_vec;
 #ifdef CC_AVX512_SUPPORT
-                               if (use_avx512)
-                                       dev->rx_pkt_burst =
-                                               iavf_recv_scattered_pkts_vec_avx512;
+                               if (use_avx512) {
+                                       if (check_ret == IAVF_VECTOR_PATH)
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_scattered_pkts_vec_avx512;
+                                       else
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_scattered_pkts_vec_avx512_offload;
+                               }
 #endif
                        }
                } else {
-                       PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
-                                   use_avx2 ? "avx2 " : "",
-                                   dev->data->port_id);
-                       if (vf->vf_res->vf_cap_flags &
-                               VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
+                       if (!use_avx512) {
+                               PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
+                                           use_avx2 ? "avx2 " : "",
+                                           dev->data->port_id);
+                       } else {
+                               if (check_ret == IAVF_VECTOR_PATH)
+                                       PMD_DRV_LOG(DEBUG,
+                                                   "Using AVX512 Vector Rx (port %d).",
+                                                   dev->data->port_id);
+                               else
+                                       PMD_DRV_LOG(DEBUG,
+                                                   "Using AVX512 OFFLOAD Vector Rx (port %d).",
+                                                   dev->data->port_id);
+                       }
+                       if (use_flex) {
                                dev->rx_pkt_burst = use_avx2 ?
                                        iavf_recv_pkts_vec_avx2_flex_rxd :
                                        iavf_recv_pkts_vec_flex_rxd;
 #ifdef CC_AVX512_SUPPORT
-                               if (use_avx512)
-                                       dev->rx_pkt_burst =
-                                               iavf_recv_pkts_vec_avx512_flex_rxd;
+                               if (use_avx512) {
+                                       if (check_ret == IAVF_VECTOR_PATH)
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_pkts_vec_avx512_flex_rxd;
+                                       else
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_pkts_vec_avx512_flex_rxd_offload;
+                               }
 #endif
                        } else {
                                dev->rx_pkt_burst = use_avx2 ?
                                        iavf_recv_pkts_vec_avx2 :
                                        iavf_recv_pkts_vec;
 #ifdef CC_AVX512_SUPPORT
-                               if (use_avx512)
-                                       dev->rx_pkt_burst =
-                                               iavf_recv_pkts_vec_avx512;
+                               if (use_avx512) {
+                                       if (check_ret == IAVF_VECTOR_PATH)
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_pkts_vec_avx512;
+                                       else
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_pkts_vec_avx512_offload;
+                               }
 #endif
                        }
                }
 
                return;
        }
-#endif
 
+#endif
        if (dev->data->scattered_rx) {
                PMD_DRV_LOG(DEBUG, "Using a Scattered Rx callback (port=%d).",
                            dev->data->port_id);
@@ -2494,17 +2621,23 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 #ifdef RTE_ARCH_X86
        struct iavf_tx_queue *txq;
        int i;
+       int check_ret;
+       bool use_sse = false;
        bool use_avx2 = false;
-#ifdef CC_AVX512_SUPPORT
        bool use_avx512 = false;
-#endif
 
-       if (!iavf_tx_vec_dev_check(dev) &&
-                       rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
-               if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-                    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
-                               rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
-                       use_avx2 = true;
+       check_ret = iavf_tx_vec_dev_check(dev);
+
+       if (check_ret >= 0 &&
+           rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+               /* SSE and AVX2 not support offload path yet. */
+               if (check_ret == IAVF_VECTOR_PATH) {
+                       use_sse = true;
+                       if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+                            rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+                           rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
+                               use_avx2 = true;
+               }
 #ifdef CC_AVX512_SUPPORT
                if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
                    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1 &&
@@ -2512,17 +2645,32 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
                        use_avx512 = true;
 #endif
 
-               PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
-                           use_avx2 ? "avx2 " : "",
-                           dev->data->port_id);
-               dev->tx_pkt_burst = use_avx2 ?
-                                   iavf_xmit_pkts_vec_avx2 :
-                                   iavf_xmit_pkts_vec;
+               if (!use_sse && !use_avx2 && !use_avx512)
+                       goto normal;
+
+               if (!use_avx512) {
+                       PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
+                                   use_avx2 ? "avx2 " : "",
+                                   dev->data->port_id);
+                       dev->tx_pkt_burst = use_avx2 ?
+                                           iavf_xmit_pkts_vec_avx2 :
+                                           iavf_xmit_pkts_vec;
+               }
+               dev->tx_pkt_prepare = NULL;
 #ifdef CC_AVX512_SUPPORT
-               if (use_avx512)
-                       dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512;
+               if (use_avx512) {
+                       if (check_ret == IAVF_VECTOR_PATH) {
+                               dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512;
+                               PMD_DRV_LOG(DEBUG, "Using AVX512 Vector Tx (port %d).",
+                                           dev->data->port_id);
+                       } else {
+                               dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512_offload;
+                               dev->tx_pkt_prepare = iavf_prep_pkts;
+                               PMD_DRV_LOG(DEBUG, "Using AVX512 OFFLOAD Vector Tx (port %d).",
+                                           dev->data->port_id);
+                       }
+               }
 #endif
-               dev->tx_pkt_prepare = NULL;
 
                for (i = 0; i < dev->data->nb_tx_queues; i++) {
                        txq = dev->data->tx_queues[i];
@@ -2540,8 +2688,9 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 
                return;
        }
-#endif
 
+normal:
+#endif
        PMD_DRV_LOG(DEBUG, "Using Basic Tx callback (port=%d).",
                    dev->data->port_id);
        dev->tx_pkt_burst = iavf_xmit_pkts;