net/bnxt: refactor async event handling
[dpdk.git] / drivers / net / iavf / iavf_rxtx.c
index 9eccb7c..d61b32f 100644 (file)
 #include <rte_mbuf.h>
 #include <rte_malloc.h>
 #include <rte_ether.h>
-#include <rte_ethdev_driver.h>
+#include <ethdev_driver.h>
 #include <rte_tcp.h>
 #include <rte_sctp.h>
 #include <rte_udp.h>
 #include <rte_ip.h>
 #include <rte_net.h>
+#include <rte_vect.h>
 
 #include "iavf.h"
 #include "iavf_rxtx.h"
+#include "rte_pmd_iavf.h"
+
+/* Offset of mbuf dynamic field for protocol extraction's metadata */
+int rte_pmd_ifd_dynfield_proto_xtr_metadata_offs = -1;
+
+/* Mask of mbuf dynamic flags for protocol extraction's type */
+uint64_t rte_pmd_ifd_dynflag_proto_xtr_vlan_mask;
+uint64_t rte_pmd_ifd_dynflag_proto_xtr_ipv4_mask;
+uint64_t rte_pmd_ifd_dynflag_proto_xtr_ipv6_mask;
+uint64_t rte_pmd_ifd_dynflag_proto_xtr_ipv6_flow_mask;
+uint64_t rte_pmd_ifd_dynflag_proto_xtr_tcp_mask;
+uint64_t rte_pmd_ifd_dynflag_proto_xtr_ip_offset_mask;
+
+/*
+ * Translate a protocol extraction type (IAVF_PROTO_XTR_*) into the flexible
+ * Rx descriptor ID used for it.
+ *
+ * Returns the table entry for flex_type, or IAVF_RXDID_COMMS_OVS_1 when
+ * flex_type lies beyond the end of the table.
+ */
+uint8_t
+iavf_proto_xtr_type_to_rxdid(uint8_t flex_type)
+{
+       /* read-only lookup table, never modified after initialisation */
+       static const uint8_t rxdid_map[] = {
+               [IAVF_PROTO_XTR_NONE]      = IAVF_RXDID_COMMS_OVS_1,
+               [IAVF_PROTO_XTR_VLAN]      = IAVF_RXDID_COMMS_AUX_VLAN,
+               [IAVF_PROTO_XTR_IPV4]      = IAVF_RXDID_COMMS_AUX_IPV4,
+               [IAVF_PROTO_XTR_IPV6]      = IAVF_RXDID_COMMS_AUX_IPV6,
+               [IAVF_PROTO_XTR_IPV6_FLOW] = IAVF_RXDID_COMMS_AUX_IPV6_FLOW,
+               [IAVF_PROTO_XTR_TCP]       = IAVF_RXDID_COMMS_AUX_TCP,
+               [IAVF_PROTO_XTR_IP_OFFSET] = IAVF_RXDID_COMMS_AUX_IP_OFFSET,
+       };
+
+       return flex_type < RTE_DIM(rxdid_map) ?
+                               rxdid_map[flex_type] : IAVF_RXDID_COMMS_OVS_1;
+}
+
+/*
+ * Power-monitor comparison callback.
+ *
+ * Returns -1 when the DD bit is set in the monitored value and 0 otherwise.
+ * The opaque argument array is not used.
+ */
+static int
+iavf_monitor_callback(const uint64_t value,
+               const uint64_t arg[RTE_POWER_MONITOR_OPAQUE_SZ] __rte_unused)
+{
+       const uint64_t dd_mask =
+               rte_cpu_to_le_64(1 << IAVF_RX_DESC_STATUS_DD_SHIFT);
+
+       /* DD set to 1 means this descriptor was already written to */
+       if ((value & dd_mask) == dd_mask)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Fill in the power-monitor condition for an Rx queue: the monitored
+ * address is the status/error/length word of the descriptor at the queue's
+ * current tail, checked with iavf_monitor_callback. Always returns 0.
+ */
+int
+iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
+{
+       struct iavf_rx_queue *rxq = rx_queue;
+       const uint16_t tail = rxq->rx_tail;
+       volatile union iavf_rx_desc *tail_desc = &rxq->rx_ring[tail];
+
+       /* comparison callback */
+       pmc->fn = iavf_monitor_callback;
+
+       /* registers are 64-bit */
+       pmc->size = sizeof(uint64_t);
+
+       /* watch for changes in status bit */
+       pmc->addr = &tail_desc->wb.qword1.status_error_len;
+
+       return 0;
+}
 
 static inline int
 check_rx_thresh(uint16_t nb_desc, uint16_t thresh)
@@ -105,7 +168,7 @@ check_rx_vec_allow(struct iavf_rx_queue *rxq)
 static inline bool
 check_tx_vec_allow(struct iavf_tx_queue *txq)
 {
-       if (!(txq->offloads & IAVF_NO_VECTOR_FLAGS) &&
+       if (!(txq->offloads & IAVF_TX_NO_VECTOR_FLAGS) &&
            txq->rs_thresh >= IAVF_VPMD_TX_MAX_BURST &&
            txq->rs_thresh <= IAVF_VPMD_TX_MAX_FREE_BUF) {
                PMD_INIT_LOG(DEBUG, "Vector tx can be enabled on this txq.");
@@ -164,6 +227,8 @@ reset_rx_queue(struct iavf_rx_queue *rxq)
        rxq->nb_rx_hold = 0;
        rxq->pkt_first_seg = NULL;
        rxq->pkt_last_seg = NULL;
+       rxq->rxrearm_nb = 0;
+       rxq->rxrearm_start = 0;
 }
 
 static inline void
@@ -294,6 +359,160 @@ static const struct iavf_txq_ops def_txq_ops = {
        .release_mbufs = release_txq_mbufs,
 };
 
+/*
+ * Fill mbuf fields from a "comms OVS" flexible Rx descriptor: the FDIR ID
+ * and, with 32-byte descriptors only, the RSS hash. The queue argument is
+ * not used.
+ */
+static inline void
+iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct iavf_rx_queue *rxq,
+                                   struct rte_mbuf *mb,
+                                   volatile union iavf_rx_flex_desc *rxdp)
+{
+       volatile struct iavf_32b_rx_flex_desc_comms_ovs *ovs_desc =
+                       (volatile struct iavf_32b_rx_flex_desc_comms_ovs *)rxdp;
+
+       /* FDIR fields are only reported when the flow ID is not all-ones */
+       if (ovs_desc->flow_id != 0xFFFFFFFF) {
+               mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+               mb->hash.fdir.hi = rte_le_to_cpu_32(ovs_desc->flow_id);
+       }
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+       {
+               const uint16_t status0 =
+                       rte_le_to_cpu_16(ovs_desc->status_error0);
+
+               /* RSS hash is taken only when the descriptor marks it valid */
+               if (likely(status0 &
+                          (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+                       mb->ol_flags |= PKT_RX_RSS_HASH;
+                       mb->hash.rss = rte_le_to_cpu_32(ovs_desc->rss_hash);
+               }
+       }
+#endif
+}
+
+/*
+ * Fill mbuf fields from a "comms aux" flexible Rx descriptor whose extracted
+ * metadata is gated by the XTRMD4/XTRMD5 valid bits of status_error1.
+ *
+ * The RSS hash is set when status_error0 marks it valid. With 32-byte
+ * descriptors only, the FDIR ID is set unless flow_id is all-ones and, when
+ * the queue has a protocol extraction flag, aux0 (low 16 bits) and aux1
+ * (high 16 bits) are combined into the mbuf dynamic metadata field.
+ */
+static inline void
+iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct iavf_rx_queue *rxq,
+                                      struct rte_mbuf *mb,
+                                      volatile union iavf_rx_flex_desc *rxdp)
+{
+       volatile struct iavf_32b_rx_flex_desc_comms *desc =
+                       (volatile struct iavf_32b_rx_flex_desc_comms *)rxdp;
+       uint16_t stat_err;
+
+       stat_err = rte_le_to_cpu_16(desc->status_error0);
+       if (likely(stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+               mb->ol_flags |= PKT_RX_RSS_HASH;
+               mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
+       }
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+       if (desc->flow_id != 0xFFFFFFFF) {
+               mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+               mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
+       }
+
+       if (rxq->xtr_ol_flag) {
+               uint32_t metadata = 0;
+
+               stat_err = rte_le_to_cpu_16(desc->status_error1);
+
+               if (stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS1_XTRMD4_VALID_S))
+                       metadata = rte_le_to_cpu_16(desc->flex_ts.flex.aux0);
+
+               /*
+                * Widen to 32 bits before shifting: a 16-bit value promotes
+                * to int, and shifting one with bit 15 set left by 16 would
+                * overflow it.
+                */
+               if (stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS1_XTRMD5_VALID_S))
+                       metadata |= ((uint32_t)
+                               rte_le_to_cpu_16(desc->flex_ts.flex.aux1)) << 16;
+
+               /* the flag and dynamic field are only set for non-zero data */
+               if (metadata) {
+                       mb->ol_flags |= rxq->xtr_ol_flag;
+
+                       *RTE_PMD_IFD_DYNF_PROTO_XTR_METADATA(mb) = metadata;
+               }
+       }
+#endif
+}
+
+/*
+ * Fill mbuf fields from a "comms aux" flexible Rx descriptor whose extracted
+ * metadata is taken from aux0 or, failing that, aux1; a field holding
+ * 0xFFFF is skipped.
+ *
+ * The RSS hash is set when status_error0 marks it valid. The FDIR ID and
+ * the protocol extraction metadata are handled with 32-byte descriptors
+ * only.
+ */
+static inline void
+iavf_rxd_to_pkt_fields_by_comms_aux_v2(struct iavf_rx_queue *rxq,
+                                      struct rte_mbuf *mb,
+                                      volatile union iavf_rx_flex_desc *rxdp)
+{
+       volatile struct iavf_32b_rx_flex_desc_comms *comms =
+                       (volatile struct iavf_32b_rx_flex_desc_comms *)rxdp;
+       const uint16_t status0 = rte_le_to_cpu_16(comms->status_error0);
+
+       if (likely(status0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+               mb->ol_flags |= PKT_RX_RSS_HASH;
+               mb->hash.rss = rte_le_to_cpu_32(comms->rss_hash);
+       }
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+       if (comms->flow_id != 0xFFFFFFFF) {
+               mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+               mb->hash.fdir.hi = rte_le_to_cpu_32(comms->flow_id);
+       }
+
+       if (rxq->xtr_ol_flag) {
+               uint32_t xtr_md;
+
+               /* aux0 takes precedence; aux1 is only a fallback */
+               if (comms->flex_ts.flex.aux0 != 0xFFFF)
+                       xtr_md = rte_le_to_cpu_16(comms->flex_ts.flex.aux0);
+               else if (comms->flex_ts.flex.aux1 != 0xFFFF)
+                       xtr_md = rte_le_to_cpu_16(comms->flex_ts.flex.aux1);
+               else
+                       xtr_md = 0;
+
+               if (xtr_md != 0) {
+                       mb->ol_flags |= rxq->xtr_ol_flag;
+
+                       *RTE_PMD_IFD_DYNF_PROTO_XTR_METADATA(mb) = xtr_md;
+               }
+       }
+#endif
+}
+
+/*
+ * Pick the descriptor-to-mbuf field handler for a queue from its RXDID and,
+ * for the protocol extraction RXDIDs, record the matching dynamic flag mask.
+ * The flag is cleared when the metadata dynamic field is not available.
+ */
+static void
+iavf_select_rxd_to_pkt_fields_handler(struct iavf_rx_queue *rxq, uint32_t rxdid)
+{
+       /* every protocol extraction RXDID except IP_OFFSET uses the v1 parser */
+       void (*to_pkt_fields)(struct iavf_rx_queue *rxq,
+                             struct rte_mbuf *mb,
+                             volatile union iavf_rx_flex_desc *rxdp) =
+               iavf_rxd_to_pkt_fields_by_comms_aux_v1;
+
+       switch (rxdid) {
+       case IAVF_RXDID_COMMS_AUX_VLAN:
+               rxq->xtr_ol_flag = rte_pmd_ifd_dynflag_proto_xtr_vlan_mask;
+               break;
+       case IAVF_RXDID_COMMS_AUX_IPV4:
+               rxq->xtr_ol_flag = rte_pmd_ifd_dynflag_proto_xtr_ipv4_mask;
+               break;
+       case IAVF_RXDID_COMMS_AUX_IPV6:
+               rxq->xtr_ol_flag = rte_pmd_ifd_dynflag_proto_xtr_ipv6_mask;
+               break;
+       case IAVF_RXDID_COMMS_AUX_IPV6_FLOW:
+               rxq->xtr_ol_flag =
+                       rte_pmd_ifd_dynflag_proto_xtr_ipv6_flow_mask;
+               break;
+       case IAVF_RXDID_COMMS_AUX_TCP:
+               rxq->xtr_ol_flag = rte_pmd_ifd_dynflag_proto_xtr_tcp_mask;
+               break;
+       case IAVF_RXDID_COMMS_AUX_IP_OFFSET:
+               rxq->xtr_ol_flag =
+                       rte_pmd_ifd_dynflag_proto_xtr_ip_offset_mask;
+               to_pkt_fields = iavf_rxd_to_pkt_fields_by_comms_aux_v2;
+               break;
+       case IAVF_RXDID_COMMS_OVS_1:
+       default:
+               /* update this according to the RXDID for FLEX_DESC_NONE */
+               to_pkt_fields = iavf_rxd_to_pkt_fields_by_comms_ovs;
+               break;
+       }
+
+       rxq->rxd_to_pkt_fields = to_pkt_fields;
+
+       if (!rte_pmd_ifd_dynf_proto_xtr_metadata_avail())
+               rxq->xtr_ol_flag = 0;
+}
+
 int
 iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
                       uint16_t nb_desc, unsigned int socket_id,
@@ -309,11 +528,15 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
        struct iavf_rx_queue *rxq;
        const struct rte_memzone *mz;
        uint32_t ring_size;
+       uint8_t proto_xtr;
        uint16_t len;
        uint16_t rx_free_thresh;
+       uint64_t offloads;
 
        PMD_INIT_FUNC_TRACE();
 
+       offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
+
        if (nb_desc % IAVF_ALIGN_RING_DESC != 0 ||
            nb_desc > IAVF_MAX_RING_DESC ||
            nb_desc < IAVF_MIN_RING_DESC) {
@@ -346,15 +569,50 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
                return -ENOMEM;
        }
 
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
+               proto_xtr = vf->proto_xtr ? vf->proto_xtr[queue_idx] :
+                               IAVF_PROTO_XTR_NONE;
+               rxq->rxdid = iavf_proto_xtr_type_to_rxdid(proto_xtr);
+               rxq->proto_xtr = proto_xtr;
+       } else {
+               rxq->rxdid = IAVF_RXDID_LEGACY_1;
+               rxq->proto_xtr = IAVF_PROTO_XTR_NONE;
+       }
+
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) {
+               struct virtchnl_vlan_supported_caps *stripping_support =
+                               &vf->vlan_v2_caps.offloads.stripping_support;
+               uint32_t stripping_cap;
+
+               if (stripping_support->outer)
+                       stripping_cap = stripping_support->outer;
+               else
+                       stripping_cap = stripping_support->inner;
+
+               if (stripping_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1)
+                       rxq->rx_flags = IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG1;
+               else if (stripping_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2)
+                       rxq->rx_flags = IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2;
+       } else {
+               rxq->rx_flags = IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG1;
+       }
+
+       iavf_select_rxd_to_pkt_fields_handler(rxq, rxq->rxdid);
+
        rxq->mp = mp;
        rxq->nb_rx_desc = nb_desc;
        rxq->rx_free_thresh = rx_free_thresh;
        rxq->queue_id = queue_idx;
        rxq->port_id = dev->data->port_id;
-       rxq->crc_len = 0; /* crc stripping by default */
        rxq->rx_deferred_start = rx_conf->rx_deferred_start;
        rxq->rx_hdr_len = 0;
        rxq->vsi = vsi;
+       rxq->offloads = offloads;
+
+       if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
+               rxq->crc_len = RTE_ETHER_CRC_LEN;
+       else
+               rxq->crc_len = 0;
 
        len = rte_pktmbuf_data_room_size(rxq->mp) - RTE_PKTMBUF_HEADROOM;
        rxq->rx_buf_len = RTE_ALIGN(len, (1 << IAVF_RXQ_CTX_DBUFF_SHIFT));
@@ -426,6 +684,8 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
                       const struct rte_eth_txconf *tx_conf)
 {
        struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct iavf_info *vf =
+               IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        struct iavf_tx_queue *txq;
        const struct rte_memzone *mz;
        uint32_t ring_size;
@@ -467,6 +727,24 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
                return -ENOMEM;
        }
 
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) {
+               struct virtchnl_vlan_supported_caps *insertion_support =
+                       &vf->vlan_v2_caps.offloads.insertion_support;
+               uint32_t insertion_cap;
+
+               if (insertion_support->outer)
+                       insertion_cap = insertion_support->outer;
+               else
+                       insertion_cap = insertion_support->inner;
+
+               if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1)
+                       txq->vlan_flag = IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1;
+               else if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2)
+                       txq->vlan_flag = IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2;
+       } else {
+               txq->vlan_flag = IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1;
+       }
+
        txq->nb_tx_desc = nb_desc;
        txq->rs_thresh = tx_rs_thresh;
        txq->free_thresh = tx_free_thresh;
@@ -515,6 +793,22 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
                ad->tx_vec_allowed = false;
        }
 
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS &&
+           vf->tm_conf.committed) {
+               int tc;
+               for (tc = 0; tc < vf->qos_cap->num_elem; tc++) {
+                       if (txq->queue_id >= vf->qtc_map[tc].start_queue_id &&
+                           txq->queue_id < (vf->qtc_map[tc].start_queue_id +
+                           vf->qtc_map[tc].queue_count))
+                               break;
+               }
+               if (tc >= vf->qos_cap->num_elem) {
+                       PMD_INIT_LOG(ERR, "Queue TC mapping is not correct");
+                       return -EINVAL;
+               }
+               txq->tc = tc;
+       }
+
        return 0;
 }
 
@@ -523,6 +817,7 @@ iavf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct iavf_rx_queue *rxq;
        int err = 0;
@@ -547,7 +842,11 @@ iavf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
        IAVF_WRITE_FLUSH(hw);
 
        /* Ready to switch the queue on */
-       err = iavf_switch_queue(adapter, rx_queue_id, true, true);
+       if (!vf->lv_enabled)
+               err = iavf_switch_queue(adapter, rx_queue_id, true, true);
+       else
+               err = iavf_switch_queue_lv(adapter, rx_queue_id, true, true);
+
        if (err)
                PMD_DRV_LOG(ERR, "Failed to switch RX queue %u on",
                            rx_queue_id);
@@ -563,6 +862,7 @@ iavf_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 {
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct iavf_tx_queue *txq;
        int err = 0;
@@ -579,7 +879,10 @@ iavf_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
        IAVF_WRITE_FLUSH(hw);
 
        /* Ready to switch the queue on */
-       err = iavf_switch_queue(adapter, tx_queue_id, false, true);
+       if (!vf->lv_enabled)
+               err = iavf_switch_queue(adapter, tx_queue_id, false, true);
+       else
+               err = iavf_switch_queue_lv(adapter, tx_queue_id, false, true);
 
        if (err)
                PMD_DRV_LOG(ERR, "Failed to switch TX queue %u on",
@@ -680,12 +983,22 @@ iavf_stop_queues(struct rte_eth_dev *dev)
 {
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        struct iavf_rx_queue *rxq;
        struct iavf_tx_queue *txq;
        int ret, i;
 
        /* Stop All queues */
-       ret = iavf_disable_queues(adapter);
+       if (!vf->lv_enabled) {
+               ret = iavf_disable_queues(adapter);
+               if (ret)
+                       PMD_DRV_LOG(WARNING, "Fail to stop queues");
+       } else {
+               ret = iavf_disable_queues_lv(adapter);
+               if (ret)
+                       PMD_DRV_LOG(WARNING, "Fail to stop queues for large VF");
+       }
+
        if (ret)
                PMD_DRV_LOG(WARNING, "Fail to stop queues");
 
@@ -707,6 +1020,14 @@ iavf_stop_queues(struct rte_eth_dev *dev)
        }
 }
 
+/*
+ * Mask of the error bits in a flexible Rx descriptor's status_error0 word:
+ * HBO, the four XSUM_* checksum error bits and RXE.
+ */
+#define IAVF_RX_FLEX_ERR0_BITS \
+       ((1 << IAVF_RX_FLEX_DESC_STATUS0_HBO_S) |       \
+        (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |  \
+        (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |  \
+        (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) | \
+        (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S) |        \
+        (1 << IAVF_RX_FLEX_DESC_STATUS0_RXE_S))
+
 static inline void
 iavf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union iavf_rx_desc *rxdp)
 {
@@ -720,6 +1041,31 @@ iavf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union iavf_rx_desc *rxdp)
        }
 }
 
+/*
+ * Extract the stripped VLAN TCI from a flexible Rx descriptor.
+ *
+ * rx_flags selects where the tag is looked up: L2TAG1 (gated by the L2TAG1P
+ * bit of status_error0) and, with 32-byte descriptors only, the second
+ * L2TAG2 field (gated by the L2TAG2P bit of status_error1). When both
+ * apply, the L2TAG2 value is the one kept. A non-zero TCI is stored in the
+ * mbuf together with PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; a zero TCI leaves
+ * the mbuf untouched.
+ */
+static inline void
+iavf_flex_rxd_to_vlan_tci(struct rte_mbuf *mb,
+                         volatile union iavf_rx_flex_desc *rxdp,
+                         uint8_t rx_flags)
+{
+       uint16_t vlan_tci = 0;
+
+       /*
+        * status_error0 is converted as a 16-bit little-endian field, the
+        * same way the receive loops in this file read it.
+        */
+       if ((rx_flags & IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG1) &&
+           (rte_le_to_cpu_16(rxdp->wb.status_error0) &
+            (1 << IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S)))
+               vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag1);
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+       if ((rx_flags & IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2) &&
+           (rte_le_to_cpu_16(rxdp->wb.status_error1) &
+            (1 << IAVF_RX_FLEX_DESC_STATUS1_L2TAG2P_S)))
+               vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag2_2nd);
+#endif
+
+       if (vlan_tci) {
+               mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
+               mb->vlan_tci = vlan_tci;
+       }
+}
+
 /* Translate the rx descriptor status and error fields to pkt flags */
 static inline uint64_t
 iavf_rxd_to_pkt_flags(uint64_t qword)
@@ -734,6 +1080,10 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
                                        IAVF_RX_DESC_FLTSTAT_RSS_HASH) ==
                        IAVF_RX_DESC_FLTSTAT_RSS_HASH) ? PKT_RX_RSS_HASH : 0;
 
+       /* Check if FDIR Match */
+       flags |= (qword & (1 << IAVF_RX_DESC_STATUS_FLM_SHIFT) ?
+                               PKT_RX_FDIR : 0);
+
        if (likely((error_bits & IAVF_RX_ERR_BITS) == 0)) {
                flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
                return flags;
@@ -754,6 +1104,93 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
        return flags;
 }
 
+/*
+ * Copy the flow director ID from a legacy Rx descriptor into the mbuf.
+ *
+ * With 32-byte descriptors the ID is taken from qword3 and only when the
+ * FLEXBH field of the extended status equals FD_ID; with 16-byte
+ * descriptors it is always taken from qword0.
+ *
+ * Returns PKT_RX_FDIR_ID when an ID was stored, 0 otherwise.
+ */
+static inline uint64_t
+iavf_rxd_build_fdir(volatile union iavf_rx_desc *rxdp, struct rte_mbuf *mb)
+{
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+       const uint16_t flexbh = (uint16_t)
+               ((rte_le_to_cpu_32(rxdp->wb.qword2.ext_status) >>
+                 IAVF_RX_DESC_EXT_STATUS_FLEXBH_SHIFT) &
+                IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK);
+
+       if (flexbh != IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID)
+               return 0;
+
+       mb->hash.fdir.hi = rte_le_to_cpu_32(rxdp->wb.qword3.hi_dword.fd_id);
+       return PKT_RX_FDIR_ID;
+#else
+       mb->hash.fdir.hi = rte_le_to_cpu_32(rxdp->wb.qword0.hi_dword.fd_id);
+       return PKT_RX_FDIR_ID;
+#endif
+}
+
+/*
+ * Mask of the error bits in a flexible Rx descriptor's status_error0 word:
+ * HBO, the four XSUM_* checksum error bits and RXE.
+ * NOTE(review): the same macro, with the same replacement text, is also
+ * defined earlier in this file - confirm whether both copies are wanted.
+ */
+#define IAVF_RX_FLEX_ERR0_BITS \
+       ((1 << IAVF_RX_FLEX_DESC_STATUS0_HBO_S) |       \
+        (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |  \
+        (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |  \
+        (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) | \
+        (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S) |        \
+        (1 << IAVF_RX_FLEX_DESC_STATUS0_RXE_S))
+
+/*
+ * Rx L3/L4 checksum: translate the status_error0 word of a flexible Rx
+ * descriptor into PKT_RX_*_CKSUM_* flags.
+ *
+ * Returns 0 when the L3L4P bit is clear, both GOOD flags when none of the
+ * IAVF_RX_FLEX_ERR0_BITS is set, and otherwise a per-layer GOOD/BAD
+ * verdict plus PKT_RX_OUTER_IP_CKSUM_BAD when the EIPE bit is set.
+ */
+static inline uint64_t
+iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
+{
+       uint64_t cksum_flags;
+
+       /* check if HW has decoded the packet and checksum */
+       if (unlikely(!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_L3L4P_S))))
+               return 0;
+
+       /* fast path: no error bit at all */
+       if (likely((stat_err0 & IAVF_RX_FLEX_ERR0_BITS) == 0))
+               return PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD;
+
+       cksum_flags =
+               unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S)) ?
+               PKT_RX_IP_CKSUM_BAD : PKT_RX_IP_CKSUM_GOOD;
+
+       cksum_flags |=
+               unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) ?
+               PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD;
+
+       if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))
+               cksum_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
+
+       return cksum_flags;
+}
+
+/* Account for nb_hold newly consumed Rx descriptors and, once the total
+ * held count exceeds the queue's RX free threshold, advance the Receive
+ * Descriptor Tail (RDT) register. The RDT is written with the index of the
+ * last processed RX descriptor minus 1 (wrapping to the end of the ring),
+ * to guarantee that the RDT register is never equal to the RDH register,
+ * which creates a "full" ring situation from the hardware point of view.
+ */
+static inline void
+iavf_update_rx_tail(struct iavf_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
+{
+       const uint16_t held = (uint16_t)(nb_hold + rxq->nb_rx_hold);
+
+       /* below the threshold: just remember how many are held */
+       if (held <= rxq->rx_free_thresh) {
+               rxq->nb_rx_hold = held;
+               return;
+       }
+
+       PMD_RX_LOG(DEBUG,
+                  "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u",
+                  rxq->port_id, rxq->queue_id, rx_id, held);
+
+       if (rx_id == 0)
+               rx_id = (uint16_t)(rxq->nb_rx_desc - 1);
+       else
+               rx_id = (uint16_t)(rx_id - 1);
+
+       IAVF_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+       rxq->nb_rx_hold = 0;
+}
+
 /* implement recv_pkts */
 uint16_t
 iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
@@ -805,6 +1242,113 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                rxd = *rxdp;
                nb_hold++;
                rxe = rxq->sw_ring[rx_id];
+               rxq->sw_ring[rx_id] = nmb;
+               rx_id++;
+               if (unlikely(rx_id == rxq->nb_rx_desc))
+                       rx_id = 0;
+
+               /* Prefetch next mbuf */
+               rte_prefetch0(rxq->sw_ring[rx_id]);
+
+               /* When next RX descriptor is on a cache line boundary,
+                * prefetch the next 4 RX descriptors and next 8 pointers
+                * to mbufs.
+                */
+               if ((rx_id & 0x3) == 0) {
+                       rte_prefetch0(&rx_ring[rx_id]);
+                       rte_prefetch0(rxq->sw_ring[rx_id]);
+               }
+               rxm = rxe;
+               dma_addr =
+                       rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+               rxdp->read.hdr_addr = 0;
+               rxdp->read.pkt_addr = dma_addr;
+
+               rx_packet_len = ((qword1 & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >>
+                               IAVF_RXD_QW1_LENGTH_PBUF_SHIFT) - rxq->crc_len;
+
+               rxm->data_off = RTE_PKTMBUF_HEADROOM;
+               rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
+               rxm->nb_segs = 1;
+               rxm->next = NULL;
+               rxm->pkt_len = rx_packet_len;
+               rxm->data_len = rx_packet_len;
+               rxm->port = rxq->port_id;
+               rxm->ol_flags = 0;
+               iavf_rxd_to_vlan_tci(rxm, &rxd);
+               pkt_flags = iavf_rxd_to_pkt_flags(qword1);
+               rxm->packet_type =
+                       ptype_tbl[(uint8_t)((qword1 &
+                       IAVF_RXD_QW1_PTYPE_MASK) >> IAVF_RXD_QW1_PTYPE_SHIFT)];
+
+               if (pkt_flags & PKT_RX_RSS_HASH)
+                       rxm->hash.rss =
+                               rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
+
+               if (pkt_flags & PKT_RX_FDIR)
+                       pkt_flags |= iavf_rxd_build_fdir(&rxd, rxm);
+
+               rxm->ol_flags |= pkt_flags;
+
+               rx_pkts[nb_rx++] = rxm;
+       }
+       rxq->rx_tail = rx_id;
+
+       iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+       return nb_rx;
+}
+
+/* implement recv_pkts for flexible Rx descriptor */
+uint16_t
+iavf_recv_pkts_flex_rxd(void *rx_queue,
+                       struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+       volatile union iavf_rx_desc *rx_ring;
+       volatile union iavf_rx_flex_desc *rxdp;
+       struct iavf_rx_queue *rxq;
+       union iavf_rx_flex_desc rxd;
+       struct rte_mbuf *rxe;
+       struct rte_eth_dev *dev;
+       struct rte_mbuf *rxm;
+       struct rte_mbuf *nmb;
+       uint16_t nb_rx;
+       uint16_t rx_stat_err0;
+       uint16_t rx_packet_len;
+       uint16_t rx_id, nb_hold;
+       uint64_t dma_addr;
+       uint64_t pkt_flags;
+       const uint32_t *ptype_tbl;
+
+       nb_rx = 0;
+       nb_hold = 0;
+       rxq = rx_queue;
+       rx_id = rxq->rx_tail;
+       rx_ring = rxq->rx_ring;
+       ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+       while (nb_rx < nb_pkts) {
+               rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+               rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+               /* Check the DD bit first */
+               if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+                       break;
+               IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+               nmb = rte_mbuf_raw_alloc(rxq->mp);
+               if (unlikely(!nmb)) {
+                       dev = &rte_eth_devices[rxq->port_id];
+                       dev->data->rx_mbuf_alloc_failed++;
+                       PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+                                  "queue_id=%u", rxq->port_id, rxq->queue_id);
+                       break;
+               }
+
+               rxd = *rxdp;
+               nb_hold++;
+               rxe = rxq->sw_ring[rx_id];
+               rxq->sw_ring[rx_id] = nmb;
                rx_id++;
                if (unlikely(rx_id == rxq->nb_rx_desc))
                        rx_id = 0;
@@ -820,57 +1364,186 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                        rte_prefetch0(&rx_ring[rx_id]);
                        rte_prefetch0(rxq->sw_ring[rx_id]);
                }
-               rxm = rxe;
-               rxe = nmb;
-               dma_addr =
-                       rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
-               rxdp->read.hdr_addr = 0;
-               rxdp->read.pkt_addr = dma_addr;
+               rxm = rxe;
+               dma_addr =
+                       rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+               rxdp->read.hdr_addr = 0;
+               rxdp->read.pkt_addr = dma_addr;
+
+               rx_packet_len = (rte_le_to_cpu_16(rxd.wb.pkt_len) &
+                               IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
+
+               rxm->data_off = RTE_PKTMBUF_HEADROOM;
+               rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
+               rxm->nb_segs = 1;
+               rxm->next = NULL;
+               rxm->pkt_len = rx_packet_len;
+               rxm->data_len = rx_packet_len;
+               rxm->port = rxq->port_id;
+               rxm->ol_flags = 0;
+               rxm->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+                       rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
+               iavf_flex_rxd_to_vlan_tci(rxm, &rxd, rxq->rx_flags);
+               rxq->rxd_to_pkt_fields(rxq, rxm, &rxd);
+               pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
+               rxm->ol_flags |= pkt_flags;
+
+               rx_pkts[nb_rx++] = rxm;
+       }
+       rxq->rx_tail = rx_id;
+
+       iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+       return nb_rx;
+}
+
+/* Scattered (multi-segment) Rx burst function for flexible Rx descriptors */
+uint16_t
+iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+                                 uint16_t nb_pkts)
+{
+       struct iavf_rx_queue *rxq = rx_queue;
+       union iavf_rx_flex_desc rxd;
+       struct rte_mbuf *rxe;
+       struct rte_mbuf *first_seg = rxq->pkt_first_seg;
+       struct rte_mbuf *last_seg = rxq->pkt_last_seg;
+       struct rte_mbuf *nmb, *rxm;
+       uint16_t rx_id = rxq->rx_tail;
+       uint16_t nb_rx = 0, nb_hold = 0, rx_packet_len;
+       struct rte_eth_dev *dev;
+       uint16_t rx_stat_err0;
+       uint64_t dma_addr;
+       uint64_t pkt_flags;
+
+       volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
+       volatile union iavf_rx_flex_desc *rxdp;
+       const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+       while (nb_rx < nb_pkts) {
+               rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+               rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+               /* Stop at the first descriptor whose DD bit is not set */
+               if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+                       break;
+               IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+               nmb = rte_mbuf_raw_alloc(rxq->mp);
+               if (unlikely(!nmb)) {
+                       PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+                                  "queue_id=%u", rxq->port_id, rxq->queue_id);
+                       dev = &rte_eth_devices[rxq->port_id];
+                       dev->data->rx_mbuf_alloc_failed++;
+                       break;
+               }
+
+               rxd = *rxdp; /* local copy; rxdp is rewritten below */
+               nb_hold++;
+               rxe = rxq->sw_ring[rx_id];
+               rxq->sw_ring[rx_id] = nmb;
+               rx_id++;
+               if (rx_id == rxq->nb_rx_desc)
+                       rx_id = 0;
+
+               /* Prefetch next mbuf */
+               rte_prefetch0(rxq->sw_ring[rx_id]);
+
+               /* When rx_id is a multiple of 4 (presumably a cache line
+                * boundary of the ring), also prefetch the descriptor at
+                * rx_id; the mbuf prefetch repeats the one issued above.
+                */
+               if ((rx_id & 0x3) == 0) {
+                       rte_prefetch0(&rx_ring[rx_id]);
+                       rte_prefetch0(rxq->sw_ring[rx_id]);
+               }
+
+               rxm = rxe;
+               dma_addr =
+                       rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+
+               /* Point the descriptor at nmb; size the received mbuf rxm */
+               rxdp->read.hdr_addr = 0;
+               rxdp->read.pkt_addr = dma_addr;
+               rx_packet_len = rte_le_to_cpu_16(rxd.wb.pkt_len) &
+                               IAVF_RX_FLX_DESC_PKT_LEN_M;
+               rxm->data_len = rx_packet_len;
+               rxm->data_off = RTE_PKTMBUF_HEADROOM;
+
+               /* If this is the first buffer of the received packet, set the
+                * pointer to the first mbuf of the packet and initialize its
+                * context. Otherwise, update the total length and the number
+                * of segments of the current scattered packet, and update the
+                * pointer to the last mbuf of the current packet.
+                */
+               if (!first_seg) {
+                       first_seg = rxm;
+                       first_seg->nb_segs = 1;
+                       first_seg->pkt_len = rx_packet_len;
+               } else {
+                       first_seg->pkt_len =
+                               (uint16_t)(first_seg->pkt_len +
+                                               rx_packet_len);
+                       first_seg->nb_segs++;
+                       last_seg->next = rxm;
+               }
 
-               rx_packet_len = ((qword1 & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >>
-                               IAVF_RXD_QW1_LENGTH_PBUF_SHIFT) - rxq->crc_len;
+               /* If this is not the last buffer of the received packet,
+                * update the pointer to the last mbuf of the current scattered
+                * packet and continue to parse the RX ring.
+                */
+               if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_EOF_S))) {
+                       last_seg = rxm;
+                       continue;
+               }
 
-               rxm->data_off = RTE_PKTMBUF_HEADROOM;
-               rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
-               rxm->nb_segs = 1;
+               /* This is the last buffer of the received packet. If the CRC
+                * is not stripped by the hardware:
+                *  - Subtract the CRC length from the total packet length.
+                *  - If the last buffer only contains the whole CRC or a part
+                *  of it, free the mbuf associated to the last buffer. If part
+                *  of the CRC is also contained in the previous mbuf, subtract
+                *  the length of that CRC part from the data length of the
+                *  previous mbuf.
+                */
                rxm->next = NULL;
-               rxm->pkt_len = rx_packet_len;
-               rxm->data_len = rx_packet_len;
-               rxm->port = rxq->port_id;
-               rxm->ol_flags = 0;
-               iavf_rxd_to_vlan_tci(rxm, &rxd);
-               pkt_flags = iavf_rxd_to_pkt_flags(qword1);
-               rxm->packet_type =
-                       ptype_tbl[(uint8_t)((qword1 &
-                       IAVF_RXD_QW1_PTYPE_MASK) >> IAVF_RXD_QW1_PTYPE_SHIFT)];
+               if (unlikely(rxq->crc_len > 0)) {
+                       first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
+                       if (rx_packet_len <= RTE_ETHER_CRC_LEN) {
+                               rte_pktmbuf_free_seg(rxm);
+                               first_seg->nb_segs--;
+                               last_seg->data_len =
+                                       (uint16_t)(last_seg->data_len -
+                                       (RTE_ETHER_CRC_LEN - rx_packet_len));
+                               last_seg->next = NULL;
+                       } else {
+                               rxm->data_len = (uint16_t)(rx_packet_len -
+                                                       RTE_ETHER_CRC_LEN);
+                       }
+               }
 
-               if (pkt_flags & PKT_RX_RSS_HASH)
-                       rxm->hash.rss =
-                               rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
+               first_seg->port = rxq->port_id;
+               first_seg->ol_flags = 0;
+               first_seg->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+                       rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
+               iavf_flex_rxd_to_vlan_tci(first_seg, &rxd, rxq->rx_flags);
+               rxq->rxd_to_pkt_fields(rxq, first_seg, &rxd);
+               pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
-               rxm->ol_flags |= pkt_flags;
+               first_seg->ol_flags |= pkt_flags;
 
-               rx_pkts[nb_rx++] = rxm;
+               /* Prefetch the start of the first segment's data (always). */
+               rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr,
+                                         first_seg->data_off));
+               rx_pkts[nb_rx++] = first_seg;
+               first_seg = NULL;
        }
+
+       /* Save the next descriptor index and any partly assembled packet. */
        rxq->rx_tail = rx_id;
+       rxq->pkt_first_seg = first_seg;
+       rxq->pkt_last_seg = last_seg;
 
-       /* If the number of free RX descriptors is greater than the RX free
-        * threshold of the queue, advance the receive tail register of queue.
-        * Update that register with the value of the last processed RX
-        * descriptor minus 1.
-        */
-       nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
-       if (nb_hold > rxq->rx_free_thresh) {
-               PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-                          "nb_hold=%u nb_rx=%u",
-                          rxq->port_id, rxq->queue_id,
-                          rx_id, nb_hold, nb_rx);
-               rx_id = (uint16_t)((rx_id == 0) ?
-                       (rxq->nb_rx_desc - 1) : (rx_id - 1));
-               IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
-               nb_hold = 0;
-       }
-       rxq->nb_rx_hold = nb_hold;
+       iavf_update_rx_tail(rxq, nb_hold, rx_id);
 
        return nb_rx;
 }
@@ -921,6 +1594,7 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                rxd = *rxdp;
                nb_hold++;
                rxe = rxq->sw_ring[rx_id];
+               rxq->sw_ring[rx_id] = nmb;
                rx_id++;
                if (rx_id == rxq->nb_rx_desc)
                        rx_id = 0;
@@ -938,7 +1612,6 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                }
 
                rxm = rxe;
-               rxe = nmb;
                dma_addr =
                        rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 
@@ -1013,6 +1686,9 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                        first_seg->hash.rss =
                                rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
+               if (pkt_flags & PKT_RX_FDIR)
+                       pkt_flags |= iavf_rxd_build_fdir(&rxd, first_seg);
+
                first_seg->ol_flags |= pkt_flags;
 
                /* Prefetch data of first segment, if configured to do so. */
@@ -1027,30 +1703,88 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
        rxq->pkt_first_seg = first_seg;
        rxq->pkt_last_seg = last_seg;
 
-       /* If the number of free RX descriptors is greater than the RX free
-        * threshold of the queue, advance the Receive Descriptor Tail (RDT)
-        * register. Update the RDT with the value of the last processed RX
-        * descriptor minus 1, to guarantee that the RDT register is never
-        * equal to the RDH register, which creates a "full" ring situtation
-        * from the hardware point of view.
+       iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+       return nb_rx;
+}
+/* Stage mbufs of completed flex Rx descriptors; returns how many have DD set */
+#define IAVF_LOOK_AHEAD 8
+static inline int
+iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq)
+{
+       volatile union iavf_rx_flex_desc *rxdp;
+       struct rte_mbuf **rxep;
+       struct rte_mbuf *mb;
+       uint16_t stat_err0;
+       uint16_t pkt_len;
+       int32_t s[IAVF_LOOK_AHEAD], nb_dd;
+       int32_t i, j, nb_rx = 0;
+       uint64_t pkt_flags;
+       const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+       rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail];
+       rxep = &rxq->sw_ring[rxq->rx_tail];
+
+       stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+       /* Make sure there is at least 1 packet to receive */
+       if (!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+               return 0;
+
+       /* Scan LOOK_AHEAD descriptors at a time to determine which
+        * descriptors reference packets that are ready to be received.
         */
-       nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
-       if (nb_hold > rxq->rx_free_thresh) {
-               PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-                          "nb_hold=%u nb_rx=%u",
-                          rxq->port_id, rxq->queue_id,
-                          rx_id, nb_hold, nb_rx);
-               rx_id = (uint16_t)(rx_id == 0 ?
-                       (rxq->nb_rx_desc - 1) : (rx_id - 1));
-               IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
-               nb_hold = 0;
+       for (i = 0; i < IAVF_RX_MAX_BURST; i += IAVF_LOOK_AHEAD,
+            rxdp += IAVF_LOOK_AHEAD, rxep += IAVF_LOOK_AHEAD) {
+               /* Read desc statuses backwards to avoid race condition */
+               for (j = IAVF_LOOK_AHEAD - 1; j >= 0; j--)
+                       s[j] = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
+
+               rte_smp_rmb();
+
+               /* Count the descriptors in this group that have DD set */
+               for (j = 0, nb_dd = 0; j < IAVF_LOOK_AHEAD; j++)
+                       nb_dd += (s[j] >> IAVF_RX_FLEX_DESC_STATUS0_DD_S) & 1;
+
+               nb_rx += nb_dd;
+
+               /* Translate descriptor info to mbuf parameters */
+               for (j = 0; j < nb_dd; j++) {
+                       /* i already advances in IAVF_LOOK_AHEAD steps */
+                       IAVF_DUMP_RX_DESC(rxq, &rxdp[j],
+                                         rxq->rx_tail + i + j);
+
+                       mb = rxep[j];
+                       pkt_len = (rte_le_to_cpu_16(rxdp[j].wb.pkt_len) &
+                               IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
+                       mb->data_len = pkt_len;
+                       mb->pkt_len = pkt_len;
+                       mb->ol_flags = 0;
+
+                       mb->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+                               rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
+                       iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j], rxq->rx_flags);
+                       rxq->rxd_to_pkt_fields(rxq, mb, &rxdp[j]);
+                       stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
+                       pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
+
+                       mb->ol_flags |= pkt_flags;
+               }
+
+               for (j = 0; j < IAVF_LOOK_AHEAD; j++)
+                       rxq->rx_stage[i + j] = rxep[j];
+
+               if (nb_dd != IAVF_LOOK_AHEAD)
+                       break;
        }
-       rxq->nb_rx_hold = nb_hold;
+
+       /* Clear the sw_ring slots of the nb_rx mbufs counted above */
+       for (i = 0; i < nb_rx; i++)
+               rxq->sw_ring[rxq->rx_tail + i] = NULL;
 
        return nb_rx;
 }
 
-#define IAVF_LOOK_AHEAD 8
 static inline int
 iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 {
@@ -1121,6 +1855,9 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
                                mb->hash.rss = rte_le_to_cpu_32(
                                        rxdp[j].wb.qword0.hi_dword.rss);
 
+                       if (pkt_flags & PKT_RX_FDIR)
+                               pkt_flags |= iavf_rxd_build_fdir(&rxdp[j], mb);
+
                        mb->ol_flags |= pkt_flags;
                }
 
@@ -1197,7 +1934,7 @@ iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
 
        /* Update rx tail register */
        rte_wmb();
-       IAVF_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rxq->rx_free_trigger);
+       IAVF_PCI_REG_WC_WRITE_RELAXED(rxq->qrx_tail, rxq->rx_free_trigger);
 
        rxq->rx_free_trigger =
                (uint16_t)(rxq->rx_free_trigger + rxq->rx_free_thresh);
@@ -1219,7 +1956,10 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
        if (rxq->rx_nb_avail)
                return iavf_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
 
-       nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
+       if (rxq->rxdid >= IAVF_RXDID_FLEX_NIC && rxq->rxdid <= IAVF_RXDID_LAST)
+               nb_rx = (uint16_t)iavf_rx_scan_hw_ring_flex_rxd(rxq);
+       else
+               nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
        rxq->rx_next_avail = 0;
        rxq->rx_nb_avail = nb_rx;
        rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
@@ -1296,9 +2036,9 @@ iavf_xmit_cleanup(struct iavf_tx_queue *txq)
        if ((txd[desc_to_clean_to].cmd_type_offset_bsz &
                        rte_cpu_to_le_64(IAVF_TXD_QW1_DTYPE_MASK)) !=
                        rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE)) {
-               PMD_TX_FREE_LOG(DEBUG, "TX descriptor %4u is not done "
-                               "(port=%d queue=%d)", desc_to_clean_to,
-                               txq->port_id, txq->queue_id);
+               PMD_TX_LOG(DEBUG, "TX descriptor %4u is not done "
+                          "(port=%d queue=%d)", desc_to_clean_to,
+                          txq->port_id, txq->queue_id);
                return -1;
        }
 
@@ -1319,11 +2059,14 @@ iavf_xmit_cleanup(struct iavf_tx_queue *txq)
 
 /* Check if the context descriptor is needed for TX offloading */
 static inline uint16_t
-iavf_calc_context_desc(uint64_t flags)
+iavf_calc_context_desc(uint64_t flags, uint8_t vlan_flag)
 {
-       static uint64_t mask = PKT_TX_TCP_SEG;
-
-       return (flags & mask) ? 1 : 0;
+       if (flags & PKT_TX_TCP_SEG)
+               return 1;
+       if (flags & PKT_TX_VLAN_PKT &&
+           vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2)
+               return 1;
+       return 0;
 }
 
 static inline void
@@ -1444,6 +2187,7 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
        uint16_t tx_last;
        uint16_t slen;
        uint64_t buf_dma_addr;
+       uint16_t cd_l2tag2 = 0;
        union iavf_tx_offload tx_offload = {0};
 
        txq = tx_queue;
@@ -1454,7 +2198,7 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
        /* Check if the descriptor ring needs to be cleaned. */
        if (txq->nb_free < txq->free_thresh)
-               iavf_xmit_cleanup(txq);
+               (void)iavf_xmit_cleanup(txq);
 
        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
                td_cmd = 0;
@@ -1469,9 +2213,8 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                tx_offload.l3_len = tx_pkt->l3_len;
                tx_offload.l4_len = tx_pkt->l4_len;
                tx_offload.tso_segsz = tx_pkt->tso_segsz;
-
                /* Calculate the number of context descriptors needed. */
-               nb_ctx = iavf_calc_context_desc(ol_flags);
+               nb_ctx = iavf_calc_context_desc(ol_flags, txq->vlan_flag);
 
                /* The number of descriptors that must be allocated for
                 * a packet equals to the number of the segments of that
@@ -1506,7 +2249,8 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                }
 
                /* Descriptor based VLAN insertion */
-               if (ol_flags & PKT_TX_VLAN_PKT) {
+               if (ol_flags & PKT_TX_VLAN_PKT &&
+                   txq->vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1) {
                        td_cmd |= IAVF_TX_DESC_CMD_IL2TAG1;
                        td_tag = tx_pkt->vlan_tci;
                }
@@ -1529,6 +2273,11 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                                (volatile struct iavf_tx_context_desc *)
                                                        &txr[tx_id];
 
+                       /* clear QW0 or the previous writeback value
+                        * may impact next write
+                        */
+                       *(volatile uint64_t *)ctx_txd = 0;
+
                        txn = &sw_ring[txe->next_id];
                        RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
                        if (txe->mbuf) {
@@ -1541,8 +2290,16 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                                cd_type_cmd_tso_mss |=
                                        iavf_set_tso_ctx(tx_pkt, tx_offload);
 
+                       if (ol_flags & PKT_TX_VLAN_PKT &&
+                          txq->vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
+                               cd_type_cmd_tso_mss |= IAVF_TX_CTX_DESC_IL2TAG2
+                                       << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+                               cd_l2tag2 = tx_pkt->vlan_tci;
+                       }
+
                        ctx_txd->type_cmd_tso_mss =
                                rte_cpu_to_le_64(cd_type_cmd_tso_mss);
+                       ctx_txd->l2tag2 = rte_cpu_to_le_16(cd_l2tag2);
 
                        IAVF_DUMP_TX_DESC(txq, &txr[tx_id], tx_id);
                        txe->last_id = tx_last;
@@ -1603,12 +2360,33 @@ end_of_tx:
        PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
                   txq->port_id, txq->queue_id, tx_id, nb_tx);
 
-       IAVF_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
+       IAVF_PCI_REG_WC_WRITE_RELAXED(txq->qtx_tail, tx_id);
        txq->tx_tail = tx_id;
 
        return nb_tx;
 }
 
+/* Check that the VLAN user priority (PCP) of mbuf m is enabled in the
+ * tc_prio bitmap of txq's traffic class. Returns 0 if so, -1 (logged) if not.
+ */
+static int
+iavf_check_vlan_up2tc(struct iavf_tx_queue *txq, struct rte_mbuf *m)
+{
+       struct rte_eth_dev *dev = &rte_eth_devices[txq->port_id];
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+       uint16_t up;
+
+       up = m->vlan_tci >> IAVF_VLAN_TAG_PCP_OFFSET;
+
+       if (!(vf->qos_cap->cap[txq->tc].tc_prio & BIT(up))) {
+               PMD_TX_LOG(ERR, "packet with vlan pcp %u cannot transmit in queue %u",
+                       up, txq->queue_id);
+               return -1;
+       }
+
+       return 0;
+}
+
 /* TX prep functions */
 uint16_t
 iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
@@ -1617,6 +2395,9 @@ iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
        int i, ret;
        uint64_t ol_flags;
        struct rte_mbuf *m;
+       struct iavf_tx_queue *txq = tx_queue;
+       struct rte_eth_dev *dev = &rte_eth_devices[txq->port_id];
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 
        for (i = 0; i < nb_pkts; i++) {
                m = tx_pkts[i];
@@ -1640,7 +2421,7 @@ iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
                        return i;
                }
 
-#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+#ifdef RTE_ETHDEV_DEBUG_TX
                ret = rte_validate_tx_offload(m);
                if (ret != 0) {
                        rte_errno = -ret;
@@ -1652,6 +2433,15 @@ iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
                        rte_errno = -ret;
                        return i;
                }
+
+               if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS &&
+                   ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN)) {
+                       ret = iavf_check_vlan_up2tc(txq, m);
+                       if (ret != 0) {
+                               rte_errno = -ret;
+                               return i;
+                       }
+               }
        }
 
        return i;
@@ -1663,46 +2453,142 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 {
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
 #ifdef RTE_ARCH_X86
        struct iavf_rx_queue *rxq;
        int i;
+       int check_ret;
        bool use_avx2 = false;
+       bool use_avx512 = false;
+       bool use_flex = false;
+
+       check_ret = iavf_rx_vec_dev_check(dev);
+       if (check_ret >= 0 &&
+           rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+               if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+                    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+                   rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
+                       use_avx2 = true;
+
+#ifdef CC_AVX512_SUPPORT
+               if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
+                   rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1 &&
+                   rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
+                       use_avx512 = true;
+#endif
+
+               if (vf->vf_res->vf_cap_flags &
+                       VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+                       use_flex = true;
 
-       if (!iavf_rx_vec_dev_check(dev)) {
                for (i = 0; i < dev->data->nb_rx_queues; i++) {
                        rxq = dev->data->rx_queues[i];
                        (void)iavf_rxq_vec_setup(rxq);
                }
 
-               if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-                   rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
-                       use_avx2 = true;
-
                if (dev->data->scattered_rx) {
-                       PMD_DRV_LOG(DEBUG,
-                                   "Using %sVector Scattered Rx (port %d).",
-                                   use_avx2 ? "avx2 " : "",
-                                   dev->data->port_id);
-                       dev->rx_pkt_burst = use_avx2 ?
-                                           iavf_recv_scattered_pkts_vec_avx2 :
-                                           iavf_recv_scattered_pkts_vec;
+                       if (!use_avx512) {
+                               PMD_DRV_LOG(DEBUG,
+                                           "Using %sVector Scattered Rx (port %d).",
+                                           use_avx2 ? "avx2 " : "",
+                                           dev->data->port_id);
+                       } else {
+                               if (check_ret == IAVF_VECTOR_PATH)
+                                       PMD_DRV_LOG(DEBUG,
+                                                   "Using AVX512 Vector Scattered Rx (port %d).",
+                                                   dev->data->port_id);
+                               else
+                                       PMD_DRV_LOG(DEBUG,
+                                                   "Using AVX512 OFFLOAD Vector Scattered Rx (port %d).",
+                                                   dev->data->port_id);
+                       }
+                       if (use_flex) {
+                               dev->rx_pkt_burst = use_avx2 ?
+                                       iavf_recv_scattered_pkts_vec_avx2_flex_rxd :
+                                       iavf_recv_scattered_pkts_vec_flex_rxd;
+#ifdef CC_AVX512_SUPPORT
+                               if (use_avx512) {
+                                       if (check_ret == IAVF_VECTOR_PATH)
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_scattered_pkts_vec_avx512_flex_rxd;
+                                       else
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_scattered_pkts_vec_avx512_flex_rxd_offload;
+                               }
+#endif
+                       } else {
+                               dev->rx_pkt_burst = use_avx2 ?
+                                       iavf_recv_scattered_pkts_vec_avx2 :
+                                       iavf_recv_scattered_pkts_vec;
+#ifdef CC_AVX512_SUPPORT
+                               if (use_avx512) {
+                                       if (check_ret == IAVF_VECTOR_PATH)
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_scattered_pkts_vec_avx512;
+                                       else
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_scattered_pkts_vec_avx512_offload;
+                               }
+#endif
+                       }
                } else {
-                       PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
-                                   use_avx2 ? "avx2 " : "",
-                                   dev->data->port_id);
-                       dev->rx_pkt_burst = use_avx2 ?
-                                           iavf_recv_pkts_vec_avx2 :
-                                           iavf_recv_pkts_vec;
+                       if (!use_avx512) {
+                               PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
+                                           use_avx2 ? "avx2 " : "",
+                                           dev->data->port_id);
+                       } else {
+                               if (check_ret == IAVF_VECTOR_PATH)
+                                       PMD_DRV_LOG(DEBUG,
+                                                   "Using AVX512 Vector Rx (port %d).",
+                                                   dev->data->port_id);
+                               else
+                                       PMD_DRV_LOG(DEBUG,
+                                                   "Using AVX512 OFFLOAD Vector Rx (port %d).",
+                                                   dev->data->port_id);
+                       }
+                       if (use_flex) {
+                               dev->rx_pkt_burst = use_avx2 ?
+                                       iavf_recv_pkts_vec_avx2_flex_rxd :
+                                       iavf_recv_pkts_vec_flex_rxd;
+#ifdef CC_AVX512_SUPPORT
+                               if (use_avx512) {
+                                       if (check_ret == IAVF_VECTOR_PATH)
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_pkts_vec_avx512_flex_rxd;
+                                       else
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_pkts_vec_avx512_flex_rxd_offload;
+                               }
+#endif
+                       } else {
+                               dev->rx_pkt_burst = use_avx2 ?
+                                       iavf_recv_pkts_vec_avx2 :
+                                       iavf_recv_pkts_vec;
+#ifdef CC_AVX512_SUPPORT
+                               if (use_avx512) {
+                                       if (check_ret == IAVF_VECTOR_PATH)
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_pkts_vec_avx512;
+                                       else
+                                               dev->rx_pkt_burst =
+                                                       iavf_recv_pkts_vec_avx512_offload;
+                               }
+#endif
+                       }
                }
 
                return;
        }
-#endif
 
+#endif
        if (dev->data->scattered_rx) {
                PMD_DRV_LOG(DEBUG, "Using a Scattered Rx callback (port=%d).",
                            dev->data->port_id);
-               dev->rx_pkt_burst = iavf_recv_scattered_pkts;
+               if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+                       dev->rx_pkt_burst = iavf_recv_scattered_pkts_flex_rxd;
+               else
+                       dev->rx_pkt_burst = iavf_recv_scattered_pkts;
        } else if (adapter->rx_bulk_alloc_allowed) {
                PMD_DRV_LOG(DEBUG, "Using bulk Rx callback (port=%d).",
                            dev->data->port_id);
@@ -1710,7 +2596,10 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
        } else {
                PMD_DRV_LOG(DEBUG, "Using Basic Rx callback (port=%d).",
                            dev->data->port_id);
-               dev->rx_pkt_burst = iavf_recv_pkts;
+               if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+                       dev->rx_pkt_burst = iavf_recv_pkts_flex_rxd;
+               else
+                       dev->rx_pkt_burst = iavf_recv_pkts;
        }
 }
 
@@ -1721,38 +2610,149 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 #ifdef RTE_ARCH_X86
        struct iavf_tx_queue *txq;
        int i;
+       int check_ret;
+       bool use_sse = false;
        bool use_avx2 = false;
+       bool use_avx512 = false;
+
+       check_ret = iavf_tx_vec_dev_check(dev);
+
+       if (check_ret >= 0 &&
+           rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+               /* SSE and AVX2 do not support the offload path yet. */
+               if (check_ret == IAVF_VECTOR_PATH) {
+                       use_sse = true;
+                       if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+                            rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+                           rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
+                               use_avx2 = true;
+               }
+#ifdef CC_AVX512_SUPPORT
+               if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
+                   rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1 &&
+                   rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
+                       use_avx512 = true;
+#endif
+
+               if (!use_sse && !use_avx2 && !use_avx512)
+                       goto normal;
+
+               if (!use_avx512) {
+                       PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
+                                   use_avx2 ? "avx2 " : "",
+                                   dev->data->port_id);
+                       dev->tx_pkt_burst = use_avx2 ?
+                                           iavf_xmit_pkts_vec_avx2 :
+                                           iavf_xmit_pkts_vec;
+               }
+               dev->tx_pkt_prepare = NULL;
+#ifdef CC_AVX512_SUPPORT
+               if (use_avx512) {
+                       if (check_ret == IAVF_VECTOR_PATH) {
+                               dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512;
+                               PMD_DRV_LOG(DEBUG, "Using AVX512 Vector Tx (port %d).",
+                                           dev->data->port_id);
+                       } else {
+                               dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512_offload;
+                               dev->tx_pkt_prepare = iavf_prep_pkts;
+                               PMD_DRV_LOG(DEBUG, "Using AVX512 OFFLOAD Vector Tx (port %d).",
+                                           dev->data->port_id);
+                       }
+               }
+#endif
 
-       if (!iavf_tx_vec_dev_check(dev)) {
                for (i = 0; i < dev->data->nb_tx_queues; i++) {
                        txq = dev->data->tx_queues[i];
                        if (!txq)
                                continue;
+#ifdef CC_AVX512_SUPPORT
+                       if (use_avx512)
+                               iavf_txq_vec_setup_avx512(txq);
+                       else
+                               iavf_txq_vec_setup(txq);
+#else
                        iavf_txq_vec_setup(txq);
+#endif
                }
 
-               if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-                   rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
-                       use_avx2 = true;
-
-               PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
-                           use_avx2 ? "avx2 " : "",
-                           dev->data->port_id);
-               dev->tx_pkt_burst = use_avx2 ?
-                                   iavf_xmit_pkts_vec_avx2 :
-                                   iavf_xmit_pkts_vec;
-               dev->tx_pkt_prepare = NULL;
-
                return;
        }
-#endif
 
+normal:
+#endif
        PMD_DRV_LOG(DEBUG, "Using Basic Tx callback (port=%d).",
                    dev->data->port_id);
        dev->tx_pkt_burst = iavf_xmit_pkts;
        dev->tx_pkt_prepare = iavf_prep_pkts;
 }
 
+static int
+iavf_tx_done_cleanup_full(struct iavf_tx_queue *txq,
+                       uint32_t free_cnt)
+{
+       struct iavf_tx_entry *swr_ring = txq->sw_ring;
+       uint16_t i, tx_last, tx_id;
+       uint16_t nb_tx_free_last; /* txq->nb_free as of the previous pass */
+       uint16_t nb_tx_to_clean; /* max ring entries visited in one pass */
+       uint32_t pkt_cnt; /* packets counted so far; this is the return value */
+       /* Free mbufs held by sw_ring entries and count the packets freed.
+        * Start freeing from the entry that follows tx_tail. */
+       tx_last = txq->tx_tail;
+       tx_id  = swr_ring[tx_last].next_id;
+       /* If nb_free is 0 and iavf_xmit_cleanup() returns non-zero, report 0 */
+       if (txq->nb_free == 0 && iavf_xmit_cleanup(txq))
+               return 0;
+
+       nb_tx_to_clean = txq->nb_free;
+       nb_tx_free_last = txq->nb_free;
+       if (!free_cnt) /* a free_cnt of 0 is replaced by the ring size */
+               free_cnt = txq->nb_tx_desc;
+
+       /* Walk swr_ring, freeing mbuf segments and counting packets,
+        * until free_cnt packets are counted or a break below ends the walk.
+        */
+       for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
+               for (i = 0; i < nb_tx_to_clean &&
+                       pkt_cnt < free_cnt &&
+                       tx_id != tx_last; i++) {
+                       if (swr_ring[tx_id].mbuf != NULL) {
+                               rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
+                               swr_ring[tx_id].mbuf = NULL;
+
+                               /*
+                                * An entry whose last_id is its own index is
+                                * the last segment: increment packet count
+                                */
+                               pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
+                       }
+
+                       tx_id = swr_ring[tx_id].next_id;
+               }
+               /* Stop: (nb_tx_desc - nb_free) < rs_thresh, or back at tx_tail */
+               if (txq->rs_thresh > txq->nb_tx_desc -
+                       txq->nb_free || tx_id == tx_last)
+                       break;
+               /* Still short: call iavf_xmit_cleanup() again; non-zero ends it */
+               if (pkt_cnt < free_cnt) {
+                       if (iavf_xmit_cleanup(txq))
+                               break;
+                       /* Next pass visits only as many entries as nb_free grew */
+                       nb_tx_to_clean = txq->nb_free - nb_tx_free_last;
+                       nb_tx_free_last = txq->nb_free;
+               }
+       }
+
+       return (int)pkt_cnt;
+}
+
+int
+iavf_dev_tx_done_cleanup(void *txq, uint32_t free_cnt)
+{
+       struct iavf_tx_queue *q = (struct iavf_tx_queue *)txq;
+       /* Forward to the full cleanup and return its packet count unchanged */
+       return iavf_tx_done_cleanup_full(q, free_cnt);
+}
+
 void
 iavf_dev_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
                     struct rte_eth_rxq_info *qinfo)
@@ -1797,6 +2797,7 @@ iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id)
 
        rxq = dev->data->rx_queues[queue_id];
        rxdp = &rxq->rx_ring[rxq->rx_tail];
+
        while ((desc < rxq->nb_rx_desc) &&
               ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
                 IAVF_RXD_QW1_STATUS_MASK) >> IAVF_RXD_QW1_STATUS_SHIFT) &
@@ -2406,6 +3407,50 @@ iavf_get_default_ptype_table(void)
                        RTE_PTYPE_TUNNEL_GTPU |
                        RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
                        RTE_PTYPE_INNER_L4_ICMP,
+
+               /* IPv4 --> UDP ECPRI */
+               [372] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [373] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [374] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [375] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [376] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [377] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [378] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [379] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [380] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [381] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+
+               /* IPV6 --> UDP ECPRI */
+               [382] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [383] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [384] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [385] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [386] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [387] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [388] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [389] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [390] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [391] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
                /* All others reserved */
        };