net/iavf: support flex desc metadata extraction
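
Extract protocol fields from the Rx flexible descriptor into an mbuf
dynamic field, exposed to applications through the new rte_pmd_iavf.h
header (dynfield offset, per-protocol dynflag masks and the
RTE_PMD_IFD_DYNF_PROTO_XTR_METADATA() accessor). As a minimal sketch
only (not part of the patch), an application that enabled IPv4
extraction via the proto_xtr devarg might read the metadata roughly as
follows; dump_xtr_metadata() is a hypothetical helper name.

    #include <stdio.h>
    #include <inttypes.h>
    #include <rte_mbuf.h>
    #include <rte_pmd_iavf.h>

    /* Hypothetical helper: print IPv4 extraction metadata, if present. */
    static void
    dump_xtr_metadata(struct rte_mbuf **pkts, uint16_t nb_pkts)
    {
            uint16_t i;

            /* The dynamic field is registered only when a proto_xtr
             * devarg enabled extraction on at least one queue.
             */
            if (!rte_pmd_ifd_dynf_proto_xtr_metadata_avail())
                    return;

            for (i = 0; i < nb_pkts; i++) {
                    struct rte_mbuf *m = pkts[i];

                    /* Per-queue dynflag set by the rxd_to_pkt_fields handler */
                    if (m->ol_flags & rte_pmd_ifd_dynflag_proto_xtr_ipv4_mask)
                            printf("pkt %u: xtr metadata 0x%08" PRIx32 "\n",
                                   i, *RTE_PMD_IFD_DYNF_PROTO_XTR_METADATA(m));
            }
    }
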
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 847a5f8..baac5d6 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
 #include <rte_udp.h>
 #include <rte_ip.h>
 #include <rte_net.h>
+#include <rte_vect.h>
 
 #include "iavf.h"
 #include "iavf_rxtx.h"
+#include "rte_pmd_iavf.h"
+
+/* Offset of mbuf dynamic field for protocol extraction's metadata */
+int rte_pmd_ifd_dynfield_proto_xtr_metadata_offs = -1;
+
+/* Mask of mbuf dynamic flags for protocol extraction's type */
+uint64_t rte_pmd_ifd_dynflag_proto_xtr_vlan_mask;
+uint64_t rte_pmd_ifd_dynflag_proto_xtr_ipv4_mask;
+uint64_t rte_pmd_ifd_dynflag_proto_xtr_ipv6_mask;
+uint64_t rte_pmd_ifd_dynflag_proto_xtr_ipv6_flow_mask;
+uint64_t rte_pmd_ifd_dynflag_proto_xtr_tcp_mask;
+uint64_t rte_pmd_ifd_dynflag_proto_xtr_ip_offset_mask;
+
+uint8_t
+iavf_proto_xtr_type_to_rxdid(uint8_t flex_type)
+{
+       static uint8_t rxdid_map[] = {
+               [IAVF_PROTO_XTR_NONE]      = IAVF_RXDID_COMMS_OVS_1,
+               [IAVF_PROTO_XTR_VLAN]      = IAVF_RXDID_COMMS_AUX_VLAN,
+               [IAVF_PROTO_XTR_IPV4]      = IAVF_RXDID_COMMS_AUX_IPV4,
+               [IAVF_PROTO_XTR_IPV6]      = IAVF_RXDID_COMMS_AUX_IPV6,
+               [IAVF_PROTO_XTR_IPV6_FLOW] = IAVF_RXDID_COMMS_AUX_IPV6_FLOW,
+               [IAVF_PROTO_XTR_TCP]       = IAVF_RXDID_COMMS_AUX_TCP,
+               [IAVF_PROTO_XTR_IP_OFFSET] = IAVF_RXDID_COMMS_AUX_IP_OFFSET,
+       };
+
+       return flex_type < RTE_DIM(rxdid_map) ?
+                               rxdid_map[flex_type] : IAVF_RXDID_COMMS_OVS_1;
+}
 
 static inline int
 check_rx_thresh(uint16_t nb_desc, uint16_t thresh)
@@ -294,6 +324,160 @@ static const struct iavf_txq_ops def_txq_ops = {
        .release_mbufs = release_txq_mbufs,
 };
 
+static inline void
+iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct iavf_rx_queue *rxq,
+                                   struct rte_mbuf *mb,
+                                   volatile union iavf_rx_flex_desc *rxdp)
+{
+       volatile struct iavf_32b_rx_flex_desc_comms_ovs *desc =
+                       (volatile struct iavf_32b_rx_flex_desc_comms_ovs *)rxdp;
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+       uint16_t stat_err;
+#endif
+
+       if (desc->flow_id != 0xFFFFFFFF) {
+               mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+               mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
+       }
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+       stat_err = rte_le_to_cpu_16(desc->status_error0);
+       if (likely(stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+               mb->ol_flags |= PKT_RX_RSS_HASH;
+               mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
+       }
+#endif
+}
+
+static inline void
+iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct iavf_rx_queue *rxq,
+                                      struct rte_mbuf *mb,
+                                      volatile union iavf_rx_flex_desc *rxdp)
+{
+       volatile struct iavf_32b_rx_flex_desc_comms *desc =
+                       (volatile struct iavf_32b_rx_flex_desc_comms *)rxdp;
+       uint16_t stat_err;
+
+       stat_err = rte_le_to_cpu_16(desc->status_error0);
+       if (likely(stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+               mb->ol_flags |= PKT_RX_RSS_HASH;
+               mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
+       }
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+       if (desc->flow_id != 0xFFFFFFFF) {
+               mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+               mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
+       }
+
+       if (rxq->xtr_ol_flag) {
+               uint32_t metadata = 0;
+
+               stat_err = rte_le_to_cpu_16(desc->status_error1);
+
+               if (stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS1_XTRMD4_VALID_S))
+                       metadata = rte_le_to_cpu_16(desc->flex_ts.flex.aux0);
+
+               if (stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS1_XTRMD5_VALID_S))
+                       metadata |=
+                               rte_le_to_cpu_16(desc->flex_ts.flex.aux1) << 16;
+
+               if (metadata) {
+                       mb->ol_flags |= rxq->xtr_ol_flag;
+
+                       *RTE_PMD_IFD_DYNF_PROTO_XTR_METADATA(mb) = metadata;
+               }
+       }
+#endif
+}
+
+static inline void
+iavf_rxd_to_pkt_fields_by_comms_aux_v2(struct iavf_rx_queue *rxq,
+                                      struct rte_mbuf *mb,
+                                      volatile union iavf_rx_flex_desc *rxdp)
+{
+       volatile struct iavf_32b_rx_flex_desc_comms *desc =
+                       (volatile struct iavf_32b_rx_flex_desc_comms *)rxdp;
+       uint16_t stat_err;
+
+       stat_err = rte_le_to_cpu_16(desc->status_error0);
+       if (likely(stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+               mb->ol_flags |= PKT_RX_RSS_HASH;
+               mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
+       }
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+       if (desc->flow_id != 0xFFFFFFFF) {
+               mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+               mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
+       }
+
+       if (rxq->xtr_ol_flag) {
+               uint32_t metadata = 0;
+
+               if (desc->flex_ts.flex.aux0 != 0xFFFF)
+                       metadata = rte_le_to_cpu_16(desc->flex_ts.flex.aux0);
+               else if (desc->flex_ts.flex.aux1 != 0xFFFF)
+                       metadata = rte_le_to_cpu_16(desc->flex_ts.flex.aux1);
+
+               if (metadata) {
+                       mb->ol_flags |= rxq->xtr_ol_flag;
+
+                       *RTE_PMD_IFD_DYNF_PROTO_XTR_METADATA(mb) = metadata;
+               }
+       }
+#endif
+}
+
+static void
+iavf_select_rxd_to_pkt_fields_handler(struct iavf_rx_queue *rxq, uint32_t rxdid)
+{
+       switch (rxdid) {
+       case IAVF_RXDID_COMMS_AUX_VLAN:
+               rxq->xtr_ol_flag = rte_pmd_ifd_dynflag_proto_xtr_vlan_mask;
+               rxq->rxd_to_pkt_fields =
+                       iavf_rxd_to_pkt_fields_by_comms_aux_v1;
+               break;
+       case IAVF_RXDID_COMMS_AUX_IPV4:
+               rxq->xtr_ol_flag = rte_pmd_ifd_dynflag_proto_xtr_ipv4_mask;
+               rxq->rxd_to_pkt_fields =
+                       iavf_rxd_to_pkt_fields_by_comms_aux_v1;
+               break;
+       case IAVF_RXDID_COMMS_AUX_IPV6:
+               rxq->xtr_ol_flag = rte_pmd_ifd_dynflag_proto_xtr_ipv6_mask;
+               rxq->rxd_to_pkt_fields =
+                       iavf_rxd_to_pkt_fields_by_comms_aux_v1;
+               break;
+       case IAVF_RXDID_COMMS_AUX_IPV6_FLOW:
+               rxq->xtr_ol_flag =
+                       rte_pmd_ifd_dynflag_proto_xtr_ipv6_flow_mask;
+               rxq->rxd_to_pkt_fields =
+                       iavf_rxd_to_pkt_fields_by_comms_aux_v1;
+               break;
+       case IAVF_RXDID_COMMS_AUX_TCP:
+               rxq->xtr_ol_flag = rte_pmd_ifd_dynflag_proto_xtr_tcp_mask;
+               rxq->rxd_to_pkt_fields =
+                       iavf_rxd_to_pkt_fields_by_comms_aux_v1;
+               break;
+       case IAVF_RXDID_COMMS_AUX_IP_OFFSET:
+               rxq->xtr_ol_flag =
+                       rte_pmd_ifd_dynflag_proto_xtr_ip_offset_mask;
+               rxq->rxd_to_pkt_fields =
+                       iavf_rxd_to_pkt_fields_by_comms_aux_v2;
+               break;
+       case IAVF_RXDID_COMMS_OVS_1:
+               rxq->rxd_to_pkt_fields = iavf_rxd_to_pkt_fields_by_comms_ovs;
+               break;
+       default:
+               /* update this according to the RXDID for FLEX_DESC_NONE */
+               rxq->rxd_to_pkt_fields = iavf_rxd_to_pkt_fields_by_comms_ovs;
+               break;
+       }
+
+       if (!rte_pmd_ifd_dynf_proto_xtr_metadata_avail())
+               rxq->xtr_ol_flag = 0;
+}
+
 int
 iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
                       uint16_t nb_desc, unsigned int socket_id,
@@ -309,6 +493,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
        struct iavf_rx_queue *rxq;
        const struct rte_memzone *mz;
        uint32_t ring_size;
+       uint8_t proto_xtr;
        uint16_t len;
        uint16_t rx_free_thresh;
 
@@ -346,14 +531,18 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
                return -ENOMEM;
        }
 
-       if (vf->vf_res->vf_cap_flags &
-           VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
-           vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1)) {
-               rxq->rxdid = IAVF_RXDID_COMMS_OVS_1;
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
+               proto_xtr = vf->proto_xtr ? vf->proto_xtr[queue_idx] :
+                               IAVF_PROTO_XTR_NONE;
+               rxq->rxdid = iavf_proto_xtr_type_to_rxdid(proto_xtr);
+               rxq->proto_xtr = proto_xtr;
        } else {
                rxq->rxdid = IAVF_RXDID_LEGACY_1;
+               rxq->proto_xtr = IAVF_PROTO_XTR_NONE;
        }
 
+       iavf_select_rxd_to_pkt_fields_handler(rxq, rxq->rxdid);
+
        rxq->mp = mp;
        rxq->nb_rx_desc = nb_desc;
        rxq->rx_free_thresh = rx_free_thresh;
@@ -531,6 +720,7 @@ iavf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct iavf_rx_queue *rxq;
        int err = 0;
@@ -555,7 +745,11 @@ iavf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
        IAVF_WRITE_FLUSH(hw);
 
        /* Ready to switch the queue on */
-       err = iavf_switch_queue(adapter, rx_queue_id, true, true);
+       if (!vf->lv_enabled)
+               err = iavf_switch_queue(adapter, rx_queue_id, true, true);
+       else
+               err = iavf_switch_queue_lv(adapter, rx_queue_id, true, true);
+
        if (err)
                PMD_DRV_LOG(ERR, "Failed to switch RX queue %u on",
                            rx_queue_id);
@@ -571,6 +765,7 @@ iavf_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 {
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct iavf_tx_queue *txq;
        int err = 0;
@@ -587,7 +782,10 @@ iavf_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
        IAVF_WRITE_FLUSH(hw);
 
        /* Ready to switch the queue on */
-       err = iavf_switch_queue(adapter, tx_queue_id, false, true);
+       if (!vf->lv_enabled)
+               err = iavf_switch_queue(adapter, tx_queue_id, false, true);
+       else
+               err = iavf_switch_queue_lv(adapter, tx_queue_id, false, true);
 
        if (err)
                PMD_DRV_LOG(ERR, "Failed to switch TX queue %u on",
@@ -688,12 +886,22 @@ iavf_stop_queues(struct rte_eth_dev *dev)
 {
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        struct iavf_rx_queue *rxq;
        struct iavf_tx_queue *txq;
        int ret, i;
 
        /* Stop All queues */
-       ret = iavf_disable_queues(adapter);
+       if (!vf->lv_enabled) {
+               ret = iavf_disable_queues(adapter);
+               if (ret)
+                       PMD_DRV_LOG(WARNING, "Fail to stop queues");
+       } else {
+               ret = iavf_disable_queues_lv(adapter);
+               if (ret)
+                       PMD_DRV_LOG(WARNING, "Fail to stop queues for large VF");
+       }
+
        if (ret)
                PMD_DRV_LOG(WARNING, "Fail to stop queues");
 
@@ -715,6 +923,14 @@ iavf_stop_queues(struct rte_eth_dev *dev)
        }
 }
 
+#define IAVF_RX_FLEX_ERR0_BITS \
+       ((1 << IAVF_RX_FLEX_DESC_STATUS0_HBO_S) |       \
+        (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |  \
+        (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |  \
+        (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) | \
+        (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S) |        \
+        (1 << IAVF_RX_FLEX_DESC_STATUS0_RXE_S))
+
 static inline void
 iavf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union iavf_rx_desc *rxdp)
 {
@@ -740,6 +956,21 @@ iavf_flex_rxd_to_vlan_tci(struct rte_mbuf *mb,
        } else {
                mb->vlan_tci = 0;
        }
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+       if (rte_le_to_cpu_16(rxdp->wb.status_error1) &
+           (1 << IAVF_RX_FLEX_DESC_STATUS1_L2TAG2P_S)) {
+               mb->ol_flags |= PKT_RX_QINQ_STRIPPED | PKT_RX_QINQ |
+                               PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN;
+               mb->vlan_tci_outer = mb->vlan_tci;
+               mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag2_2nd);
+               PMD_RX_LOG(DEBUG, "Descriptor l2tag2_1: %u, l2tag2_2: %u",
+                          rte_le_to_cpu_16(rxdp->wb.l2tag2_1st),
+                          rte_le_to_cpu_16(rxdp->wb.l2tag2_2nd));
+       } else {
+               mb->vlan_tci_outer = 0;
+       }
+#endif
 }
 
 /* Translate the rx descriptor status and error fields to pkt flags */
@@ -756,6 +987,10 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
                                        IAVF_RX_DESC_FLTSTAT_RSS_HASH) ==
                        IAVF_RX_DESC_FLTSTAT_RSS_HASH) ? PKT_RX_RSS_HASH : 0;
 
+       /* Check if FDIR Match */
+       flags |= (qword & (1 << IAVF_RX_DESC_STATUS_FLM_SHIFT) ?
+                               PKT_RX_FDIR : 0);
+
        if (likely((error_bits & IAVF_RX_ERR_BITS) == 0)) {
                flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
                return flags;
@@ -776,23 +1011,29 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
        return flags;
 }
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
-/* Translate the rx flex descriptor status to pkt flags */
-static inline void
-iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
-                      volatile union iavf_rx_flex_desc *rxdp)
+static inline uint64_t
+iavf_rxd_build_fdir(volatile union iavf_rx_desc *rxdp, struct rte_mbuf *mb)
 {
-       volatile struct iavf_32b_rx_flex_desc_comms_ovs *desc =
-                       (volatile struct iavf_32b_rx_flex_desc_comms_ovs *)rxdp;
-       uint16_t stat_err;
+       uint64_t flags = 0;
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+       uint16_t flexbh;
 
-       stat_err = rte_le_to_cpu_16(desc->status_error0);
-       if (likely(stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
-               mb->ol_flags |= PKT_RX_RSS_HASH;
-               mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
+       flexbh = (rte_le_to_cpu_32(rxdp->wb.qword2.ext_status) >>
+               IAVF_RX_DESC_EXT_STATUS_FLEXBH_SHIFT) &
+               IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK;
+
+       if (flexbh == IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID) {
+               mb->hash.fdir.hi =
+                       rte_le_to_cpu_32(rxdp->wb.qword3.hi_dword.fd_id);
+               flags |= PKT_RX_FDIR_ID;
        }
-}
+#else
+       mb->hash.fdir.hi =
+               rte_le_to_cpu_32(rxdp->wb.qword0.hi_dword.fd_id);
+       flags |= PKT_RX_FDIR_ID;
 #endif
+       return flags;
+}
 
 #define IAVF_RX_FLEX_ERR0_BITS \
        ((1 << IAVF_RX_FLEX_DESC_STATUS0_HBO_S) |       \
@@ -924,7 +1165,6 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                        rte_prefetch0(rxq->sw_ring[rx_id]);
                }
                rxm = rxe;
-               rxe = nmb;
                dma_addr =
                        rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
                rxdp->read.hdr_addr = 0;
@@ -951,6 +1191,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                        rxm->hash.rss =
                                rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
+               if (pkt_flags & PKT_RX_FDIR)
+                       pkt_flags |= iavf_rxd_build_fdir(&rxd, rxm);
+
                rxm->ol_flags |= pkt_flags;
 
                rx_pkts[nb_rx++] = rxm;
@@ -1027,7 +1270,6 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
                        rte_prefetch0(rxq->sw_ring[rx_id]);
                }
                rxm = rxe;
-               rxe = nmb;
                dma_addr =
                        rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
                rxdp->read.hdr_addr = 0;
@@ -1047,9 +1289,7 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
                rxm->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
                        rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
                iavf_flex_rxd_to_vlan_tci(rxm, &rxd);
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
-               iavf_rxd_to_pkt_fields(rxm, &rxd);
-#endif
+               rxq->rxd_to_pkt_fields(rxq, rxm, &rxd);
                pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
                rxm->ol_flags |= pkt_flags;
 
@@ -1122,7 +1362,6 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
                }
 
                rxm = rxe;
-               rxe = nmb;
                dma_addr =
                        rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 
@@ -1191,9 +1430,7 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
                first_seg->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
                        rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
                iavf_flex_rxd_to_vlan_tci(first_seg, &rxd);
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
-               iavf_rxd_to_pkt_fields(first_seg, &rxd);
-#endif
+               rxq->rxd_to_pkt_fields(rxq, first_seg, &rxd);
                pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
                first_seg->ol_flags |= pkt_flags;
@@ -1278,7 +1515,6 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                }
 
                rxm = rxe;
-               rxe = nmb;
                dma_addr =
                        rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 
@@ -1353,6 +1589,9 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                        first_seg->hash.rss =
                                rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
+               if (pkt_flags & PKT_RX_FDIR)
+                       pkt_flags |= iavf_rxd_build_fdir(&rxd, first_seg);
+
                first_seg->ol_flags |= pkt_flags;
 
                /* Prefetch data of first segment, if configured to do so. */
@@ -1428,9 +1667,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq)
                        mb->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
                                rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
                        iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]);
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
-                       iavf_rxd_to_pkt_fields(mb, &rxdp[j]);
-#endif
+                       rxq->rxd_to_pkt_fields(rxq, mb, &rxdp[j]);
                        stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
                        pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
 
@@ -1521,6 +1758,9 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
                                mb->hash.rss = rte_le_to_cpu_32(
                                        rxdp[j].wb.qword0.hi_dword.rss);
 
+                       if (pkt_flags & PKT_RX_FDIR)
+                               pkt_flags |= iavf_rxd_build_fdir(&rxdp[j], mb);
+
                        mb->ol_flags |= pkt_flags;
                }
 
@@ -1619,7 +1859,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
        if (rxq->rx_nb_avail)
                return iavf_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
 
-       if (rxq->rxdid == IAVF_RXDID_COMMS_OVS_1)
+       if (rxq->rxdid >= IAVF_RXDID_FLEX_NIC && rxq->rxdid <= IAVF_RXDID_LAST)
                nb_rx = (uint16_t)iavf_rx_scan_hw_ring_flex_rxd(rxq);
        else
                nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
@@ -1857,7 +2097,7 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
        /* Check if the descriptor ring needs to be cleaned. */
        if (txq->nb_free < txq->free_thresh)
-               iavf_xmit_cleanup(txq);
+               (void)iavf_xmit_cleanup(txq);
 
        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
                td_cmd = 0;
@@ -1872,7 +2112,6 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                tx_offload.l3_len = tx_pkt->l3_len;
                tx_offload.l4_len = tx_pkt->l4_len;
                tx_offload.tso_segsz = tx_pkt->tso_segsz;
-
                /* Calculate the number of context descriptors needed. */
                nb_ctx = iavf_calc_context_desc(ol_flags);
 
@@ -2067,20 +2306,32 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
 #ifdef RTE_ARCH_X86
        struct iavf_rx_queue *rxq;
        int i;
        bool use_avx2 = false;
+#ifdef CC_AVX512_SUPPORT
+       bool use_avx512 = false;
+#endif
 
-       if (!iavf_rx_vec_dev_check(dev)) {
+       if (!iavf_rx_vec_dev_check(dev) &&
+                       rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
                for (i = 0; i < dev->data->nb_rx_queues; i++) {
                        rxq = dev->data->rx_queues[i];
                        (void)iavf_rxq_vec_setup(rxq);
                }
 
-               if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-                   rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+               if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+                    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+                               rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
                        use_avx2 = true;
+#ifdef CC_AVX512_SUPPORT
+               if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
+                   rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1 &&
+                   rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
+                       use_avx512 = true;
+#endif
 
                if (dev->data->scattered_rx) {
                        PMD_DRV_LOG(DEBUG,
@@ -2088,27 +2339,49 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
                                    use_avx2 ? "avx2 " : "",
                                    dev->data->port_id);
                        if (vf->vf_res->vf_cap_flags &
-                               VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+                               VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
                                dev->rx_pkt_burst = use_avx2 ?
                                        iavf_recv_scattered_pkts_vec_avx2_flex_rxd :
                                        iavf_recv_scattered_pkts_vec_flex_rxd;
-                       else
+#ifdef CC_AVX512_SUPPORT
+                               if (use_avx512)
+                                       dev->rx_pkt_burst =
+                                               iavf_recv_scattered_pkts_vec_avx512_flex_rxd;
+#endif
+                       } else {
                                dev->rx_pkt_burst = use_avx2 ?
                                        iavf_recv_scattered_pkts_vec_avx2 :
                                        iavf_recv_scattered_pkts_vec;
+#ifdef CC_AVX512_SUPPORT
+                               if (use_avx512)
+                                       dev->rx_pkt_burst =
+                                               iavf_recv_scattered_pkts_vec_avx512;
+#endif
+                       }
                } else {
                        PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
                                    use_avx2 ? "avx2 " : "",
                                    dev->data->port_id);
                        if (vf->vf_res->vf_cap_flags &
-                               VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+                               VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
                                dev->rx_pkt_burst = use_avx2 ?
                                        iavf_recv_pkts_vec_avx2_flex_rxd :
                                        iavf_recv_pkts_vec_flex_rxd;
-                       else
+#ifdef CC_AVX512_SUPPORT
+                               if (use_avx512)
+                                       dev->rx_pkt_burst =
+                                               iavf_recv_pkts_vec_avx512_flex_rxd;
+#endif
+                       } else {
                                dev->rx_pkt_burst = use_avx2 ?
                                        iavf_recv_pkts_vec_avx2 :
                                        iavf_recv_pkts_vec;
+#ifdef CC_AVX512_SUPPORT
+                               if (use_avx512)
+                                       dev->rx_pkt_burst =
+                                               iavf_recv_pkts_vec_avx512;
+#endif
+                       }
                }
 
                return;
@@ -2144,18 +2417,22 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
        struct iavf_tx_queue *txq;
        int i;
        bool use_avx2 = false;
+#ifdef CC_AVX512_SUPPORT
+       bool use_avx512 = false;
+#endif
 
-       if (!iavf_tx_vec_dev_check(dev)) {
-               for (i = 0; i < dev->data->nb_tx_queues; i++) {
-                       txq = dev->data->tx_queues[i];
-                       if (!txq)
-                               continue;
-                       iavf_txq_vec_setup(txq);
-               }
-
-               if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-                   rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+       if (!iavf_tx_vec_dev_check(dev) &&
+                       rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+               if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+                    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+                               rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
                        use_avx2 = true;
+#ifdef CC_AVX512_SUPPORT
+               if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
+                   rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1 &&
+                   rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
+                       use_avx512 = true;
+#endif
 
                PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
                            use_avx2 ? "avx2 " : "",
@@ -2163,8 +2440,26 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
                dev->tx_pkt_burst = use_avx2 ?
                                    iavf_xmit_pkts_vec_avx2 :
                                    iavf_xmit_pkts_vec;
+#ifdef CC_AVX512_SUPPORT
+               if (use_avx512)
+                       dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512;
+#endif
                dev->tx_pkt_prepare = NULL;
 
+               for (i = 0; i < dev->data->nb_tx_queues; i++) {
+                       txq = dev->data->tx_queues[i];
+                       if (!txq)
+                               continue;
+#ifdef CC_AVX512_SUPPORT
+                       if (use_avx512)
+                               iavf_txq_vec_setup_avx512(txq);
+                       else
+                               iavf_txq_vec_setup(txq);
+#else
+                       iavf_txq_vec_setup(txq);
+#endif
+               }
+
                return;
        }
 #endif
@@ -2175,6 +2470,73 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
        dev->tx_pkt_prepare = iavf_prep_pkts;
 }
 
+static int
+iavf_tx_done_cleanup_full(struct iavf_tx_queue *txq,
+                       uint32_t free_cnt)
+{
+       struct iavf_tx_entry *swr_ring = txq->sw_ring;
+       uint16_t i, tx_last, tx_id;
+       uint16_t nb_tx_free_last;
+       uint16_t nb_tx_to_clean;
+       uint32_t pkt_cnt;
+
+       /* Start free mbuf from the next of tx_tail */
+       tx_last = txq->tx_tail;
+       tx_id  = swr_ring[tx_last].next_id;
+
+       if (txq->nb_free == 0 && iavf_xmit_cleanup(txq))
+               return 0;
+
+       nb_tx_to_clean = txq->nb_free;
+       nb_tx_free_last = txq->nb_free;
+       if (!free_cnt)
+               free_cnt = txq->nb_tx_desc;
+
+       /* Loop through swr_ring to count the number of
+        * freeable mbufs and packets.
+        */
+       for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
+               for (i = 0; i < nb_tx_to_clean &&
+                       pkt_cnt < free_cnt &&
+                       tx_id != tx_last; i++) {
+                       if (swr_ring[tx_id].mbuf != NULL) {
+                               rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
+                               swr_ring[tx_id].mbuf = NULL;
+
+                               /*
+                                * last segment in the packet,
+                                * increment packet count
+                                */
+                               pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
+                       }
+
+                       tx_id = swr_ring[tx_id].next_id;
+               }
+
+               if (txq->rs_thresh > txq->nb_tx_desc -
+                       txq->nb_free || tx_id == tx_last)
+                       break;
+
+               if (pkt_cnt < free_cnt) {
+                       if (iavf_xmit_cleanup(txq))
+                               break;
+
+                       nb_tx_to_clean = txq->nb_free - nb_tx_free_last;
+                       nb_tx_free_last = txq->nb_free;
+               }
+       }
+
+       return (int)pkt_cnt;
+}
+
+int
+iavf_dev_tx_done_cleanup(void *txq, uint32_t free_cnt)
+{
+       struct iavf_tx_queue *q = (struct iavf_tx_queue *)txq;
+
+       return iavf_tx_done_cleanup_full(q, free_cnt);
+}
+
 void
 iavf_dev_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
                     struct rte_eth_rxq_info *qinfo)