net/ice: fix DCF crash on Rx
[dpdk.git] / drivers / net / ice / ice_rxtx.c
index 47a654a..5fbd68e 100644 (file)
@@ -4,7 +4,9 @@
 
 #include <rte_ethdev_driver.h>
 #include <rte_net.h>
+#include <rte_vect.h>
 
+#include "rte_pmd_ice.h"
 #include "ice_rxtx.h"
 
 #define ICE_TX_CKSUM_OFFLOAD_MASK (             \
                PKT_TX_TCP_SEG |                 \
                PKT_TX_OUTER_IP_CKSUM)
 
-static inline uint8_t
-ice_rxdid_to_proto_xtr_type(uint8_t rxdid)
-{
-       static uint8_t xtr_map[] = {
-               [ICE_RXDID_COMMS_AUX_VLAN]      = PROTO_XTR_VLAN,
-               [ICE_RXDID_COMMS_AUX_IPV4]      = PROTO_XTR_IPV4,
-               [ICE_RXDID_COMMS_AUX_IPV6]      = PROTO_XTR_IPV6,
-               [ICE_RXDID_COMMS_AUX_IPV6_FLOW] = PROTO_XTR_IPV6_FLOW,
-               [ICE_RXDID_COMMS_AUX_TCP]       = PROTO_XTR_TCP,
-       };
+/* Offset of mbuf dynamic field for protocol extraction data */
+int rte_net_ice_dynfield_proto_xtr_metadata_offs = -1;
 
-       return rxdid < RTE_DIM(xtr_map) ? xtr_map[rxdid] : PROTO_XTR_NONE;
-}
+/* Mask of mbuf dynamic flags for protocol extraction type */
+uint64_t rte_net_ice_dynflag_proto_xtr_vlan_mask;
+uint64_t rte_net_ice_dynflag_proto_xtr_ipv4_mask;
+uint64_t rte_net_ice_dynflag_proto_xtr_ipv6_mask;
+uint64_t rte_net_ice_dynflag_proto_xtr_ipv6_flow_mask;
+uint64_t rte_net_ice_dynflag_proto_xtr_tcp_mask;
+uint64_t rte_net_ice_dynflag_proto_xtr_ip_offset_mask;
 
+/* Translate a protocol extraction type (PROTO_XTR_*) into the flexible Rx
+ * descriptor ID (ICE_RXDID_*) used to program the queue. A type that lies
+ * beyond the end of the table yields ICE_RXDID_COMMS_OVS, the same RXDID the
+ * table assigns to PROTO_XTR_NONE.
+ */
 static inline uint8_t
 ice_proto_xtr_type_to_rxdid(uint8_t xtr_type)
 {
        static uint8_t rxdid_map[] = {
-               [PROTO_XTR_NONE]      = ICE_RXDID_COMMS_GENERIC,
+               [PROTO_XTR_NONE]      = ICE_RXDID_COMMS_OVS,
                [PROTO_XTR_VLAN]      = ICE_RXDID_COMMS_AUX_VLAN,
                [PROTO_XTR_IPV4]      = ICE_RXDID_COMMS_AUX_IPV4,
                [PROTO_XTR_IPV6]      = ICE_RXDID_COMMS_AUX_IPV6,
                [PROTO_XTR_IPV6_FLOW] = ICE_RXDID_COMMS_AUX_IPV6_FLOW,
                [PROTO_XTR_TCP]       = ICE_RXDID_COMMS_AUX_TCP,
+               [PROTO_XTR_IP_OFFSET] = ICE_RXDID_COMMS_AUX_IP_OFFSET,
        };
 
        return xtr_type < RTE_DIM(rxdid_map) ?
-                               rxdid_map[xtr_type] : ICE_RXDID_COMMS_GENERIC;
+                               rxdid_map[xtr_type] : ICE_RXDID_COMMS_OVS;
+}
+
+/* Fill the RSS and flow director fields of an mbuf from a flexible Rx
+ * descriptor interpreted as struct ice_32b_rx_flex_desc_comms.
+ *
+ * @rxq:  not used by this layout; present so that every handler assigned to
+ *        rxq->rxd_to_pkt_fields shares one signature
+ * @mb:   mbuf whose ol_flags and hash fields are updated
+ * @rxdp: descriptor to parse
+ */
+static inline void
+ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ice_rx_queue *rxq,
+                                      struct rte_mbuf *mb,
+                                      volatile union ice_rx_flex_desc *rxdp)
+{
+       volatile struct ice_32b_rx_flex_desc_comms *desc =
+                       (volatile struct ice_32b_rx_flex_desc_comms *)rxdp;
+       uint16_t stat_err = rte_le_to_cpu_16(desc->status_error0);
+
+       /* Report the RSS hash only when status_error0 marks it as valid. */
+       if (likely(stat_err & (1 << ICE_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+               mb->ol_flags |= PKT_RX_RSS_HASH;
+               mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
+       }
+
+       /* flow_id is parsed only when the build does not define
+        * RTE_LIBRTE_ICE_16BYTE_RX_DESC. An all-ones flow_id is skipped;
+        * presumably it means "no flow director match" - TODO confirm
+        * against the descriptor specification.
+        */
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+       if (desc->flow_id != 0xFFFFFFFF) {
+               mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+               mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
+       }
+#endif
+}
+
+/* Fill the flow director and RSS fields of an mbuf from a flexible Rx
+ * descriptor interpreted as struct ice_32b_rx_flex_desc_comms_ovs.
+ *
+ * In this layout flow_id is always parsed, while the RSS hash is parsed only
+ * when the build does not define RTE_LIBRTE_ICE_16BYTE_RX_DESC - the reverse
+ * of the comms generic handler.
+ * NOTE(review): presumably flow_id sits in the first 16 bytes of the OVS
+ * layout and rss_hash in the second half - confirm against the descriptor
+ * definition.
+ *
+ * @rxq:  not used by this layout
+ * @mb:   mbuf whose ol_flags and hash fields are updated
+ * @rxdp: descriptor to parse
+ */
+static inline void
+ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ice_rx_queue *rxq,
+                                  struct rte_mbuf *mb,
+                                  volatile union ice_rx_flex_desc *rxdp)
+{
+       volatile struct ice_32b_rx_flex_desc_comms_ovs *desc =
+                       (volatile struct ice_32b_rx_flex_desc_comms_ovs *)rxdp;
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+       uint16_t stat_err;
+#endif
+
+       /* An all-ones flow_id is skipped (no FDIR flags are set). */
+       if (desc->flow_id != 0xFFFFFFFF) {
+               mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+               mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
+       }
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+       /* Report the RSS hash only when status_error0 marks it as valid. */
+       stat_err = rte_le_to_cpu_16(desc->status_error0);
+       if (likely(stat_err & (1 << ICE_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+               mb->ol_flags |= PKT_RX_RSS_HASH;
+               mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
+       }
+#endif
+}
+
+/* Fill mbuf fields from a flexible Rx descriptor interpreted as
+ * struct ice_32b_rx_flex_desc_comms: the RSS hash, the flow director ID and,
+ * when the queue has a protocol extraction flag, the 32-bit extraction
+ * metadata assembled from the aux0 (low half) and aux1 (high half) words.
+ *
+ * @rxq:  Rx queue; a non-zero xtr_ol_flag enables metadata extraction
+ * @mb:   mbuf whose ol_flags, hash fields and dynamic metadata field are set
+ * @rxdp: descriptor to parse
+ */
+static inline void
+ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ice_rx_queue *rxq,
+                                     struct rte_mbuf *mb,
+                                     volatile union ice_rx_flex_desc *rxdp)
+{
+       volatile struct ice_32b_rx_flex_desc_comms *desc =
+                       (volatile struct ice_32b_rx_flex_desc_comms *)rxdp;
+       uint16_t stat_err;
+
+       /* Report the RSS hash only when status_error0 marks it as valid. */
+       stat_err = rte_le_to_cpu_16(desc->status_error0);
+       if (likely(stat_err & (1 << ICE_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+               mb->ol_flags |= PKT_RX_RSS_HASH;
+               mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
+       }
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+       /* An all-ones flow_id is skipped (no FDIR flags are set). */
+       if (desc->flow_id != 0xFFFFFFFF) {
+               mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+               mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
+       }
+
+       if (rxq->xtr_ol_flag) {
+               uint32_t metadata = 0;
+
+               /* Each aux word has its own valid bit in status_error1. */
+               stat_err = rte_le_to_cpu_16(desc->status_error1);
+
+               if (stat_err & (1 << ICE_RX_FLEX_DESC_STATUS1_XTRMD4_VALID_S))
+                       metadata = rte_le_to_cpu_16(desc->flex_ts.flex.aux0);
+
+               /* Widen to 32 bits before shifting: a 16-bit operand is
+                * promoted to signed int, and moving a set bit 15 into the
+                * sign bit is undefined behavior.
+                */
+               if (stat_err & (1 << ICE_RX_FLEX_DESC_STATUS1_XTRMD5_VALID_S))
+                       metadata |= (uint32_t)rte_le_to_cpu_16(
+                                       desc->flex_ts.flex.aux1) << 16;
+
+               /* Metadata of zero is treated as "nothing extracted": the
+                * flag and the dynamic field are left untouched.
+                */
+               if (metadata) {
+                       mb->ol_flags |= rxq->xtr_ol_flag;
+
+                       *RTE_NET_ICE_DYNF_PROTO_XTR_METADATA(mb) = metadata;
+               }
+       }
+#endif
+}
+
+/* Fill mbuf fields from a flexible Rx descriptor interpreted as
+ * struct ice_32b_rx_flex_desc_comms: the RSS hash, the flow director ID and,
+ * when the queue has a protocol extraction flag, a 16-bit extraction value.
+ *
+ * Unlike the v1 handler, validity is not taken from status_error1: an aux
+ * word equal to 0xFFFF is treated as absent, and only one word is used
+ * (aux0 if present, otherwise aux1).
+ *
+ * @rxq:  Rx queue; a non-zero xtr_ol_flag enables metadata extraction
+ * @mb:   mbuf whose ol_flags, hash fields and dynamic metadata field are set
+ * @rxdp: descriptor to parse
+ */
+static inline void
+ice_rxd_to_pkt_fields_by_comms_aux_v2(struct ice_rx_queue *rxq,
+                                     struct rte_mbuf *mb,
+                                     volatile union ice_rx_flex_desc *rxdp)
+{
+       volatile struct ice_32b_rx_flex_desc_comms *desc =
+                       (volatile struct ice_32b_rx_flex_desc_comms *)rxdp;
+       uint16_t stat_err;
+
+       /* Report the RSS hash only when status_error0 marks it as valid. */
+       stat_err = rte_le_to_cpu_16(desc->status_error0);
+       if (likely(stat_err & (1 << ICE_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+               mb->ol_flags |= PKT_RX_RSS_HASH;
+               mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
+       }
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+       /* An all-ones flow_id is skipped (no FDIR flags are set). */
+       if (desc->flow_id != 0xFFFFFFFF) {
+               mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+               mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
+       }
+
+       if (rxq->xtr_ol_flag) {
+               uint32_t metadata = 0;
+
+               /* The raw comparison needs no byte swap: 0xFFFF reads the
+                * same in either byte order.
+                */
+               if (desc->flex_ts.flex.aux0 != 0xFFFF)
+                       metadata = rte_le_to_cpu_16(desc->flex_ts.flex.aux0);
+               else if (desc->flex_ts.flex.aux1 != 0xFFFF)
+                       metadata = rte_le_to_cpu_16(desc->flex_ts.flex.aux1);
+
+               /* Metadata of zero leaves the flag and dynamic field unset. */
+               if (metadata) {
+                       mb->ol_flags |= rxq->xtr_ol_flag;
+
+                       *RTE_NET_ICE_DYNF_PROTO_XTR_METADATA(mb) = metadata;
+               }
+       }
+#endif
+}
+
+/* Bind an Rx queue to the descriptor parser, and to the protocol extraction
+ * dynamic flag, that correspond to the flexible descriptor ID in use.
+ *
+ * @rxq:   queue whose rxd_to_pkt_fields and xtr_ol_flag members are set
+ * @rxdid: flexible Rx descriptor ID (ICE_RXDID_*) programmed for the queue
+ */
+void
+ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq, uint32_t rxdid)
+{
+       /* Pick the dynamic flag for layouts that carry extraction data.
+        * Any other layout leaves xtr_ol_flag as it was on entry.
+        */
+       switch (rxdid) {
+       case ICE_RXDID_COMMS_AUX_VLAN:
+               rxq->xtr_ol_flag = rte_net_ice_dynflag_proto_xtr_vlan_mask;
+               break;
+       case ICE_RXDID_COMMS_AUX_IPV4:
+               rxq->xtr_ol_flag = rte_net_ice_dynflag_proto_xtr_ipv4_mask;
+               break;
+       case ICE_RXDID_COMMS_AUX_IPV6:
+               rxq->xtr_ol_flag = rte_net_ice_dynflag_proto_xtr_ipv6_mask;
+               break;
+       case ICE_RXDID_COMMS_AUX_IPV6_FLOW:
+               rxq->xtr_ol_flag = rte_net_ice_dynflag_proto_xtr_ipv6_flow_mask;
+               break;
+       case ICE_RXDID_COMMS_AUX_TCP:
+               rxq->xtr_ol_flag = rte_net_ice_dynflag_proto_xtr_tcp_mask;
+               break;
+       case ICE_RXDID_COMMS_AUX_IP_OFFSET:
+               rxq->xtr_ol_flag = rte_net_ice_dynflag_proto_xtr_ip_offset_mask;
+               break;
+       default:
+               break;
+       }
+
+       /* Pick the parser; several layouts share the aux v1 parser. */
+       switch (rxdid) {
+       case ICE_RXDID_COMMS_AUX_VLAN:
+       case ICE_RXDID_COMMS_AUX_IPV4:
+       case ICE_RXDID_COMMS_AUX_IPV6:
+       case ICE_RXDID_COMMS_AUX_IPV6_FLOW:
+       case ICE_RXDID_COMMS_AUX_TCP:
+               rxq->rxd_to_pkt_fields = ice_rxd_to_pkt_fields_by_comms_aux_v1;
+               break;
+       case ICE_RXDID_COMMS_AUX_IP_OFFSET:
+               rxq->rxd_to_pkt_fields = ice_rxd_to_pkt_fields_by_comms_aux_v2;
+               break;
+       case ICE_RXDID_COMMS_GENERIC:
+               rxq->rxd_to_pkt_fields = ice_rxd_to_pkt_fields_by_comms_generic;
+               break;
+       case ICE_RXDID_COMMS_OVS:
+       default:
+               /* update this according to the RXDID for PROTO_XTR_NONE */
+               rxq->rxd_to_pkt_fields = ice_rxd_to_pkt_fields_by_comms_ovs;
+               break;
+       }
+
+       /* Drop the flag when the metadata dynamic field is reported as
+        * unavailable, so the parsers skip extraction entirely.
+        */
+       if (!rte_net_ice_dynf_proto_xtr_metadata_avail())
+               rxq->xtr_ol_flag = 0;
 }
 
 static enum ice_status
@@ -48,12 +227,13 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
 {
        struct ice_vsi *vsi = rxq->vsi;
        struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
+       struct ice_pf *pf = ICE_VSI_TO_PF(vsi);
        struct rte_eth_dev *dev = ICE_VSI_TO_ETH_DEV(rxq->vsi);
        struct ice_rlan_ctx rx_ctx;
        enum ice_status err;
        uint16_t buf_size, len;
        struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
-       uint32_t rxdid = ICE_RXDID_COMMS_GENERIC;
+       uint32_t rxdid = ICE_RXDID_COMMS_OVS;
        uint32_t regval;
 
        /* Set buffer size as the head split is disabled. */
@@ -118,6 +298,14 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
        PMD_DRV_LOG(DEBUG, "Port (%u) - Rx queue (%u) is set with RXDID : %u",
                    rxq->port_id, rxq->queue_id, rxdid);
 
+       if (!(pf->supported_rxdid & BIT(rxdid))) {
+               PMD_DRV_LOG(ERR, "currently package doesn't support RXDID (%u)",
+                           rxdid);
+               return -EINVAL;
+       }
+
+       ice_select_rxd_to_pkt_fields_handler(rxq, rxdid);
+
        /* Enable Flexible Descriptors in the queue context which
         * allows this driver to select a specific receive descriptor format
         */
@@ -217,23 +405,12 @@ _ice_rx_queue_release_mbufs(struct ice_rx_queue *rxq)
                        rxq->sw_ring[i].mbuf = NULL;
                }
        }
-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC
-               if (rxq->rx_nb_avail == 0)
-                       return;
-               for (i = 0; i < rxq->rx_nb_avail; i++) {
-                       struct rte_mbuf *mbuf;
-
-                       mbuf = rxq->rx_stage[rxq->rx_next_avail + i];
-                       rte_pktmbuf_free_seg(mbuf);
-               }
-               rxq->rx_nb_avail = 0;
-#endif /* RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC */
-}
+       if (rxq->rx_nb_avail == 0)
+               return;
+       for (i = 0; i < rxq->rx_nb_avail; i++)
+               rte_pktmbuf_free_seg(rxq->rx_stage[rxq->rx_next_avail + i]);
 
-static void
-ice_rx_queue_release_mbufs(struct ice_rx_queue *rxq)
-{
-       rxq->rx_rel_mbufs(rxq);
+       rxq->rx_nb_avail = 0;
 }
 
 /* turn on or off rx queue
@@ -290,16 +467,10 @@ ice_switch_rx_queue(struct ice_hw *hw, uint16_t q_idx, bool on)
 }
 
 static inline int
-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC
 ice_check_rx_burst_bulk_alloc_preconditions(struct ice_rx_queue *rxq)
-#else
-ice_check_rx_burst_bulk_alloc_preconditions
-       (__rte_unused struct ice_rx_queue *rxq)
-#endif
 {
        int ret = 0;
 
-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC
        if (!(rxq->rx_free_thresh >= ICE_RX_MAX_BURST)) {
                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
                             "rxq->rx_free_thresh=%d, "
@@ -319,9 +490,6 @@ ice_check_rx_burst_bulk_alloc_preconditions
                             rxq->nb_rx_desc, rxq->rx_free_thresh);
                ret = -EINVAL;
        }
-#else
-       ret = -EINVAL;
-#endif
 
        return ret;
 }
@@ -338,17 +506,11 @@ ice_reset_rx_queue(struct ice_rx_queue *rxq)
                return;
        }
 
-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC
-       if (ice_check_rx_burst_bulk_alloc_preconditions(rxq) == 0)
-               len = (uint16_t)(rxq->nb_rx_desc + ICE_RX_MAX_BURST);
-       else
-#endif /* RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC */
-               len = rxq->nb_rx_desc;
+       len = (uint16_t)(rxq->nb_rx_desc + ICE_RX_MAX_BURST);
 
        for (i = 0; i < len * sizeof(union ice_rx_flex_desc); i++)
                ((volatile char *)rxq->rx_ring)[i] = 0;
 
-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC
        memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
        for (i = 0; i < ICE_RX_MAX_BURST; ++i)
                rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;
@@ -356,7 +518,6 @@ ice_reset_rx_queue(struct ice_rx_queue *rxq)
        rxq->rx_nb_avail = 0;
        rxq->rx_next_avail = 0;
        rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
-#endif /* RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC */
 
        rxq->rx_tail = 0;
        rxq->nb_rx_hold = 0;
@@ -405,12 +566,12 @@ ice_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
        /* Init the RX tail register. */
        ICE_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1);
 
-       err = ice_switch_rx_queue(hw, rxq->reg_idx, TRUE);
+       err = ice_switch_rx_queue(hw, rxq->reg_idx, true);
        if (err) {
                PMD_DRV_LOG(ERR, "Failed to switch RX queue %u on",
                            rx_queue_id);
 
-               ice_rx_queue_release_mbufs(rxq);
+               rxq->rx_rel_mbufs(rxq);
                ice_reset_rx_queue(rxq);
                return -EINVAL;
        }
@@ -431,13 +592,13 @@ ice_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
        if (rx_queue_id < dev->data->nb_rx_queues) {
                rxq = dev->data->rx_queues[rx_queue_id];
 
-               err = ice_switch_rx_queue(hw, rxq->reg_idx, FALSE);
+               err = ice_switch_rx_queue(hw, rxq->reg_idx, false);
                if (err) {
                        PMD_DRV_LOG(ERR, "Failed to switch RX queue %u off",
                                    rx_queue_id);
                        return -EINVAL;
                }
-               ice_rx_queue_release_mbufs(rxq);
+               rxq->rx_rel_mbufs(rxq);
                ice_reset_rx_queue(rxq);
                dev->data->rx_queue_state[rx_queue_id] =
                        RTE_ETH_QUEUE_STATE_STOPPED;
@@ -453,8 +614,9 @@ ice_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
        int err;
        struct ice_vsi *vsi;
        struct ice_hw *hw;
-       struct ice_aqc_add_tx_qgrp txq_elem;
+       struct ice_aqc_add_tx_qgrp *txq_elem;
        struct ice_tlan_ctx tx_ctx;
+       int buf_len;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -471,13 +633,17 @@ ice_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
                return -EINVAL;
        }
 
+       buf_len = ice_struct_size(txq_elem, txqs, 1);
+       txq_elem = ice_malloc(hw, buf_len);
+       if (!txq_elem)
+               return -ENOMEM;
+
        vsi = txq->vsi;
        hw = ICE_VSI_TO_HW(vsi);
 
-       memset(&txq_elem, 0, sizeof(txq_elem));
        memset(&tx_ctx, 0, sizeof(tx_ctx));
-       txq_elem.num_txqs = 1;
-       txq_elem.txqs[0].txq_id = rte_cpu_to_le_16(txq->reg_idx);
+       txq_elem->num_txqs = 1;
+       txq_elem->txqs[0].txq_id = rte_cpu_to_le_16(txq->reg_idx);
 
        tx_ctx.base = txq->tx_ring_dma / ICE_QUEUE_BASE_ADDR_UNIT;
        tx_ctx.qlen = txq->nb_tx_desc;
@@ -489,7 +655,7 @@ ice_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
        tx_ctx.tso_qnum = txq->reg_idx; /* index for tso state structure */
        tx_ctx.legacy_int = 1; /* Legacy or Advanced Host Interface */
 
-       ice_set_ctx((uint8_t *)&tx_ctx, txq_elem.txqs[0].txq_ctx,
+       ice_set_ctx(hw, (uint8_t *)&tx_ctx, txq_elem->txqs[0].txq_ctx,
                    ice_tlan_ctx_info);
 
        txq->qtx_tail = hw->hw_addr + QTX_COMM_DBELL(txq->reg_idx);
@@ -499,15 +665,18 @@ ice_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 
        /* Fix me, we assume TC always 0 here */
        err = ice_ena_vsi_txq(hw->port_info, vsi->idx, 0, tx_queue_id, 1,
-                       &txq_elem, sizeof(txq_elem), NULL);
+                       txq_elem, buf_len, NULL);
        if (err) {
                PMD_DRV_LOG(ERR, "Failed to add lan txq");
+               rte_free(txq_elem);
                return -EIO;
        }
        /* store the schedule node id */
-       txq->q_teid = txq_elem.txqs[0].q_teid;
+       txq->q_teid = txq_elem->txqs[0].q_teid;
 
        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
+
+       rte_free(txq_elem);
        return 0;
 }
 
@@ -516,7 +685,7 @@ ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
 {
        struct ice_vsi *vsi = rxq->vsi;
        struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-       uint32_t rxdid = ICE_RXDID_COMMS_GENERIC;
+       uint32_t rxdid = ICE_RXDID_LEGACY_1;
        struct ice_rlan_ctx rx_ctx;
        enum ice_status err;
        uint32_t regval;
@@ -531,9 +700,7 @@ ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
        rx_ctx.dbuf = rxq->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
        rx_ctx.hbuf = rxq->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S;
        rx_ctx.dtype = 0; /* No Header Split mode */
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
        rx_ctx.dsize = 1; /* 32B descriptors */
-#endif
        rx_ctx.rxmax = RTE_ETHER_MAX_LEN;
        /* TPH: Transaction Layer Packet (TLP) processing hints */
        rx_ctx.tphrdesc_ena = 1;
@@ -613,7 +780,7 @@ ice_fdir_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
        /* Init the RX tail register. */
        ICE_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1);
 
-       err = ice_switch_rx_queue(hw, rxq->reg_idx, TRUE);
+       err = ice_switch_rx_queue(hw, rxq->reg_idx, true);
        if (err) {
                PMD_DRV_LOG(ERR, "Failed to switch FDIR RX queue %u on",
                            rx_queue_id);
@@ -633,8 +800,9 @@ ice_fdir_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
        int err;
        struct ice_vsi *vsi;
        struct ice_hw *hw;
-       struct ice_aqc_add_tx_qgrp txq_elem;
+       struct ice_aqc_add_tx_qgrp *txq_elem;
        struct ice_tlan_ctx tx_ctx;
+       int buf_len;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -645,13 +813,17 @@ ice_fdir_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
                return -EINVAL;
        }
 
+       buf_len = ice_struct_size(txq_elem, txqs, 1);
+       txq_elem = ice_malloc(hw, buf_len);
+       if (!txq_elem)
+               return -ENOMEM;
+
        vsi = txq->vsi;
        hw = ICE_VSI_TO_HW(vsi);
 
-       memset(&txq_elem, 0, sizeof(txq_elem));
        memset(&tx_ctx, 0, sizeof(tx_ctx));
-       txq_elem.num_txqs = 1;
-       txq_elem.txqs[0].txq_id = rte_cpu_to_le_16(txq->reg_idx);
+       txq_elem->num_txqs = 1;
+       txq_elem->txqs[0].txq_id = rte_cpu_to_le_16(txq->reg_idx);
 
        tx_ctx.base = txq->tx_ring_dma / ICE_QUEUE_BASE_ADDR_UNIT;
        tx_ctx.qlen = txq->nb_tx_desc;
@@ -663,7 +835,7 @@ ice_fdir_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
        tx_ctx.tso_qnum = txq->reg_idx; /* index for tso state structure */
        tx_ctx.legacy_int = 1; /* Legacy or Advanced Host Interface */
 
-       ice_set_ctx((uint8_t *)&tx_ctx, txq_elem.txqs[0].txq_ctx,
+       ice_set_ctx(hw, (uint8_t *)&tx_ctx, txq_elem->txqs[0].txq_ctx,
                    ice_tlan_ctx_info);
 
        txq->qtx_tail = hw->hw_addr + QTX_COMM_DBELL(txq->reg_idx);
@@ -673,14 +845,16 @@ ice_fdir_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 
        /* Fix me, we assume TC always 0 here */
        err = ice_ena_vsi_txq(hw->port_info, vsi->idx, 0, tx_queue_id, 1,
-                             &txq_elem, sizeof(txq_elem), NULL);
+                             txq_elem, buf_len, NULL);
        if (err) {
                PMD_DRV_LOG(ERR, "Failed to add FDIR txq");
+               rte_free(txq_elem);
                return -EIO;
        }
        /* store the schedule node id */
-       txq->q_teid = txq_elem.txqs[0].q_teid;
+       txq->q_teid = txq_elem->txqs[0].q_teid;
 
+       rte_free(txq_elem);
        return 0;
 }
 
@@ -702,11 +876,6 @@ _ice_tx_queue_release_mbufs(struct ice_tx_queue *txq)
                }
        }
 }
-static void
-ice_tx_queue_release_mbufs(struct ice_tx_queue *txq)
-{
-       txq->tx_rel_mbufs(txq);
-}
 
 static void
 ice_reset_tx_queue(struct ice_tx_queue *txq)
@@ -782,7 +951,7 @@ ice_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
                return -EINVAL;
        }
 
-       ice_tx_queue_release_mbufs(txq);
+       txq->tx_rel_mbufs(txq);
        ice_reset_tx_queue(txq);
        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
 
@@ -799,13 +968,13 @@ ice_fdir_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 
        rxq = pf->fdir.rxq;
 
-       err = ice_switch_rx_queue(hw, rxq->reg_idx, FALSE);
+       err = ice_switch_rx_queue(hw, rxq->reg_idx, false);
        if (err) {
                PMD_DRV_LOG(ERR, "Failed to switch FDIR RX queue %u off",
                            rx_queue_id);
                return -EINVAL;
        }
-       ice_rx_queue_release_mbufs(rxq);
+       rxq->rx_rel_mbufs(rxq);
 
        return 0;
 }
@@ -841,7 +1010,7 @@ ice_fdir_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
                return -EINVAL;
        }
 
-       ice_tx_queue_release_mbufs(txq);
+       txq->tx_rel_mbufs(txq);
 
        return 0;
 }
@@ -909,13 +1078,11 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
        /* Allocate the maximum number of RX ring hardware descriptors. */
        len = ICE_MAX_RING_DESC;
 
-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC
        /**
         * Allocating a little more memory because vectorized/bulk_alloc Rx
         * functions doesn't check boundaries each time.
         */
        len += ICE_RX_MAX_BURST;
-#endif
 
        /* Allocate the maximum number of RX ring hardware descriptor. */
        ring_size = sizeof(union ice_rx_flex_desc) * len;
@@ -935,11 +1102,8 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->rx_ring_dma = rz->iova;
        rxq->rx_ring = rz->addr;
 
-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC
+       /* always reserve more for bulk alloc */
        len = (uint16_t)(nb_desc + ICE_RX_MAX_BURST);
-#else
-       len = nb_desc;
-#endif
 
        /* Allocate the software ring. */
        rxq->sw_ring = rte_zmalloc_socket(NULL,
@@ -953,24 +1117,21 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
        }
 
        ice_reset_rx_queue(rxq);
-       rxq->q_set = TRUE;
+       rxq->q_set = true;
        dev->data->rx_queues[queue_idx] = rxq;
        rxq->rx_rel_mbufs = _ice_rx_queue_release_mbufs;
 
        use_def_burst_func = ice_check_rx_burst_bulk_alloc_preconditions(rxq);
 
        if (!use_def_burst_func) {
-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC
                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
                             "satisfied. Rx Burst Bulk Alloc function will be "
                             "used on port=%d, queue=%d.",
                             rxq->port_id, rxq->queue_id);
-#endif /* RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC */
        } else {
                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
-                            "not satisfied, Scattered Rx is requested, "
-                            "or RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC is "
-                            "not enabled on port=%d, queue=%d.",
+                            "not satisfied, Scattered Rx is requested. "
+                            "on port=%d, queue=%d.",
                             rxq->port_id, rxq->queue_id);
                ad->rx_bulk_alloc_allowed = false;
        }
@@ -988,7 +1149,7 @@ ice_rx_queue_release(void *rxq)
                return;
        }
 
-       ice_rx_queue_release_mbufs(q);
+       q->rx_rel_mbufs(q);
        rte_free(q->sw_ring);
        rte_free(q);
 }
@@ -1166,7 +1327,7 @@ ice_tx_queue_setup(struct rte_eth_dev *dev,
        }
 
        ice_reset_tx_queue(txq);
-       txq->q_set = TRUE;
+       txq->q_set = true;
        dev->data->tx_queues[queue_idx] = txq;
        txq->tx_rel_mbufs = _ice_tx_queue_release_mbufs;
        ice_set_tx_function_flag(dev, txq);
@@ -1184,7 +1345,7 @@ ice_tx_queue_release(void *txq)
                return;
        }
 
-       ice_tx_queue_release_mbufs(q);
+       q->tx_rel_mbufs(q);
        rte_free(q->sw_ring);
        rte_free(q);
 }
@@ -1325,51 +1486,6 @@ ice_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union ice_rx_flex_desc *rxdp)
                   mb->vlan_tci, mb->vlan_tci_outer);
 }
 
-#define ICE_RX_PROTO_XTR_VALID \
-       ((1 << ICE_RX_FLEX_DESC_STATUS1_XTRMD4_VALID_S) | \
-        (1 << ICE_RX_FLEX_DESC_STATUS1_XTRMD5_VALID_S))
-
-static inline void
-ice_rxd_to_pkt_fields(struct rte_mbuf *mb,
-                     volatile union ice_rx_flex_desc *rxdp)
-{
-       volatile struct ice_32b_rx_flex_desc_comms *desc =
-                       (volatile struct ice_32b_rx_flex_desc_comms *)rxdp;
-       uint16_t stat_err;
-
-       stat_err = rte_le_to_cpu_16(desc->status_error0);
-       if (likely(stat_err & (1 << ICE_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
-               mb->ol_flags |= PKT_RX_RSS_HASH;
-               mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
-       }
-
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-       init_proto_xtr_flds(mb);
-
-       stat_err = rte_le_to_cpu_16(desc->status_error1);
-       if (stat_err & ICE_RX_PROTO_XTR_VALID) {
-               struct proto_xtr_flds *xtr = get_proto_xtr_flds(mb);
-
-               if (stat_err & (1 << ICE_RX_FLEX_DESC_STATUS1_XTRMD4_VALID_S))
-                       xtr->u.raw.data0 =
-                               rte_le_to_cpu_16(desc->flex_ts.flex.aux0);
-
-               if (stat_err & (1 << ICE_RX_FLEX_DESC_STATUS1_XTRMD5_VALID_S))
-                       xtr->u.raw.data1 =
-                               rte_le_to_cpu_16(desc->flex_ts.flex.aux1);
-
-               xtr->type = ice_rxdid_to_proto_xtr_type(desc->rxdid);
-               xtr->magic = PROTO_XTR_MAGIC_ID;
-       }
-
-       if (desc->flow_id != 0xFFFFFFFF) {
-               mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
-               mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
-       }
-#endif
-}
-
-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC
 #define ICE_LOOK_AHEAD 8
 #if (ICE_LOOK_AHEAD != 8)
 #error "PMD ICE: ICE_LOOK_AHEAD must be 8\n"
@@ -1427,7 +1543,7 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
                        mb->packet_type = ptype_tbl[ICE_RX_FLEX_DESC_PTYPE_M &
                                rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
                        ice_rxd_to_vlan_tci(mb, &rxdp[j]);
-                       ice_rxd_to_pkt_fields(mb, &rxdp[j]);
+                       rxq->rxd_to_pkt_fields(rxq, mb, &rxdp[j]);
 
                        mb->ol_flags |= pkt_flags;
                }
@@ -1590,15 +1706,6 @@ ice_recv_pkts_bulk_alloc(void *rx_queue,
 
        return nb_rx;
 }
-#else
-static uint16_t
-ice_recv_pkts_bulk_alloc(void __rte_unused *rx_queue,
-                        struct rte_mbuf __rte_unused **rx_pkts,
-                        uint16_t __rte_unused nb_pkts)
-{
-       return 0;
-}
-#endif /* RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC */
 
 static uint16_t
 ice_recv_scattered_pkts(void *rx_queue,
@@ -1733,7 +1840,7 @@ ice_recv_scattered_pkts(void *rx_queue,
                first_seg->packet_type = ptype_tbl[ICE_RX_FLEX_DESC_PTYPE_M &
                        rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
                ice_rxd_to_vlan_tci(first_seg, &rxd);
-               ice_rxd_to_pkt_fields(first_seg, &rxd);
+               rxq->rxd_to_pkt_fields(rxq, first_seg, &rxd);
                pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
                first_seg->ol_flags |= pkt_flags;
                /* Prefetch data of first segment, if configured to do so. */
@@ -1761,7 +1868,7 @@ ice_recv_scattered_pkts(void *rx_queue,
                rx_id = (uint16_t)(rx_id == 0 ?
                                   (rxq->nb_rx_desc - 1) : (rx_id - 1));
                /* write TAIL register */
-               ICE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+               ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
                nb_hold = 0;
        }
        rxq->nb_rx_hold = nb_hold;
@@ -1842,15 +1949,17 @@ ice_dev_supported_ptypes_get(struct rte_eth_dev *dev)
                ptypes = ptypes_os;
 
        if (dev->rx_pkt_burst == ice_recv_pkts ||
-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC
            dev->rx_pkt_burst == ice_recv_pkts_bulk_alloc ||
-#endif
            dev->rx_pkt_burst == ice_recv_scattered_pkts)
                return ptypes;
 
 #ifdef RTE_ARCH_X86
        if (dev->rx_pkt_burst == ice_recv_pkts_vec ||
            dev->rx_pkt_burst == ice_recv_scattered_pkts_vec ||
+#ifdef CC_AVX512_SUPPORT
+           dev->rx_pkt_burst == ice_recv_pkts_vec_avx512 ||
+           dev->rx_pkt_burst == ice_recv_scattered_pkts_vec_avx512 ||
+#endif
            dev->rx_pkt_burst == ice_recv_pkts_vec_avx2 ||
            dev->rx_pkt_burst == ice_recv_scattered_pkts_vec_avx2)
                return ptypes;
@@ -1915,24 +2024,6 @@ ice_tx_descriptor_status(void *tx_queue, uint16_t offset)
        return RTE_ETH_TX_DESC_FULL;
 }
 
-void
-ice_clear_queues(struct rte_eth_dev *dev)
-{
-       uint16_t i;
-
-       PMD_INIT_FUNC_TRACE();
-
-       for (i = 0; i < dev->data->nb_tx_queues; i++) {
-               ice_tx_queue_release_mbufs(dev->data->tx_queues[i]);
-               ice_reset_tx_queue(dev->data->tx_queues[i]);
-       }
-
-       for (i = 0; i < dev->data->nb_rx_queues; i++) {
-               ice_rx_queue_release_mbufs(dev->data->rx_queues[i]);
-               ice_reset_rx_queue(dev->data->rx_queues[i]);
-       }
-}
-
 void
 ice_free_queues(struct rte_eth_dev *dev)
 {
@@ -1945,6 +2036,7 @@ ice_free_queues(struct rte_eth_dev *dev)
                        continue;
                ice_rx_queue_release(dev->data->rx_queues[i]);
                dev->data->rx_queues[i] = NULL;
+               rte_eth_dma_zone_free(dev, "rx_ring", i);
        }
        dev->data->nb_rx_queues = 0;
 
@@ -1953,6 +2045,7 @@ ice_free_queues(struct rte_eth_dev *dev)
                        continue;
                ice_tx_queue_release(dev->data->tx_queues[i]);
                dev->data->tx_queues[i] = NULL;
+               rte_eth_dma_zone_free(dev, "tx_ring", i);
        }
        dev->data->nb_tx_queues = 0;
 }
@@ -2010,7 +2103,7 @@ ice_fdir_setup_tx_resources(struct ice_pf *pf)
         * don't need to allocate software ring and reset for the fdir
         * program queue just set the queue has been configured.
         */
-       txq->q_set = TRUE;
+       txq->q_set = true;
        pf->fdir.txq = txq;
 
        txq->tx_rel_mbufs = _ice_tx_queue_release_mbufs;
@@ -2045,7 +2138,7 @@ ice_fdir_setup_rx_resources(struct ice_pf *pf)
        }
 
        /* Allocate RX hardware ring descriptors. */
-       ring_size = sizeof(union ice_rx_flex_desc) * ICE_FDIR_NUM_RX_DESC;
+       ring_size = sizeof(union ice_32byte_rx_desc) * ICE_FDIR_NUM_RX_DESC;
        ring_size = RTE_ALIGN(ring_size, ICE_DMA_MEM_ALIGN);
 
        rz = rte_eth_dma_zone_reserve(dev, "fdir_rx_ring",
@@ -2064,14 +2157,14 @@ ice_fdir_setup_rx_resources(struct ice_pf *pf)
 
        rxq->rx_ring_dma = rz->iova;
        memset(rz->addr, 0, ICE_FDIR_NUM_RX_DESC *
-              sizeof(union ice_rx_flex_desc));
+              sizeof(union ice_32byte_rx_desc));
        rxq->rx_ring = (union ice_rx_flex_desc *)rz->addr;
 
        /*
         * Don't need to allocate software ring and reset for the fdir
         * rx queue, just set the queue has been configured.
         */
-       rxq->q_set = TRUE;
+       rxq->q_set = true;
        pf->fdir.rxq = rxq;
 
        rxq->rx_rel_mbufs = _ice_rx_queue_release_mbufs;
@@ -2151,7 +2244,7 @@ ice_recv_pkts(void *rx_queue,
                rxm->packet_type = ptype_tbl[ICE_RX_FLEX_DESC_PTYPE_M &
                        rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
                ice_rxd_to_vlan_tci(rxm, &rxd);
-               ice_rxd_to_pkt_fields(rxm, &rxd);
+               rxq->rxd_to_pkt_fields(rxq, rxm, &rxd);
                pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
                rxm->ol_flags |= pkt_flags;
                /* copy old mbuf to rx_pkts */
@@ -2169,7 +2262,7 @@ ice_recv_pkts(void *rx_queue,
                rx_id = (uint16_t)(rx_id == 0 ?
                                   (rxq->nb_rx_desc - 1) : (rx_id - 1));
                /* write TAIL register */
-               ICE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+               ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
                nb_hold = 0;
        }
        rxq->nb_rx_hold = nb_hold;
@@ -2391,6 +2484,24 @@ ice_set_tso_ctx(struct rte_mbuf *mbuf, union ice_tx_offload tx_offload)
        return ctx_desc;
 }
 
+/*
+ * Largest payload a single TX data descriptor may carry, obtained by
+ * shifting the buffer-size field mask down to bit 0.
+ * HW requires that TX buffer size ranges from 1B up to (16K-1)B.
+ */
+#define ICE_MAX_DATA_PER_TXD \
+       (ICE_TXD_QW1_TX_BUF_SZ_M >> ICE_TXD_QW1_TX_BUF_SZ_S)
+/*
+ * Calculate the number of TX data descriptors needed for one packet.
+ *
+ * Walks the segment chain starting at tx_pkt (following ->next) and, for
+ * each segment, adds its data_len divided by ICE_MAX_DATA_PER_TXD, rounded
+ * up. Context descriptors are not counted here. A NULL tx_pkt yields 0.
+ */
+static inline uint16_t
+ice_calc_pkt_desc(struct rte_mbuf *tx_pkt)
+{
+       struct rte_mbuf *txd = tx_pkt;
+       uint16_t count = 0;
+
+       while (txd != NULL) {
+               count += DIV_ROUND_UP(txd->data_len, ICE_MAX_DATA_PER_TXD);
+               txd = txd->next;
+       }
+
+       return count;
+}
+
 uint16_t
 ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
@@ -2410,6 +2521,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
        uint32_t td_offset = 0;
        uint32_t td_tag = 0;
        uint16_t tx_last;
+       uint16_t slen;
        uint64_t buf_dma_addr;
        uint64_t ol_flags;
        union ice_tx_offload tx_offload = {0};
@@ -2422,12 +2534,14 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
        /* Check if the descriptor ring needs to be cleaned. */
        if (txq->nb_tx_free < txq->tx_free_thresh)
-               ice_xmit_cleanup(txq);
+               (void)ice_xmit_cleanup(txq);
 
        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
                tx_pkt = *tx_pkts++;
 
                td_cmd = 0;
+               td_tag = 0;
+               td_offset = 0;
                ol_flags = tx_pkt->ol_flags;
                tx_offload.l2_len = tx_pkt->l2_len;
                tx_offload.l3_len = tx_pkt->l3_len;
@@ -2441,8 +2555,15 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                /* The number of descriptors that must be allocated for
                 * a packet equals to the number of the segments of that
                 * packet plus the number of context descriptor if needed.
+                * Recalculate the needed tx descs when TSO enabled in case
+                * the mbuf data size exceeds max data size that hw allows
+                * per tx desc.
                 */
-               nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
+               if (ol_flags & PKT_TX_TCP_SEG)
+                       nb_used = (uint16_t)(ice_calc_pkt_desc(tx_pkt) +
+                                            nb_ctx);
+               else
+                       nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
                tx_last = (uint16_t)(tx_id + nb_used - 1);
 
                /* Circular ring */
@@ -2479,10 +2600,9 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                                                   &cd_tunneling_params);
 
                /* Enable checksum offloading */
-               if (ol_flags & ICE_TX_CKSUM_OFFLOAD_MASK) {
+               if (ol_flags & ICE_TX_CKSUM_OFFLOAD_MASK)
                        ice_txd_enable_checksum(ol_flags, &td_cmd,
                                                &td_offset, tx_offload);
-               }
 
                if (nb_ctx) {
                        /* Setup TX context descriptor if required */
@@ -2532,15 +2652,37 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                        txe->mbuf = m_seg;
 
                        /* Setup TX Descriptor */
+                       slen = m_seg->data_len;
                        buf_dma_addr = rte_mbuf_data_iova(m_seg);
+
+                       while ((ol_flags & PKT_TX_TCP_SEG) &&
+                               unlikely(slen > ICE_MAX_DATA_PER_TXD)) {
+                               txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
+                               txd->cmd_type_offset_bsz =
+                               rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DATA |
+                               ((uint64_t)td_cmd << ICE_TXD_QW1_CMD_S) |
+                               ((uint64_t)td_offset << ICE_TXD_QW1_OFFSET_S) |
+                               ((uint64_t)ICE_MAX_DATA_PER_TXD <<
+                                ICE_TXD_QW1_TX_BUF_SZ_S) |
+                               ((uint64_t)td_tag << ICE_TXD_QW1_L2TAG1_S));
+
+                               buf_dma_addr += ICE_MAX_DATA_PER_TXD;
+                               slen -= ICE_MAX_DATA_PER_TXD;
+
+                               txe->last_id = tx_last;
+                               tx_id = txe->next_id;
+                               txe = txn;
+                               txd = &tx_ring[tx_id];
+                               txn = &sw_ring[txe->next_id];
+                       }
+
                        txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
                        txd->cmd_type_offset_bsz =
                                rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DATA |
-                               ((uint64_t)td_cmd  << ICE_TXD_QW1_CMD_S) |
+                               ((uint64_t)td_cmd << ICE_TXD_QW1_CMD_S) |
                                ((uint64_t)td_offset << ICE_TXD_QW1_OFFSET_S) |
-                               ((uint64_t)m_seg->data_len  <<
-                                ICE_TXD_QW1_TX_BUF_SZ_S) |
-                               ((uint64_t)td_tag  << ICE_TXD_QW1_L2TAG1_S));
+                               ((uint64_t)slen << ICE_TXD_QW1_TX_BUF_SZ_S) |
+                               ((uint64_t)td_tag << ICE_TXD_QW1_L2TAG1_S));
 
                        txe->last_id = tx_last;
                        tx_id = txe->next_id;
@@ -2577,7 +2719,7 @@ end_of_tx:
        return nb_tx;
 }
 
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
 ice_tx_free_bufs(struct ice_tx_queue *txq)
 {
        struct ice_tx_entry *txep;
@@ -2613,6 +2755,116 @@ ice_tx_free_bufs(struct ice_tx_queue *txq)
        return txq->tx_rs_thresh;
 }
 
+/*
+ * Tx done cleanup for the non-simple, non-vector Tx path.
+ *
+ * Starting at the software-ring entry that follows tx_tail, frees the mbuf
+ * segments held by up to nb_tx_free entries, and calls ice_xmit_cleanup()
+ * again whenever the packet limit has not been reached yet.
+ *
+ * txq:      queue to clean.
+ * free_cnt: maximum number of packets to free; 0 is replaced by the ring
+ *           size (nb_tx_desc).
+ *
+ * Returns the number of packets (not segments) freed. Returns 0 when the
+ * ring has no free descriptors and ice_xmit_cleanup() returns non-zero.
+ */
+static int
+ice_tx_done_cleanup_full(struct ice_tx_queue *txq,
+                       uint32_t free_cnt)
+{
+       struct ice_tx_entry *swr_ring = txq->sw_ring;
+       uint16_t i, tx_last, tx_id;
+       uint16_t nb_tx_free_last;
+       uint16_t nb_tx_to_clean;
+       uint32_t pkt_cnt;
+
+       /* Start freeing mbufs from the entry that follows tx_tail */
+       tx_last = txq->tx_tail;
+       tx_id  = swr_ring[tx_last].next_id;
+
+       if (txq->nb_tx_free == 0 && ice_xmit_cleanup(txq))
+               return 0;
+
+       nb_tx_to_clean = txq->nb_tx_free;
+       nb_tx_free_last = txq->nb_tx_free;
+       if (!free_cnt)
+               free_cnt = txq->nb_tx_desc;
+
+       /* Loop through swr_ring, freeing mbuf segments and
+        * counting the packets they completed.
+        */
+       for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
+               for (i = 0; i < nb_tx_to_clean &&
+                       pkt_cnt < free_cnt &&
+                       tx_id != tx_last; i++) {
+                       if (swr_ring[tx_id].mbuf != NULL) {
+                               rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
+                               swr_ring[tx_id].mbuf = NULL;
+
+                               /*
+                                * last segment in the packet,
+                                * increment packet count
+                                */
+                               pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
+                       }
+
+                       tx_id = swr_ring[tx_id].next_id;
+               }
+
+               /* Stop when fewer than tx_rs_thresh descriptors are in use
+                * or the walk has come back around to tx_tail.
+                */
+               if (txq->tx_rs_thresh > txq->nb_tx_desc -
+                       txq->nb_tx_free || tx_id == tx_last)
+                       break;
+
+               if (pkt_cnt < free_cnt) {
+                       if (ice_xmit_cleanup(txq))
+                               break;
+
+                       /* Next pass covers only the newly freed descriptors */
+                       nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
+                       nb_tx_free_last = txq->nb_tx_free;
+               }
+       }
+
+       return (int)pkt_cnt;
+}
+
+#ifdef RTE_ARCH_X86
+/*
+ * Tx done cleanup stub for the vector Tx path: both arguments are ignored
+ * and -ENOTSUP is always returned. Only built when RTE_ARCH_X86 is defined.
+ */
+static int
+ice_tx_done_cleanup_vec(struct ice_tx_queue *txq __rte_unused,
+                       uint32_t free_cnt __rte_unused)
+{
+       return -ENOTSUP;
+}
+#endif
+
+/*
+ * Tx done cleanup for the simple Tx path.
+ *
+ * A free_cnt of 0, or one larger than the ring, is replaced by nb_tx_desc;
+ * the limit is then rounded down to a multiple of tx_rs_thresh.
+ * ice_tx_free_bufs() is called repeatedly until that limit is reached,
+ * fewer than tx_rs_thresh descriptors are in use, or a call returns 0.
+ *
+ * Returns the sum of the values returned by ice_tx_free_bufs().
+ */
+static int
+ice_tx_done_cleanup_simple(struct ice_tx_queue *txq,
+                       uint32_t free_cnt)
+{
+       int i, n, cnt;
+
+       if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
+               free_cnt = txq->nb_tx_desc;
+
+       /* Round the limit down to a whole number of tx_rs_thresh batches */
+       cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
+
+       for (i = 0; i < cnt; i += n) {
+               /* Less than one batch of descriptors in use: nothing to do */
+               if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
+                       break;
+
+               n = ice_tx_free_bufs(txq);
+
+               if (n == 0)
+                       break;
+       }
+
+       return i;
+}
+
+/*
+ * Tx done cleanup entry point.
+ *
+ * Looks up the adapter through the queue's port_id and dispatches on its
+ * Tx path flags: the vector variant when built for x86 and tx_vec_allowed
+ * is set, otherwise the simple variant when tx_simple_allowed is set,
+ * otherwise the full variant.
+ *
+ * txq:      Tx queue, passed as an opaque pointer to struct ice_tx_queue.
+ * free_cnt: forwarded unchanged to the selected variant.
+ *
+ * Returns the value returned by the selected variant.
+ */
+int
+ice_tx_done_cleanup(void *txq, uint32_t free_cnt)
+{
+       struct ice_tx_queue *q = (struct ice_tx_queue *)txq;
+       struct rte_eth_dev *dev = &rte_eth_devices[q->port_id];
+       struct ice_adapter *ad =
+               ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+
+#ifdef RTE_ARCH_X86
+       if (ad->tx_vec_allowed)
+               return ice_tx_done_cleanup_vec(q, free_cnt);
+#endif
+       if (ad->tx_simple_allowed)
+               return ice_tx_done_cleanup_simple(q, free_cnt);
+       else
+               return ice_tx_done_cleanup_full(q, free_cnt);
+}
+
 /* Populate 4 descriptors with data from 4 mbufs */
 static inline void
 tx4(volatile struct ice_tx_desc *txdp, struct rte_mbuf **pkts)
@@ -2725,7 +2977,7 @@ tx_xmit_pkts(struct ice_tx_queue *txq,
                txq->tx_tail = 0;
 
        /* Update the tx tail register */
-       ICE_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
+       ICE_PCI_REG_WC_WRITE(txq->qtx_tail, txq->tx_tail);
 
        return nb_pkts;
 }
@@ -2756,7 +3008,7 @@ ice_xmit_pkts_simple(void *tx_queue,
        return nb_tx;
 }
 
-void __attribute__((cold))
+void __rte_cold
 ice_set_rx_function(struct rte_eth_dev *dev)
 {
        PMD_INIT_FUNC_TRACE();
@@ -2765,10 +3017,12 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 #ifdef RTE_ARCH_X86
        struct ice_rx_queue *rxq;
        int i;
+       bool use_avx512 = false;
        bool use_avx2 = false;
 
        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-               if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed) {
+               if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed &&
+                               rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
                        ad->rx_vec_allowed = true;
                        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                                rxq = dev->data->rx_queues[i];
@@ -2778,8 +3032,19 @@ ice_set_rx_function(struct rte_eth_dev *dev)
                                }
                        }
 
-                       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-                       rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+                       if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512 &&
+                       rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
+                       rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1)
+#ifdef CC_AVX512_SUPPORT
+                               use_avx512 = true;
+#else
+                       PMD_DRV_LOG(NOTICE,
+                               "AVX512 is not supported in build env");
+#endif
+                       if (!use_avx512 &&
+                       (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+                       rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+                       rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
                                use_avx2 = true;
 
                } else {
@@ -2789,20 +3054,41 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 
        if (ad->rx_vec_allowed) {
                if (dev->data->scattered_rx) {
-                       PMD_DRV_LOG(DEBUG,
+                       if (use_avx512) {
+#ifdef CC_AVX512_SUPPORT
+                               PMD_DRV_LOG(NOTICE,
+                                       "Using AVX512 Vector Scattered Rx (port %d).",
+                                       dev->data->port_id);
+                               dev->rx_pkt_burst =
+                                       ice_recv_scattered_pkts_vec_avx512;
+#endif
+                       } else {
+                               PMD_DRV_LOG(DEBUG,
                                        "Using %sVector Scattered Rx (port %d).",
                                        use_avx2 ? "avx2 " : "",
                                        dev->data->port_id);
-                       dev->rx_pkt_burst = use_avx2 ?
+                               dev->rx_pkt_burst = use_avx2 ?
                                        ice_recv_scattered_pkts_vec_avx2 :
                                        ice_recv_scattered_pkts_vec;
+                       }
                } else {
-                       PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
+                       if (use_avx512) {
+#ifdef CC_AVX512_SUPPORT
+                               PMD_DRV_LOG(NOTICE,
+                                       "Using AVX512 Vector Rx (port %d).",
+                                       dev->data->port_id);
+                               dev->rx_pkt_burst =
+                                       ice_recv_pkts_vec_avx512;
+#endif
+                       } else {
+                               PMD_DRV_LOG(DEBUG,
+                                       "Using %sVector Rx (port %d).",
                                        use_avx2 ? "avx2 " : "",
                                        dev->data->port_id);
-                       dev->rx_pkt_burst = use_avx2 ?
-                                               ice_recv_pkts_vec_avx2 :
-                                               ice_recv_pkts_vec;
+                               dev->rx_pkt_burst = use_avx2 ?
+                                       ice_recv_pkts_vec_avx2 :
+                                       ice_recv_pkts_vec;
+                       }
                }
                return;
        }
@@ -2831,40 +3117,46 @@ ice_set_rx_function(struct rte_eth_dev *dev)
        }
 }
 
+/*
+ * Lookup table pairing each Rx burst function with the description string
+ * that ice_rx_burst_mode_get() copies into rte_eth_burst_mode::info.
+ * Vector entries are compiled in only for RTE_ARCH_X86; the AVX512 entries
+ * additionally require CC_AVX512_SUPPORT.
+ */
+static const struct {
+       eth_rx_burst_t pkt_burst;
+       const char *info;
+} ice_rx_burst_infos[] = {
+       { ice_recv_scattered_pkts,          "Scalar Scattered" },
+       { ice_recv_pkts_bulk_alloc,         "Scalar Bulk Alloc" },
+       { ice_recv_pkts,                    "Scalar" },
+#ifdef RTE_ARCH_X86
+#ifdef CC_AVX512_SUPPORT
+       { ice_recv_scattered_pkts_vec_avx512, "Vector AVX512 Scattered" },
+       { ice_recv_pkts_vec_avx512,           "Vector AVX512" },
+#endif
+       { ice_recv_scattered_pkts_vec_avx2, "Vector AVX2 Scattered" },
+       { ice_recv_pkts_vec_avx2,           "Vector AVX2" },
+       { ice_recv_scattered_pkts_vec,      "Vector SSE Scattered" },
+       { ice_recv_pkts_vec,                "Vector SSE" },
+#endif
+};
+
+/*
+ * Report which Rx burst function is currently installed on dev.
+ *
+ * Searches ice_rx_burst_infos[] for dev->rx_pkt_burst and, on a match,
+ * writes the associated description into mode->info with snprintf()
+ * bounded by sizeof(mode->info). queue_id is unused.
+ *
+ * Returns 0 when a match is found, -EINVAL otherwise.
+ */
 int
 ice_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
                       struct rte_eth_burst_mode *mode)
 {
         eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
-       uint64_t options;
-
-       if (pkt_burst == ice_recv_scattered_pkts)
-               options = RTE_ETH_BURST_SCALAR | RTE_ETH_BURST_SCATTERED;
-       else if (pkt_burst == ice_recv_pkts_bulk_alloc)
-               options = RTE_ETH_BURST_SCALAR | RTE_ETH_BURST_BULK_ALLOC;
-       else if (pkt_burst == ice_recv_pkts)
-               options = RTE_ETH_BURST_SCALAR;
-#ifdef RTE_ARCH_X86
-       else if (pkt_burst == ice_recv_scattered_pkts_vec_avx2)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_AVX2 |
-                         RTE_ETH_BURST_SCATTERED;
-       else if (pkt_burst == ice_recv_pkts_vec_avx2)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_AVX2;
-       else if (pkt_burst == ice_recv_scattered_pkts_vec)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_SSE |
-                         RTE_ETH_BURST_SCATTERED;
-       else if (pkt_burst == ice_recv_pkts_vec)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_SSE;
-#endif
-       else
-               options = 0;
+       int ret = -EINVAL;
+       unsigned int i;
 
-       mode->options = options;
+       for (i = 0; i < RTE_DIM(ice_rx_burst_infos); ++i) {
+               if (pkt_burst == ice_rx_burst_infos[i].pkt_burst) {
+                       snprintf(mode->info, sizeof(mode->info), "%s",
+                                ice_rx_burst_infos[i].info);
+                       ret = 0;
+                       break;
+               }
+       }
 
-       return options != 0 ? 0 : -EINVAL;
+       return ret;
 }
 
-void __attribute__((cold))
+void __rte_cold
 ice_set_tx_function_flag(struct rte_eth_dev *dev, struct ice_tx_queue *txq)
 {
        struct ice_adapter *ad =
@@ -2933,7 +3225,7 @@ ice_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
        return i;
 }
 
-void __attribute__((cold))
+void __rte_cold
 ice_set_tx_function(struct rte_eth_dev *dev)
 {
        struct ice_adapter *ad =
@@ -2941,10 +3233,12 @@ ice_set_tx_function(struct rte_eth_dev *dev)
 #ifdef RTE_ARCH_X86
        struct ice_tx_queue *txq;
        int i;
+       bool use_avx512 = false;
        bool use_avx2 = false;
 
        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-               if (!ice_tx_vec_dev_check(dev)) {
+               if (!ice_tx_vec_dev_check(dev) &&
+                               rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
                        ad->tx_vec_allowed = true;
                        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                                txq = dev->data->tx_queues[i];
@@ -2954,8 +3248,19 @@ ice_set_tx_function(struct rte_eth_dev *dev)
                                }
                        }
 
-                       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-                       rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+                       if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512 &&
+                       rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
+                       rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1)
+#ifdef CC_AVX512_SUPPORT
+                               use_avx512 = true;
+#else
+                       PMD_DRV_LOG(NOTICE,
+                               "AVX512 is not supported in build env");
+#endif
+                       if (!use_avx512 &&
+                       (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+                       rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+                       rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
                                use_avx2 = true;
 
                } else {
@@ -2964,12 +3269,20 @@ ice_set_tx_function(struct rte_eth_dev *dev)
        }
 
        if (ad->tx_vec_allowed) {
-               PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
-                           use_avx2 ? "avx2 " : "",
-                           dev->data->port_id);
-               dev->tx_pkt_burst = use_avx2 ?
-                                   ice_xmit_pkts_vec_avx2 :
-                                   ice_xmit_pkts_vec;
+               if (use_avx512) {
+#ifdef CC_AVX512_SUPPORT
+                       PMD_DRV_LOG(NOTICE, "Using AVX512 Vector Tx (port %d).",
+                                   dev->data->port_id);
+                       dev->tx_pkt_burst = ice_xmit_pkts_vec_avx512;
+#endif
+               } else {
+                       PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
+                                   use_avx2 ? "avx2 " : "",
+                                   dev->data->port_id);
+                       dev->tx_pkt_burst = use_avx2 ?
+                                           ice_xmit_pkts_vec_avx2 :
+                                           ice_xmit_pkts_vec;
+               }
                dev->tx_pkt_prepare = NULL;
 
                return;
@@ -2987,29 +3300,39 @@ ice_set_tx_function(struct rte_eth_dev *dev)
        }
 }
 
+/*
+ * Lookup table pairing each Tx burst function with the description string
+ * that ice_tx_burst_mode_get() copies into rte_eth_burst_mode::info.
+ * Vector entries are compiled in only for RTE_ARCH_X86; the AVX512 entry
+ * additionally requires CC_AVX512_SUPPORT.
+ */
+static const struct {
+       eth_tx_burst_t pkt_burst;
+       const char *info;
+} ice_tx_burst_infos[] = {
+       { ice_xmit_pkts_simple,   "Scalar Simple" },
+       { ice_xmit_pkts,          "Scalar" },
+#ifdef RTE_ARCH_X86
+#ifdef CC_AVX512_SUPPORT
+       { ice_xmit_pkts_vec_avx512, "Vector AVX512" },
+#endif
+       { ice_xmit_pkts_vec_avx2, "Vector AVX2" },
+       { ice_xmit_pkts_vec,      "Vector SSE" },
+#endif
+};
+
+/*
+ * Report which Tx burst function is currently installed on dev.
+ *
+ * Searches ice_tx_burst_infos[] for dev->tx_pkt_burst and, on a match,
+ * writes the associated description into mode->info with snprintf()
+ * bounded by sizeof(mode->info). queue_id is unused.
+ *
+ * Returns 0 when a match is found, -EINVAL otherwise.
+ */
 int
 ice_tx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
                       struct rte_eth_burst_mode *mode)
 {
         eth_tx_burst_t pkt_burst = dev->tx_pkt_burst;
-       uint64_t options;
-
-       if (pkt_burst == ice_xmit_pkts_simple)
-               options = RTE_ETH_BURST_SCALAR | RTE_ETH_BURST_SIMPLE;
-       else if (pkt_burst == ice_xmit_pkts)
-               options = RTE_ETH_BURST_SCALAR;
-#ifdef RTE_ARCH_X86
-       else if (pkt_burst == ice_xmit_pkts_vec_avx2)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_AVX2;
-       else if (pkt_burst == ice_xmit_pkts_vec)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_SSE;
-#endif
-       else
-               options = 0;
+       int ret = -EINVAL;
+       unsigned int i;
 
-       mode->options = options;
+       for (i = 0; i < RTE_DIM(ice_tx_burst_infos); ++i) {
+               if (pkt_burst == ice_tx_burst_infos[i].pkt_burst) {
+                       snprintf(mode->info, sizeof(mode->info), "%s",
+                                ice_tx_burst_infos[i].info);
+                       ret = 0;
+                       break;
+               }
+       }
 
-       return options != 0 ? 0 : -EINVAL;
+       return ret;
 }
 
 /* For each value it means, datasheet of hardware can tell more details
@@ -3229,7 +3552,7 @@ ice_get_default_pkt_type(uint16_t ptype)
                       RTE_PTYPE_L4_TCP,
                [93] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
                       RTE_PTYPE_L4_SCTP,
-               [94] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+               [94] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
                       RTE_PTYPE_L4_ICMP,
 
                /* IPv6 --> IPv4 */
@@ -3555,7 +3878,7 @@ ice_get_default_pkt_type(uint16_t ptype)
        return type_table[ptype];
 }
 
-void __attribute__((cold))
+void __rte_cold
 ice_set_default_ptype_table(struct rte_eth_dev *dev)
 {
        struct ice_adapter *ad =
@@ -3566,12 +3889,81 @@ ice_set_default_ptype_table(struct rte_eth_dev *dev)
                ad->ptype_tbl[i] = ice_get_default_pkt_type(i);
 }
 
+/*
+ * Bit positions and masks applied to qword1 of a programming status
+ * write-back descriptor by ice_check_fdir_programming_status().
+ * PROGID is a 2-bit field starting at bit 1; the two values handled are
+ * PROG_ADD (0) and PROG_DEL (1).
+ */
+#define ICE_RX_PROG_STATUS_DESC_WB_QW1_PROGID_S        1
+#define ICE_RX_PROG_STATUS_DESC_WB_QW1_PROGID_M        \
+                       (0x3UL << ICE_RX_PROG_STATUS_DESC_WB_QW1_PROGID_S)
+#define ICE_RX_PROG_STATUS_DESC_WB_QW1_PROG_ADD 0
+#define ICE_RX_PROG_STATUS_DESC_WB_QW1_PROG_DEL 0x1
+
+/* FAIL (bit 4): rule add/remove failed. FAIL_PROF (bit 5): profile failed. */
+#define ICE_RX_PROG_STATUS_DESC_WB_QW1_FAIL_S  4
+#define ICE_RX_PROG_STATUS_DESC_WB_QW1_FAIL_M  \
+       (1 << ICE_RX_PROG_STATUS_DESC_WB_QW1_FAIL_S)
+#define ICE_RX_PROG_STATUS_DESC_WB_QW1_FAIL_PROF_S     5
+#define ICE_RX_PROG_STATUS_DESC_WB_QW1_FAIL_PROF_M     \
+       (1 << ICE_RX_PROG_STATUS_DESC_WB_QW1_FAIL_PROF_S)
+
+/*
+ * Check the programming status descriptor at rx_tail of the given Rx queue.
+ * Called after a Flow Director programming descriptor has been posted on
+ * the Tx queue.
+ *
+ * Returns -EAGAIN while the descriptor's DD bit is not set (nothing is
+ * consumed in that case), 0 when the descriptor reports no failure, and
+ * -EINVAL when either the FAIL or the FAIL_PROF bit is set.
+ *
+ * Whenever the DD bit is set the descriptor is consumed: its
+ * status_error_len is cleared, rx_tail advances (wrapping at nb_rx_desc)
+ * and the queue tail register is rewritten.
+ */
+static inline int
+ice_check_fdir_programming_status(struct ice_rx_queue *rxq)
+{
+       volatile union ice_32byte_rx_desc *rxdp;
+       uint64_t qword1;
+       uint32_t rx_status;
+       uint32_t error;
+       uint32_t id;
+       int ret = -EAGAIN;
+
+       /* Reinterpret the ring entry at rx_tail as a 32-byte descriptor */
+       rxdp = (volatile union ice_32byte_rx_desc *)
+               (&rxq->rx_ring[rxq->rx_tail]);
+       qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
+       rx_status = (qword1 & ICE_RXD_QW1_STATUS_M)
+                       >> ICE_RXD_QW1_STATUS_S;
+
+       if (rx_status & (1 << ICE_RX_DESC_STATUS_DD_S)) {
+               ret = 0;
+               error = (qword1 & ICE_RX_PROG_STATUS_DESC_WB_QW1_FAIL_M) >>
+                       ICE_RX_PROG_STATUS_DESC_WB_QW1_FAIL_S;
+               id = (qword1 & ICE_RX_PROG_STATUS_DESC_WB_QW1_PROGID_M) >>
+                       ICE_RX_PROG_STATUS_DESC_WB_QW1_PROGID_S;
+               if (error) {
+                       /* PROGID tells whether an add or a remove failed */
+                       if (id == ICE_RX_PROG_STATUS_DESC_WB_QW1_PROG_ADD)
+                               PMD_DRV_LOG(ERR, "Failed to add FDIR rule.");
+                       else if (id == ICE_RX_PROG_STATUS_DESC_WB_QW1_PROG_DEL)
+                               PMD_DRV_LOG(ERR, "Failed to remove FDIR rule.");
+                       ret = -EINVAL;
+                       goto err;
+               }
+               error = (qword1 & ICE_RX_PROG_STATUS_DESC_WB_QW1_FAIL_PROF_M) >>
+                       ICE_RX_PROG_STATUS_DESC_WB_QW1_FAIL_PROF_S;
+               if (error) {
+                       PMD_DRV_LOG(ERR, "Failed to create FDIR profile.");
+                       ret = -EINVAL;
+               }
+err:
+               /* Reached on success and on failure: consume the descriptor */
+               rxdp->wb.qword1.status_error_len = 0;
+               rxq->rx_tail++;
+               if (unlikely(rxq->rx_tail == rxq->nb_rx_desc))
+                       rxq->rx_tail = 0;
+               /* Tail register gets the index preceding the new rx_tail */
+               if (rxq->rx_tail == 0)
+                       ICE_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1);
+               else
+                       ICE_PCI_REG_WRITE(rxq->qrx_tail, rxq->rx_tail - 1);
+       }
+
+       return ret;
+}
+
 #define ICE_FDIR_MAX_WAIT_US 10000
 
 int
 ice_fdir_programming(struct ice_pf *pf, struct ice_fltr_desc *fdir_desc)
 {
        struct ice_tx_queue *txq = pf->fdir.txq;
+       struct ice_rx_queue *rxq = pf->fdir.rxq;
        volatile struct ice_fltr_desc *fdirdp;
        volatile struct ice_tx_desc *txdp;
        uint32_t td_cmd;
@@ -3609,5 +4001,19 @@ ice_fdir_programming(struct ice_pf *pf, struct ice_fltr_desc *fdir_desc)
                return -ETIMEDOUT;
        }
 
-       return 0;
+       for (; i < ICE_FDIR_MAX_WAIT_US; i++) {
+               int ret;
+
+               ret = ice_check_fdir_programming_status(rxq);
+               if (ret == -EAGAIN)
+                       rte_delay_us(1);
+               else
+                       return ret;
+       }
+
+       PMD_DRV_LOG(ERR,
+                   "Failed to program FDIR filter: programming status reported.");
+       return -ETIMEDOUT;
+
+
 }