net/bnxt: fix mark id update to mbuf
[dpdk.git] / drivers/net/bnxt/bnxt_rxr.c
index bee67d3..43b1256 100644
@@ -1,34 +1,6 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) Broadcom Limited.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Broadcom Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2014-2018 Broadcom
+ * All rights reserved.
  */
 
 #include <inttypes.h>
 #include <rte_memory.h>
 
 #include "bnxt.h"
-#include "bnxt_cpr.h"
+#include "bnxt_reps.h"
 #include "bnxt_ring.h"
 #include "bnxt_rxr.h"
 #include "bnxt_rxq.h"
 #include "hsi_struct_def_dpdk.h"
+#ifdef RTE_LIBRTE_IEEE1588
+#include "bnxt_hwrm.h"
+#endif
+
+#include <bnxt_tf_common.h>
+#include <ulp_mark_mgr.h>
 
 /*
  * RX Ring handling
@@ -65,17 +43,18 @@ static inline int bnxt_alloc_rx_data(struct bnxt_rx_queue *rxq,
 {
        struct rx_prod_pkt_bd *rxbd = &rxr->rx_desc_ring[prod];
        struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[prod];
-       struct rte_mbuf *data;
+       struct rte_mbuf *mbuf;
 
-       data = __bnxt_alloc_rx_data(rxq->mb_pool);
-       if (!data) {
-               rte_atomic64_inc(&rxq->bp->rx_mbuf_alloc_fail);
+       mbuf = __bnxt_alloc_rx_data(rxq->mb_pool);
+       if (!mbuf) {
+               rte_atomic64_inc(&rxq->rx_mbuf_alloc_fail);
                return -ENOMEM;
        }
 
-       rx_buf->mbuf = data;
+       rx_buf->mbuf = mbuf;
+       mbuf->data_off = RTE_PKTMBUF_HEADROOM;
 
-       rxbd->addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR(rx_buf->mbuf));
+       rxbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
        return 0;
 }
@@ -86,23 +65,28 @@ static inline int bnxt_alloc_ag_data(struct bnxt_rx_queue *rxq,
 {
        struct rx_prod_pkt_bd *rxbd = &rxr->ag_desc_ring[prod];
        struct bnxt_sw_rx_bd *rx_buf = &rxr->ag_buf_ring[prod];
-       struct rte_mbuf *data;
+       struct rte_mbuf *mbuf;
 
-       data = __bnxt_alloc_rx_data(rxq->mb_pool);
-       if (!data) {
-               rte_atomic64_inc(&rxq->bp->rx_mbuf_alloc_fail);
-               return -ENOMEM;
+       if (rxbd == NULL) {
+               PMD_DRV_LOG(ERR, "Jumbo Frame. rxbd is NULL\n");
+               return -EINVAL;
        }
 
-       if (rxbd == NULL)
-               RTE_LOG(ERR, PMD, "Jumbo Frame. rxbd is NULL\n");
-       if (rx_buf == NULL)
-               RTE_LOG(ERR, PMD, "Jumbo Frame. rx_buf is NULL\n");
+       if (rx_buf == NULL) {
+               PMD_DRV_LOG(ERR, "Jumbo Frame. rx_buf is NULL\n");
+               return -EINVAL;
+       }
 
+       mbuf = __bnxt_alloc_rx_data(rxq->mb_pool);
+       if (!mbuf) {
+               rte_atomic64_inc(&rxq->rx_mbuf_alloc_fail);
+               return -ENOMEM;
+       }
 
-       rx_buf->mbuf = data;
+       rx_buf->mbuf = mbuf;
+       mbuf->data_off = RTE_PKTMBUF_HEADROOM;
 
-       rxbd->addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR(rx_buf->mbuf));
+       rxbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
        return 0;
 }
@@ -123,30 +107,11 @@ static inline void bnxt_reuse_rx_mbuf(struct bnxt_rx_ring_info *rxr,
 
        prod_bd = &rxr->rx_desc_ring[prod];
 
-       prod_bd->addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR(mbuf));
+       prod_bd->address = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
        rxr->rx_prod = prod;
 }
 
-#ifdef BNXT_DEBUG
-static void bnxt_reuse_ag_mbuf(struct bnxt_rx_ring_info *rxr, uint16_t cons,
-                              struct rte_mbuf *mbuf)
-{
-       uint16_t prod = rxr->ag_prod;
-       struct bnxt_sw_rx_bd *prod_rx_buf;
-       struct rx_prod_pkt_bd *prod_bd, *cons_bd;
-
-       prod_rx_buf = &rxr->ag_buf_ring[prod];
-
-       prod_rx_buf->mbuf = mbuf;
-
-       prod_bd = &rxr->ag_desc_ring[prod];
-       cons_bd = &rxr->ag_desc_ring[cons];
-
-       prod_bd->addr = cons_bd->addr;
-}
-#endif
-
 static inline
 struct rte_mbuf *bnxt_consume_rx_buf(struct bnxt_rx_ring_info *rxr,
                                     uint16_t cons)
@@ -166,12 +131,13 @@ static void bnxt_tpa_start(struct bnxt_rx_queue *rxq,
                           struct rx_tpa_start_cmpl_hi *tpa_start1)
 {
        struct bnxt_rx_ring_info *rxr = rxq->rx_ring;
-       uint8_t agg_id = rte_le_to_cpu_32(tpa_start->agg_id &
-               RX_TPA_START_CMPL_AGG_ID_MASK) >> RX_TPA_START_CMPL_AGG_ID_SFT;
+       uint16_t agg_id;
        uint16_t data_cons;
        struct bnxt_tpa_info *tpa_info;
        struct rte_mbuf *mbuf;
 
+       agg_id = bnxt_tpa_start_agg_id(rxq->bp, tpa_start);
+
        data_cons = tpa_start->opaque;
        tpa_info = &rxr->tpa_info[agg_id];
 
@@ -179,6 +145,7 @@ static void bnxt_tpa_start(struct bnxt_rx_queue *rxq,
 
        bnxt_reuse_rx_mbuf(rxr, tpa_info->mbuf);
 
+       tpa_info->agg_count = 0;
        tpa_info->mbuf = mbuf;
        tpa_info->len = rte_le_to_cpu_32(tpa_start->len);
 
@@ -199,7 +166,7 @@ static void bnxt_tpa_start(struct bnxt_rx_queue *rxq,
        if (tpa_start1->flags2 &
            rte_cpu_to_le_32(RX_TPA_START_CMPL_FLAGS2_META_FORMAT_VLAN)) {
                mbuf->vlan_tci = rte_le_to_cpu_32(tpa_start1->metadata);
-               mbuf->ol_flags |= PKT_RX_VLAN_PKT;
+               mbuf->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
        }
        if (likely(tpa_start1->flags2 &
                   rte_cpu_to_le_32(RX_TPA_START_CMPL_FLAGS2_L4_CS_CALC)))
@@ -219,6 +186,9 @@ static int bnxt_agg_bufs_valid(struct bnxt_cp_ring_info *cpr,
        raw_cp_cons = ADV_RAW_CMP(raw_cp_cons, agg_bufs);
        last_cp_cons = RING_CMP(cpr->cp_ring_struct, raw_cp_cons);
        agg_cmpl = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[last_cp_cons];
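+       /* Track the expected completion valid-bit phase across ring wrap */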
+       cpr->valid = FLIP_VALID(raw_cp_cons,
+                               cpr->cp_ring_struct->ring_mask,
+                               cpr->valid);
        return CMP_VALID(agg_cmpl, raw_cp_cons, cpr->cp_ring_struct);
 }
 
@@ -231,7 +201,7 @@ static int bnxt_prod_ag_mbuf(struct bnxt_rx_queue *rxq)
        /* TODO batch allocation for better performance */
        while (rte_bitmap_get(rxr->ag_bitmap, next)) {
                if (unlikely(bnxt_alloc_ag_data(rxq, rxr, next))) {
-                       RTE_LOG(ERR, PMD,
+                       PMD_DRV_LOG(ERR,
                                "agg mbuf alloc failed: prod=0x%x\n", next);
                        break;
                }
@@ -245,7 +215,7 @@ static int bnxt_prod_ag_mbuf(struct bnxt_rx_queue *rxq)
 
 static int bnxt_rx_pages(struct bnxt_rx_queue *rxq,
                         struct rte_mbuf *mbuf, uint32_t *tmp_raw_cons,
-                        uint8_t agg_buf)
+                        uint8_t agg_buf, struct bnxt_tpa_info *tpa_info)
 {
        struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
        struct bnxt_rx_ring_info *rxr = rxq->rx_ring;
@@ -253,14 +223,20 @@ static int bnxt_rx_pages(struct bnxt_rx_queue *rxq,
        uint16_t cp_cons, ag_cons;
        struct rx_pkt_cmpl *rxcmp;
        struct rte_mbuf *last = mbuf;
+       bool is_thor_tpa = tpa_info && BNXT_CHIP_THOR(rxq->bp);
 
        for (i = 0; i < agg_buf; i++) {
                struct bnxt_sw_rx_bd *ag_buf;
                struct rte_mbuf *ag_mbuf;
-               *tmp_raw_cons = NEXT_RAW_CMP(*tmp_raw_cons);
-               cp_cons = RING_CMP(cpr->cp_ring_struct, *tmp_raw_cons);
-               rxcmp = (struct rx_pkt_cmpl *)
+
+               if (is_thor_tpa) {
+                       rxcmp = (void *)&tpa_info->agg_arr[i];
+               } else {
+                       *tmp_raw_cons = NEXT_RAW_CMP(*tmp_raw_cons);
+                       cp_cons = RING_CMP(cpr->cp_ring_struct, *tmp_raw_cons);
+                       rxcmp = (struct rx_pkt_cmpl *)
                                        &cpr->cp_desc_ring[cp_cons];
+               }
 
 #ifdef BNXT_DEBUG
                bnxt_dump_cmpl(cp_cons, rxcmp);
@@ -297,34 +273,47 @@ static inline struct rte_mbuf *bnxt_tpa_end(
                struct bnxt_rx_queue *rxq,
                uint32_t *raw_cp_cons,
                struct rx_tpa_end_cmpl *tpa_end,
-               struct rx_tpa_end_cmpl_hi *tpa_end1 __rte_unused)
+               struct rx_tpa_end_cmpl_hi *tpa_end1)
 {
        struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
        struct bnxt_rx_ring_info *rxr = rxq->rx_ring;
-       uint8_t agg_id = (tpa_end->agg_id & RX_TPA_END_CMPL_AGG_ID_MASK)
-                       >> RX_TPA_END_CMPL_AGG_ID_SFT;
+       uint16_t agg_id;
        struct rte_mbuf *mbuf;
        uint8_t agg_bufs;
+       uint8_t payload_offset;
        struct bnxt_tpa_info *tpa_info;
 
+       if (BNXT_CHIP_THOR(rxq->bp)) {
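+       /* Thor reports TPA end via the v2 completion layout. Agg
+        * completions were already buffered per agg_id, so no
+        * bnxt_agg_bufs_valid() check is needed on this path.
+        */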
+               struct rx_tpa_v2_end_cmpl *th_tpa_end;
+               struct rx_tpa_v2_end_cmpl_hi *th_tpa_end1;
+
+               th_tpa_end = (void *)tpa_end;
+               th_tpa_end1 = (void *)tpa_end1;
+               agg_id = BNXT_TPA_END_AGG_ID_TH(th_tpa_end);
+               agg_bufs = BNXT_TPA_END_AGG_BUFS_TH(th_tpa_end1);
+               payload_offset = th_tpa_end1->payload_offset;
+       } else {
+               agg_id = BNXT_TPA_END_AGG_ID(tpa_end);
+               agg_bufs = BNXT_TPA_END_AGG_BUFS(tpa_end);
+               if (!bnxt_agg_bufs_valid(cpr, agg_bufs, *raw_cp_cons))
+                       return NULL;
+               payload_offset = tpa_end->payload_offset;
+       }
+
        tpa_info = &rxr->tpa_info[agg_id];
        mbuf = tpa_info->mbuf;
        RTE_ASSERT(mbuf != NULL);
 
        rte_prefetch0(mbuf);
-       agg_bufs = (rte_le_to_cpu_32(tpa_end->agg_bufs_v1) &
-               RX_TPA_END_CMPL_AGG_BUFS_MASK) >> RX_TPA_END_CMPL_AGG_BUFS_SFT;
        if (agg_bufs) {
-               if (!bnxt_agg_bufs_valid(cpr, agg_bufs, *raw_cp_cons))
-                       return NULL;
-               bnxt_rx_pages(rxq, mbuf, raw_cp_cons, agg_bufs);
+               bnxt_rx_pages(rxq, mbuf, raw_cp_cons, agg_bufs, tpa_info);
        }
-       mbuf->l4_len = tpa_end->payload_offset;
+       mbuf->l4_len = payload_offset;
 
        struct rte_mbuf *new_data = __bnxt_alloc_rx_data(rxq->mb_pool);
        RTE_ASSERT(new_data != NULL);
        if (!new_data) {
-               rte_atomic64_inc(&rxq->bp->rx_mbuf_alloc_fail);
+               rte_atomic64_inc(&rxq->rx_mbuf_alloc_fail);
                return NULL;
        }
        tpa_info->mbuf = new_data;
@@ -332,8 +321,229 @@ static inline struct rte_mbuf *bnxt_tpa_end(
        return mbuf;
 }
 
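+/* Derive the RTE_PTYPE_* packet type from the Rx completion.
+ * When t_ipcs is set, checksums were computed on the inner (tunnel)
+ * headers, so the inner L3/L4 packet types are reported.
+ */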
+static uint32_t
+bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1)
+{
+       uint32_t l3, pkt_type = 0;
+       uint32_t t_ipcs = 0, ip6 = 0, vlan = 0;
+       uint32_t flags_type;
+
+       vlan = !!(rxcmp1->flags2 &
+               rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN));
+       pkt_type |= vlan ? RTE_PTYPE_L2_ETHER_VLAN : RTE_PTYPE_L2_ETHER;
+
+       t_ipcs = !!(rxcmp1->flags2 &
+               rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC));
+       ip6 = !!(rxcmp1->flags2 &
+                rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_IP_TYPE));
+
+       flags_type = rxcmp->flags_type &
+               rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
+
+       if (!t_ipcs && !ip6)
+               l3 = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
+       else if (!t_ipcs && ip6)
+               l3 = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
+       else if (t_ipcs && !ip6)
+               l3 = RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
+       else
+               l3 = RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
+
+       switch (flags_type) {
+       case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_ICMP):
+               if (!t_ipcs)
+                       pkt_type |= l3 | RTE_PTYPE_L4_ICMP;
+               else
+                       pkt_type |= l3 | RTE_PTYPE_INNER_L4_ICMP;
+               break;
+
+       case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_TCP):
+               if (!t_ipcs)
+                       pkt_type |= l3 | RTE_PTYPE_L4_TCP;
+               else
+                       pkt_type |= l3 | RTE_PTYPE_INNER_L4_TCP;
+               break;
+
+       case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_UDP):
+               if (!t_ipcs)
+                       pkt_type |= l3 | RTE_PTYPE_L4_UDP;
+               else
+                       pkt_type |= l3 | RTE_PTYPE_INNER_L4_UDP;
+               break;
+
+       case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_IP):
+               pkt_type |= l3;
+               break;
+       }
+
+       return pkt_type;
+}
+
+#ifdef RTE_LIBRTE_IEEE1588
+static void
+bnxt_get_rx_ts_thor(struct bnxt *bp, uint32_t rx_ts_cmpl)
+{
+       uint64_t systime_cycles = 0;
+
+       if (!BNXT_CHIP_THOR(bp))
+               return;
+
+       /* On Thor, Rx timestamps are provided directly in the
+        * Rx completion records to the driver. Only 32 bits of
+        * the timestamp are present in the completion. The driver
+        * needs to read the current 48 bit free running timer using
+        * the HWRM_PORT_TS_QUERY command and combine the upper 16
+        * bits from the HWRM response with the lower 32 bits in the
+        * Rx completion to produce the 48 bit timestamp for the Rx
+        * packet.
+        */
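+       /* For example (hypothetical values): a timer reading of
+        * 0x123489ABCDEF masked to 0x123400000000, combined with
+        * rx_ts_cmpl = 0x89ABCDF5, yields 0x123489ABCDF5.
+        */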
+       bnxt_hwrm_port_ts_query(bp, BNXT_PTP_FLAGS_CURRENT_TIME,
+                               &systime_cycles);
+       bp->ptp_cfg->rx_timestamp = (systime_cycles & 0xFFFF00000000);
+       bp->ptp_cfg->rx_timestamp |= rx_ts_cmpl;
+}
+#endif
+
+static uint32_t
+bnxt_ulp_set_mark_in_mbuf(struct bnxt *bp, struct rx_pkt_cmpl_hi *rxcmp1,
+                         struct rte_mbuf *mbuf, uint32_t *vfr_flag)
+{
+       uint32_t cfa_code;
+       uint32_t meta_fmt;
+       uint32_t meta;
+       bool gfid = false;
+       uint32_t mark_id;
+       uint32_t flags2;
+       uint32_t gfid_support = 0;
+       int rc;
+
+       if (BNXT_GFID_ENABLED(bp))
+               gfid_support = 1;
+
+       cfa_code = rte_le_to_cpu_16(rxcmp1->cfa_code);
+       flags2 = rte_le_to_cpu_32(rxcmp1->flags2);
+       meta = rte_le_to_cpu_32(rxcmp1->metadata);
+
+       /*
+        * Bits [6:4] of the flags2 field hold extra info that
+        * indicates whether the flow is in TCAM, EM, or EEM
+        */
+       meta_fmt = (flags2 & BNXT_CFA_META_FMT_MASK) >>
+               BNXT_CFA_META_FMT_SHFT;
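+       /* e.g. if bits [6:4] of flags2 are 0b110, meta_fmt is 6 (EEM) */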
+
+       switch (meta_fmt) {
+       case 0:
+               if (gfid_support) {
+                       /* Not an LFID or GFID, a flush cmd. */
+                       goto skip_mark;
+               } else {
+                       /* LFID mode, no vlan scenario */
+                       gfid = false;
+               }
+               break;
+       case 4:
+       case 5:
+               /*
+                * EM/TCAM case
+                * Assume that EM doesn't support Mark due to GFID
+                * collisions with EEM.  Simply return without setting the mark
+                * in the mbuf.
+                */
+               if (BNXT_CFA_META_EM_TEST(meta)) {
+                       /* This is an EM hit: {EM(1), GFID[27:16], 19'd0 or vtag} */
+                       gfid = true;
+                       meta >>= BNXT_RX_META_CFA_CODE_SHIFT;
+                       cfa_code |= meta << BNXT_CFA_CODE_META_SHIFT;
+               } else {
+                       /*
+                        * It is a TCAM entry, so it is an LFID.
+                        * The TCAM index and mode can also be determined
+                        * by decoding the metadata, but we do not
+                        * use them for now.
+                        */
+               }
+               break;
+       case 6:
+       case 7:
+               /* EEM Case, only using gfid in EEM for now. */
+               gfid = true;
+
+               /*
+                * For EEM flows, the first part of cfa_code is 16 bits.
+                * The second part is embedded in the metadata field
+                * from bit 19 onwards. The driver needs to ignore the
+                * first 19 bits of metadata and use the next 12 bits
+                * as the upper 12 bits of cfa_code.
+                */
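+               /* e.g. (hypothetical value) meta = 0x2BC80000 gives
+                * meta >> 19 = 0x579, so the final cfa_code is
+                * (0x579 << 16) | <original 16 bit cfa_code>.
+                */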
+               meta >>= BNXT_RX_META_CFA_CODE_SHIFT;
+               cfa_code |= meta << BNXT_CFA_CODE_META_SHIFT;
+               break;
+       default:
+               /* For other values, the cfa_code is assumed to be an LFID. */
+               break;
+       }
+
+       rc = ulp_mark_db_mark_get(bp->ulp_ctx, gfid,
+                                 cfa_code, vfr_flag, &mark_id);
+       if (!rc) {
+               /* VF to VFR Rx path, so skip mark_id injection in the mbuf */
+               if (vfr_flag && *vfr_flag)
+                       return mark_id;
+               /* Got the mark, write it to the mbuf and return */
+               mbuf->hash.fdir.hi = mark_id;
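+               /* Stash cfa_code in the upper 32 bits of udata64 */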
+               mbuf->udata64 = (cfa_code & 0xffffffffull) << 32;
+               mbuf->hash.fdir.id = rxcmp1->cfa_code;
+               mbuf->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+               return mark_id;
+       }
+
+skip_mark:
+       mbuf->hash.fdir.hi = 0;
+       mbuf->hash.fdir.id = 0;
+
+       return 0;
+}
+
+void bnxt_set_mark_in_mbuf(struct bnxt *bp,
+                          struct rx_pkt_cmpl_hi *rxcmp1,
+                          struct rte_mbuf *mbuf)
+{
+       uint32_t cfa_code = 0;
+       uint8_t meta_fmt = 0;
+       uint16_t flags2 = 0;
+       uint32_t meta = 0;
+
+       cfa_code = rte_le_to_cpu_16(rxcmp1->cfa_code);
+       if (!cfa_code)
+               return;
+
+       if (cfa_code && !bp->mark_table[cfa_code].valid)
+               return;
+
+       flags2 = rte_le_to_cpu_16(rxcmp1->flags2);
+       meta = rte_le_to_cpu_32(rxcmp1->metadata);
+       if (meta) {
+               meta >>= BNXT_RX_META_CFA_CODE_SHIFT;
+
+               /* Bits [6:4] of the flags2 field hold extra info that
+                * indicates whether the flow is in TCAM, EM, or EEM
+                */
+               meta_fmt = (flags2 & BNXT_CFA_META_FMT_MASK) >>
+                          BNXT_CFA_META_FMT_SHFT;
+
+               /* meta_fmt == 4 => 'b100 => 'b10x => EM.
+                * meta_fmt == 5 => 'b101 => 'b10x => EM + VLAN
+                * meta_fmt == 6 => 'b110 => 'b11x => EEM
+                * meta_fmt == 7 => 'b111 => 'b11x => EEM + VLAN.
+                */
+               meta_fmt >>= BNXT_CFA_META_FMT_EM_EEM_SHFT;
+       }
+
+       mbuf->hash.fdir.hi = bp->mark_table[cfa_code].mark_id;
+       mbuf->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+}
+
 static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
-                           struct bnxt_rx_queue *rxq, uint32_t *raw_cons)
+                      struct bnxt_rx_queue *rxq, uint32_t *raw_cons)
 {
        struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
        struct bnxt_rx_ring_info *rxr = rxq->rx_ring;
@@ -342,17 +552,31 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
        uint32_t tmp_raw_cons = *raw_cons;
        uint16_t cons, prod, cp_cons =
            RING_CMP(cpr->cp_ring_struct, tmp_raw_cons);
-#ifdef BNXT_DEBUG
-       uint16_t ag_cons;
-#endif
        struct rte_mbuf *mbuf;
        int rc = 0;
        uint8_t agg_buf = 0;
        uint16_t cmp_type;
+       uint32_t flags2_f = 0, vfr_flag = 0, mark_id = 0;
+       uint16_t flags_type;
+       struct bnxt *bp = rxq->bp;
 
        rxcmp = (struct rx_pkt_cmpl *)
            &cpr->cp_desc_ring[cp_cons];
 
+       cmp_type = CMP_TYPE(rxcmp);
+
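+       /* Thor TPA agg buffer completions arrive on their own; stash
+        * them in tpa_info->agg_arr until the TPA end completion.
+        */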
+       if (cmp_type == RX_TPA_V2_ABUF_CMPL_TYPE_RX_TPA_AGG) {
+               struct rx_tpa_v2_abuf_cmpl *rx_agg = (void *)rxcmp;
+               uint16_t agg_id = rte_cpu_to_le_16(rx_agg->agg_id);
+               struct bnxt_tpa_info *tpa_info;
+
+               tpa_info = &rxr->tpa_info[agg_id];
+               RTE_ASSERT(tpa_info->agg_count < 16);
+               tpa_info->agg_arr[tpa_info->agg_count++] = *rx_agg;
+               rc = -EINVAL; /* Continue w/o new mbuf */
+               goto next_rx;
+       }
+
        tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
        cp_cons = RING_CMP(cpr->cp_ring_struct, tmp_raw_cons);
        rxcmp1 = (struct rx_pkt_cmpl_hi *)&cpr->cp_desc_ring[cp_cons];
@@ -360,13 +584,16 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
        if (!CMP_VALID(rxcmp1, tmp_raw_cons, cpr->cp_ring_struct))
                return -EBUSY;
 
-       cmp_type = CMP_TYPE(rxcmp);
-       if (cmp_type == RX_PKT_CMPL_TYPE_RX_L2_TPA_START) {
+       cpr->valid = FLIP_VALID(cp_cons,
+                               cpr->cp_ring_struct->ring_mask,
+                               cpr->valid);
+
+       if (cmp_type == RX_TPA_START_CMPL_TYPE_RX_TPA_START) {
                bnxt_tpa_start(rxq, (struct rx_tpa_start_cmpl *)rxcmp,
                               (struct rx_tpa_start_cmpl_hi *)rxcmp1);
                rc = -EINVAL; /* Continue w/o new mbuf */
                goto next_rx;
-       } else if (cmp_type == RX_PKT_CMPL_TYPE_RX_L2_TPA_END) {
+       } else if (cmp_type == RX_TPA_END_CMPL_TYPE_RX_TPA_END) {
                mbuf = bnxt_tpa_end(rxq, &tmp_raw_cons,
                                   (struct rx_tpa_end_cmpl *)rxcmp,
                                   (struct rx_tpa_end_cmpl_hi *)rxcmp1);
@@ -388,42 +615,97 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
 
        cons = rxcmp->opaque;
        mbuf = bnxt_consume_rx_buf(rxr, cons);
-       rte_prefetch0(mbuf);
-
        if (mbuf == NULL)
-               return -ENOMEM;
+               return -EBUSY;
+
+       rte_prefetch0(mbuf);
 
+       mbuf->data_off = RTE_PKTMBUF_HEADROOM;
        mbuf->nb_segs = 1;
        mbuf->next = NULL;
        mbuf->pkt_len = rxcmp->len;
        mbuf->data_len = mbuf->pkt_len;
        mbuf->port = rxq->port_id;
        mbuf->ol_flags = 0;
-       if (rxcmp->flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID) {
+
+       flags_type = rte_le_to_cpu_16(rxcmp->flags_type);
+       if (flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID) {
                mbuf->hash.rss = rxcmp->rss_hash;
                mbuf->ol_flags |= PKT_RX_RSS_HASH;
-       } else {
-               mbuf->hash.fdir.id = rxcmp1->cfa_code;
-               mbuf->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
        }
 
+       if (BNXT_TRUFLOW_EN(bp))
+               mark_id = bnxt_ulp_set_mark_in_mbuf(rxq->bp, rxcmp1, mbuf,
+                                                   &vfr_flag);
+       else
+               bnxt_set_mark_in_mbuf(rxq->bp, rxcmp1, mbuf);
+
+#ifdef RTE_LIBRTE_IEEE1588
+       if (unlikely((flags_type & RX_PKT_CMPL_FLAGS_MASK) ==
+                    RX_PKT_CMPL_FLAGS_ITYPE_PTP_W_TIMESTAMP)) {
+               mbuf->ol_flags |= PKT_RX_IEEE1588_PTP | PKT_RX_IEEE1588_TMST;
+               bnxt_get_rx_ts_thor(rxq->bp, rxcmp1->reorder);
+       }
+#endif
        if (agg_buf)
-               bnxt_rx_pages(rxq, mbuf, &tmp_raw_cons, agg_buf);
+               bnxt_rx_pages(rxq, mbuf, &tmp_raw_cons, agg_buf, NULL);
 
        if (rxcmp1->flags2 & RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) {
                mbuf->vlan_tci = rxcmp1->metadata &
                        (RX_PKT_CMPL_METADATA_VID_MASK |
                        RX_PKT_CMPL_METADATA_DE |
                        RX_PKT_CMPL_METADATA_PRI_MASK);
-               mbuf->ol_flags |= PKT_RX_VLAN_PKT;
+               mbuf->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
        }
 
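+       /* flags2_0xf() is assumed to extract the checksum-status bits
+        * (IP/L4 CS calc, outer and tunnel) from the low nibble of flags2.
+        */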
+       flags2_f = flags2_0xf(rxcmp1);
+       /* IP Checksum */
+       if (likely(IS_IP_NONTUNNEL_PKT(flags2_f))) {
+               if (unlikely(RX_CMP_IP_CS_ERROR(rxcmp1)))
+                       mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
+               else if (unlikely(RX_CMP_IP_CS_UNKNOWN(rxcmp1)))
+                       mbuf->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
+               else
+                       mbuf->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
+       } else if (IS_IP_TUNNEL_PKT(flags2_f)) {
+               if (unlikely(RX_CMP_IP_OUTER_CS_ERROR(rxcmp1) ||
+                            RX_CMP_IP_CS_ERROR(rxcmp1)))
+                       mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
+               else if (unlikely(RX_CMP_IP_CS_UNKNOWN(rxcmp1)))
+                       mbuf->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
+               else
+                       mbuf->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
+       }
+
+       /* L4 Checksum */
+       if (likely(IS_L4_NONTUNNEL_PKT(flags2_f))) {
+               if (unlikely(RX_CMP_L4_INNER_CS_ERR2(rxcmp1)))
+                       mbuf->ol_flags |= PKT_RX_L4_CKSUM_BAD;
+               else
+                       mbuf->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
+       } else if (IS_L4_TUNNEL_PKT(flags2_f)) {
+               if (unlikely(RX_CMP_L4_INNER_CS_ERR2(rxcmp1)))
+                       mbuf->ol_flags |= PKT_RX_L4_CKSUM_BAD;
+               else
+                       mbuf->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
+               if (unlikely(RX_CMP_L4_OUTER_CS_ERR2(rxcmp1))) {
+                       mbuf->ol_flags |= PKT_RX_OUTER_L4_CKSUM_BAD;
+               } else if (unlikely(IS_L4_TUNNEL_PKT_ONLY_INNER_L4_CS
+                                   (flags2_f))) {
+                       mbuf->ol_flags |= PKT_RX_OUTER_L4_CKSUM_UNKNOWN;
+               } else {
+                       mbuf->ol_flags |= PKT_RX_OUTER_L4_CKSUM_GOOD;
+               }
+       } else if (unlikely(RX_CMP_L4_CS_UNKNOWN(rxcmp1))) {
+               mbuf->ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
+       }
+
+       mbuf->packet_type = bnxt_parse_pkt_type(rxcmp, rxcmp1);
+
 #ifdef BNXT_DEBUG
        if (rxcmp1->errors_v2 & RX_CMP_L2_ERRORS) {
                /* Re-install the mbuf back to the rx ring */
                bnxt_reuse_rx_mbuf(rxr, cons, mbuf);
-               if (agg_buf)
-                       bnxt_reuse_ag_mbuf(rxr, ag_cons, mbuf);
 
                rc = -EIO;
                goto next_rx;
@@ -446,17 +728,32 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
         */
        prod = RING_NEXT(rxr->rx_ring_struct, prod);
        if (bnxt_alloc_rx_data(rxq, rxr, prod)) {
-               RTE_LOG(ERR, PMD, "mbuf alloc failed with prod=0x%x\n", prod);
+               PMD_DRV_LOG(ERR, "mbuf alloc failed with prod=0x%x\n", prod);
                rc = -ENOMEM;
+               goto rx;
        }
        rxr->rx_prod = prod;
        /*
         * All MBUFs are allocated with the same size under DPDK,
         * no optimization for rx_copy_thresh
         */
-
+rx:
        *rx_pkt = mbuf;
 
+       if (BNXT_TRUFLOW_EN(bp) &&
+           (BNXT_VF_IS_TRUSTED(bp) || BNXT_PF(bp)) &&
+           vfr_flag) {
+               if (!bnxt_vfr_recv(mark_id, rxq->queue_id, mbuf)) {
+                       /* Return an error so that nb_rx_pkts is not
+                        * incremented. This packet was meant for the
+                        * representor, so there is no need to account
+                        * for it in the parent Rx burst function.
+                        */
+                       rc = -ENODEV;
+               }
+       }
+
 next_rx:
 
        *raw_cons = tmp_raw_cons;
@@ -473,57 +770,149 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
        uint32_t raw_cons = cpr->cp_raw_cons;
        uint32_t cons;
        int nb_rx_pkts = 0;
+       int nb_rep_rx_pkts = 0;
        struct rx_pkt_cmpl *rxcmp;
        uint16_t prod = rxr->rx_prod;
        uint16_t ag_prod = rxr->ag_prod;
+       int rc = 0;
+       bool evt = false;
+
+       if (unlikely(is_bnxt_in_error(rxq->bp)))
+               return 0;
+
+       /* Return if the Rx queue was stopped */
+       if (unlikely(!rxq->rx_started ||
+                    !rte_spinlock_trylock(&rxq->lock)))
+               return 0;
+
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+       /*
+        * Replenish buffers if needed when a transition has been made
+        * from vector to non-vector receive processing.
+        */
+       while (unlikely(rxq->rxrearm_nb)) {
+               if (!bnxt_alloc_rx_data(rxq, rxr, rxq->rxrearm_start)) {
+                       rxr->rx_prod = rxq->rxrearm_start;
+                       bnxt_db_write(&rxr->rx_db, rxr->rx_prod);
+                       rxq->rxrearm_start++;
+                       rxq->rxrearm_nb--;
+               } else {
+                       /* Retry allocation on next call. */
+                       break;
+               }
+       }
+#endif
 
        /* Handle RX burst request */
        while (1) {
-               int rc;
-
                cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
                rte_prefetch0(&cpr->cp_desc_ring[cons]);
                rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons];
 
                if (!CMP_VALID(rxcmp, raw_cons, cpr->cp_ring_struct))
                        break;
+               cpr->valid = FLIP_VALID(cons,
+                                       cpr->cp_ring_struct->ring_mask,
+                                       cpr->valid);
 
                /* TODO: Avoid magic numbers... */
                if ((CMP_TYPE(rxcmp) & 0x30) == 0x10) {
                        rc = bnxt_rx_pkt(&rx_pkts[nb_rx_pkts], rxq, &raw_cons);
-                       if (likely(!rc))
+                       if (likely(!rc) || rc == -ENOMEM)
                                nb_rx_pkts++;
                        if (rc == -EBUSY)       /* partial completion */
                                break;
+                       if (rc == -ENODEV)      /* completion for representor */
+                               nb_rep_rx_pkts++;
+               } else if (!BNXT_NUM_ASYNC_CPR(rxq->bp)) {
+                       evt =
+                       bnxt_event_hwrm_resp_handler(rxq->bp,
+                                                    (struct cmpl_base *)rxcmp);
+                       /* If the async event is a fatal error, return */
+                       if (unlikely(is_bnxt_in_error(rxq->bp)))
+                               goto done;
                }
+
                raw_cons = NEXT_RAW_CMP(raw_cons);
-               if (nb_rx_pkts == nb_pkts)
+               if (nb_rx_pkts == nb_pkts || evt)
                        break;
+               /* Post some Rx buffers early in case of larger burst processing */
+               if (nb_rx_pkts == BNXT_RX_POST_THRESH)
+                       bnxt_db_write(&rxr->rx_db, rxr->rx_prod);
        }
 
        cpr->cp_raw_cons = raw_cons;
-       if (prod == rxr->rx_prod && ag_prod == rxr->ag_prod) {
+       if (!nb_rx_pkts && !nb_rep_rx_pkts && !evt) {
                /*
                 * For PMD, there is no need to keep on pushing to REARM
                 * the doorbell if there are no new completions
                 */
-               return nb_rx_pkts;
+               goto done;
        }
 
-       B_CP_DIS_DB(cpr, cpr->cp_raw_cons);
-       B_RX_DB(rxr->rx_doorbell, rxr->rx_prod);
+       if (prod != rxr->rx_prod)
+               bnxt_db_write(&rxr->rx_db, rxr->rx_prod);
+
        /* Ring the AGG ring DB */
-       B_RX_DB(rxr->ag_doorbell, rxr->ag_prod);
+       if (ag_prod != rxr->ag_prod)
+               bnxt_db_write(&rxr->ag_db, rxr->ag_prod);
+
+       bnxt_db_cq(cpr);
+
+       /* Attempt to alloc Rx buf in case of a previous allocation failure. */
+       if (rc == -ENOMEM) {
+               int i = RING_NEXT(rxr->rx_ring_struct, prod);
+               int cnt = nb_rx_pkts;
+
+               for (; cnt;
+                       i = RING_NEXT(rxr->rx_ring_struct, i), cnt--) {
+                       struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[i];
+
+                       /* Buffer already allocated for this index. */
+                       if (rx_buf->mbuf != NULL)
+                               continue;
+
+                       /* This slot is empty. Alloc buffer for Rx */
+                       if (!bnxt_alloc_rx_data(rxq, rxr, i)) {
+                               rxr->rx_prod = i;
+                               bnxt_db_write(&rxr->rx_db, rxr->rx_prod);
+                       } else {
+                               PMD_DRV_LOG(ERR, "Alloc  mbuf failed\n");
+                               break;
+                       }
+               }
+       }
+
+done:
+       rte_spinlock_unlock(&rxq->lock);
+
        return nb_rx_pkts;
 }
 
+/*
+ * Dummy DPDK callback for RX.
+ *
+ * This function is used to temporarily replace the real callback during
+ * unsafe control operations on the queue, or in case of error.
+ */
+uint16_t
+bnxt_dummy_recv_pkts(void *rx_queue __rte_unused,
+                    struct rte_mbuf **rx_pkts __rte_unused,
+                    uint16_t nb_pkts __rte_unused)
+{
+       return 0;
+}
+
 void bnxt_free_rx_rings(struct bnxt *bp)
 {
        int i;
+       struct bnxt_rx_queue *rxq;
 
-       for (i = 0; i < (int)bp->rx_nr_rings; i++) {
-               struct bnxt_rx_queue *rxq = bp->rx_queues[i];
+       if (!bp->rx_queues)
+               return;
 
+       for (i = 0; i < (int)bp->rx_nr_rings; i++) {
+               rxq = bp->rx_queues[i];
                if (!rxq)
                        continue;
 
@@ -552,9 +941,7 @@ int bnxt_init_rx_ring_struct(struct bnxt_rx_queue *rxq, unsigned int socket_id)
        struct bnxt_rx_ring_info *rxr;
        struct bnxt_ring *ring;
 
-       rxq->rx_buf_use_size = BNXT_MAX_MTU + ETHER_HDR_LEN + ETHER_CRC_LEN +
-                              (2 * VLAN_TAG_SIZE);
-       rxq->rx_buf_size = rxq->rx_buf_use_size + sizeof(struct rte_mbuf);
+       rxq->rx_buf_size = BNXT_MAX_PKT_LEN + sizeof(struct rte_mbuf);
 
        rxr = rte_zmalloc_socket("bnxt_rx_ring",
                                 sizeof(struct bnxt_rx_ring_info),
@@ -639,10 +1026,9 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq)
        uint16_t size;
 
        size = rte_pktmbuf_data_room_size(rxq->mb_pool) - RTE_PKTMBUF_HEADROOM;
-       if (rxq->rx_buf_use_size <= size)
-               size = rxq->rx_buf_use_size;
+       size = RTE_MIN(BNXT_MAX_PKT_LEN, size);
 
-       type = RX_PROD_PKT_BD_TYPE_RX_PROD_PKT;
+       type = RX_PROD_PKT_BD_TYPE_RX_PROD_PKT | RX_PROD_PKT_BD_FLAGS_EOP_PAD;
 
        rxr = rxq->rx_ring;
        ring = rxr->rx_ring_struct;
@@ -650,16 +1036,17 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq)
 
        prod = rxr->rx_prod;
        for (i = 0; i < ring->ring_size; i++) {
-               if (bnxt_alloc_rx_data(rxq, rxr, prod) != 0) {
-                       RTE_LOG(WARNING, PMD,
-                               "init'ed rx ring %d with %d/%d mbufs only\n",
-                               rxq->queue_id, i, ring->ring_size);
-                       break;
+               if (unlikely(!rxr->rx_buf_ring[i].mbuf)) {
+                       if (bnxt_alloc_rx_data(rxq, rxr, prod) != 0) {
+                               PMD_DRV_LOG(WARNING,
+                                           "init'ed rx ring %d with %d/%d mbufs only\n",
+                                           rxq->queue_id, i, ring->ring_size);
+                               break;
+                       }
                }
                rxr->rx_prod = prod;
                prod = RING_NEXT(rxr->rx_ring_struct, prod);
        }
-       RTE_LOG(DEBUG, PMD, "%s\n", __func__);
 
        ring = rxr->ag_ring_struct;
        type = RX_PROD_AGG_BD_TYPE_RX_PROD_AGG;
@@ -667,28 +1054,34 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq)
        prod = rxr->ag_prod;
 
        for (i = 0; i < ring->ring_size; i++) {
-               if (bnxt_alloc_ag_data(rxq, rxr, prod) != 0) {
-                       RTE_LOG(WARNING, PMD,
-                       "init'ed AG ring %d with %d/%d mbufs only\n",
-                       rxq->queue_id, i, ring->ring_size);
-                       break;
+               if (unlikely(!rxr->ag_buf_ring[i].mbuf)) {
+                       if (bnxt_alloc_ag_data(rxq, rxr, prod) != 0) {
+                               PMD_DRV_LOG(WARNING,
+                                           "init'ed AG ring %d with %d/%d mbufs only\n",
+                                           rxq->queue_id, i, ring->ring_size);
+                               break;
+                       }
                }
                rxr->ag_prod = prod;
                prod = RING_NEXT(rxr->ag_ring_struct, prod);
        }
-       RTE_LOG(DEBUG, PMD, "%s AGG Done!\n", __func__);
+       PMD_DRV_LOG(DEBUG, "AGG Done!\n");
 
        if (rxr->tpa_info) {
-               for (i = 0; i < BNXT_TPA_MAX; i++) {
-                       rxr->tpa_info[i].mbuf =
-                               __bnxt_alloc_rx_data(rxq->mb_pool);
-                       if (!rxr->tpa_info[i].mbuf) {
-                               rte_atomic64_inc(&rxq->bp->rx_mbuf_alloc_fail);
-                               return -ENOMEM;
+               unsigned int max_aggs = BNXT_TPA_MAX_AGGS(rxq->bp);
+
+               for (i = 0; i < max_aggs; i++) {
+                       if (unlikely(!rxr->tpa_info[i].mbuf)) {
+                               rxr->tpa_info[i].mbuf =
+                                       __bnxt_alloc_rx_data(rxq->mb_pool);
+                               if (!rxr->tpa_info[i].mbuf) {
+                                       rte_atomic64_inc(&rxq->rx_mbuf_alloc_fail);
+                                       return -ENOMEM;
+                               }
                        }
                }
        }
-       RTE_LOG(DEBUG, PMD, "%s TPA alloc Done!\n", __func__);
+       PMD_DRV_LOG(DEBUG, "TPA alloc Done!\n");
 
        return 0;
 }