ethdev: add namespace
dpdk.git / drivers/net/bnxt/bnxt_ring.c
index 56bb463..7940d48 100644
@@ -1,14 +1,14 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2014-2018 Broadcom
+ * Copyright(c) 2014-2021 Broadcom
  * All rights reserved.
  */
 
 #include <rte_bitmap.h>
 #include <rte_memzone.h>
+#include <rte_malloc.h>
 #include <unistd.h>
 
 #include "bnxt.h"
-#include "bnxt_cpr.h"
 #include "bnxt_hwrm.h"
 #include "bnxt_ring.h"
 #include "bnxt_rxq.h"
@@ -38,13 +38,44 @@ void bnxt_free_ring(struct bnxt_ring *ring)
  * Ring groups
  */
 
-int bnxt_init_ring_grps(struct bnxt *bp)
+static void bnxt_init_ring_grps(struct bnxt *bp)
 {
        unsigned int i;
 
        for (i = 0; i < bp->max_ring_grps; i++)
                memset(&bp->grp_info[i], (uint8_t)HWRM_NA_SIGNATURE,
                       sizeof(struct bnxt_ring_grp_info));
+}
+
+int bnxt_alloc_ring_grps(struct bnxt *bp)
+{
+       if (bp->max_tx_rings == 0) {
+               PMD_DRV_LOG(ERR, "No TX rings available!\n");
+               return -EBUSY;
+       }
+
+       /* P5 (Thor) chips do not support ring groups, but we reuse the
+        * array to save RSS context IDs.
+        */
+       if (BNXT_CHIP_P5(bp)) {
+               bp->max_ring_grps = BNXT_MAX_RSS_CTXTS_P5;
+       } else if (bp->max_ring_grps < bp->rx_cp_nr_rings) {
+               /* 1 ring is for default completion ring */
+               PMD_DRV_LOG(ERR, "Insufficient resource: Ring Group\n");
+               return -ENOSPC;
+       }
+
+       if (BNXT_HAS_RING_GRPS(bp)) {
+               bp->grp_info = rte_zmalloc("bnxt_grp_info",
+                                          sizeof(*bp->grp_info) *
+                                          bp->max_ring_grps, 0);
+               if (!bp->grp_info) {
+                       PMD_DRV_LOG(ERR,
+                                   "Failed to alloc grp info tbl.\n");
+                       return -ENOMEM;
+               }
+               bnxt_init_ring_grps(bp);
+       }
 
        return 0;
 }
@@ -63,7 +94,7 @@ int bnxt_init_ring_grps(struct bnxt *bp)
  * tx bd ring - Only non-zero length if tx_ring_info is not NULL
  * rx bd ring - Only non-zero length if rx_ring_info is not NULL
  */
-int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
+int bnxt_alloc_rings(struct bnxt *bp, unsigned int socket_id, uint16_t qidx,
                            struct bnxt_tx_queue *txq,
                            struct bnxt_rx_queue *rxq,
                            struct bnxt_cp_ring_info *cp_ring_info,
@@ -73,15 +104,19 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
        struct bnxt_ring *cp_ring = cp_ring_info->cp_ring_struct;
        struct bnxt_rx_ring_info *rx_ring_info = rxq ? rxq->rx_ring : NULL;
        struct bnxt_tx_ring_info *tx_ring_info = txq ? txq->tx_ring : NULL;
-       struct bnxt_ring *tx_ring;
-       struct bnxt_ring *rx_ring;
-       struct rte_pci_device *pdev = bp->pdev;
        uint64_t rx_offloads = bp->eth_dev->data->dev_conf.rxmode.offloads;
+       int ag_ring_start, ag_bitmap_start, tpa_info_start;
+       int ag_vmem_start, cp_ring_start, nq_ring_start;
+       int total_alloc_len, rx_ring_start, rx_ring_len;
+       struct rte_pci_device *pdev = bp->pdev;
+       struct bnxt_ring *tx_ring, *rx_ring;
        const struct rte_memzone *mz = NULL;
        char mz_name[RTE_MEMZONE_NAMESIZE];
-       rte_iova_t mz_phys_addr_base;
        rte_iova_t mz_phys_addr;
-       int sz;
+       int ag_bitmap_len = 0;
+       int tpa_info_len = 0;
+       int ag_vmem_len = 0;
+       int ag_ring_len = 0;
 
        int stats_len = (tx_ring_info || rx_ring_info) ?
            RTE_CACHE_LINE_ROUNDUP(sizeof(struct hwrm_stat_ctx_query_output) -
@@ -92,7 +127,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
        int cp_vmem_len = RTE_CACHE_LINE_ROUNDUP(cp_ring->vmem_size);
        cp_vmem_len = RTE_ALIGN(cp_vmem_len, 128);
 
-       int nq_vmem_len = BNXT_CHIP_THOR(bp) ?
+       int nq_vmem_len = nq_ring_info ?
                RTE_CACHE_LINE_ROUNDUP(cp_ring->vmem_size) : 0;
        nq_vmem_len = RTE_ALIGN(nq_vmem_len, 128);
 
@@ -109,14 +144,12 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
                RTE_CACHE_LINE_ROUNDUP(rx_ring_info->
                                                rx_ring_struct->vmem_size) : 0;
        rx_vmem_len = RTE_ALIGN(rx_vmem_len, 128);
-       int ag_vmem_start = 0;
-       int ag_vmem_len = 0;
-       int cp_ring_start =  0;
-       int nq_ring_start = 0;
 
        ag_vmem_start = rx_vmem_start + rx_vmem_len;
-       ag_vmem_len = rx_ring_info ? RTE_CACHE_LINE_ROUNDUP(
-                               rx_ring_info->ag_ring_struct->vmem_size) : 0;
+       if (bnxt_need_agg_ring(bp->eth_dev))
+               ag_vmem_len = rx_ring_info && rx_ring_info->ag_ring_struct ?
+                       RTE_CACHE_LINE_ROUNDUP(rx_ring_info->ag_ring_struct->vmem_size) : 0;
+
        cp_ring_start = ag_vmem_start + ag_vmem_len;
        cp_ring_start = RTE_ALIGN(cp_ring_start, 4096);
 
@@ -126,48 +159,55 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
        nq_ring_start = cp_ring_start + cp_ring_len;
        nq_ring_start = RTE_ALIGN(nq_ring_start, 4096);
 
-       int nq_ring_len = BNXT_CHIP_THOR(bp) ? cp_ring_len : 0;
+       int nq_ring_len = nq_ring_info ? cp_ring_len : 0;
 
        int tx_ring_start = nq_ring_start + nq_ring_len;
+       tx_ring_start = RTE_ALIGN(tx_ring_start, 4096);
        int tx_ring_len = tx_ring_info ?
            RTE_CACHE_LINE_ROUNDUP(tx_ring_info->tx_ring_struct->ring_size *
                                   sizeof(struct tx_bd_long)) : 0;
        tx_ring_len = RTE_ALIGN(tx_ring_len, 4096);
 
-       int rx_ring_start = tx_ring_start + tx_ring_len;
-       int rx_ring_len =  rx_ring_info ?
+       rx_ring_start = tx_ring_start + tx_ring_len;
+       rx_ring_start = RTE_ALIGN(rx_ring_start, 4096);
+       rx_ring_len = rx_ring_info ?
                RTE_CACHE_LINE_ROUNDUP(rx_ring_info->rx_ring_struct->ring_size *
                sizeof(struct rx_prod_pkt_bd)) : 0;
        rx_ring_len = RTE_ALIGN(rx_ring_len, 4096);
 
-       int ag_ring_start = rx_ring_start + rx_ring_len;
-       int ag_ring_len = rx_ring_len * AGG_RING_SIZE_FACTOR;
-       ag_ring_len = RTE_ALIGN(ag_ring_len, 4096);
+       ag_ring_start = rx_ring_start + rx_ring_len;
+       ag_ring_start = RTE_ALIGN(ag_ring_start, 4096);
+
+       if (bnxt_need_agg_ring(bp->eth_dev)) {
+               ag_ring_len = rx_ring_len * AGG_RING_SIZE_FACTOR;
+               ag_ring_len = RTE_ALIGN(ag_ring_len, 4096);
 
-       int ag_bitmap_start = ag_ring_start + ag_ring_len;
-       int ag_bitmap_len =  rx_ring_info ?
+               ag_bitmap_len = rx_ring_info ?
                RTE_CACHE_LINE_ROUNDUP(rte_bitmap_get_memory_footprint(
                        rx_ring_info->rx_ring_struct->ring_size *
                        AGG_RING_SIZE_FACTOR)) : 0;
 
-       int tpa_info_start = ag_bitmap_start + ag_bitmap_len;
-       int tpa_info_len = rx_ring_info ?
-               RTE_CACHE_LINE_ROUNDUP(BNXT_TPA_MAX *
-                                      sizeof(struct bnxt_tpa_info)) : 0;
+               if (rx_ring_info && (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)) {
+                       int tpa_max = BNXT_TPA_MAX_AGGS(bp);
+
+                       tpa_info_len = tpa_max * sizeof(struct bnxt_tpa_info);
+                       tpa_info_len = RTE_CACHE_LINE_ROUNDUP(tpa_info_len);
+               }
+       }
 
-       int total_alloc_len = tpa_info_start;
-       if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO)
-               total_alloc_len += tpa_info_len;
+       ag_bitmap_start = ag_ring_start + ag_ring_len;
+       tpa_info_start = ag_bitmap_start + ag_bitmap_len;
+       total_alloc_len = tpa_info_start + tpa_info_len;
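+       /* All rings and per-ring metadata for this queue are carved out of
+        * a single IOVA-contiguous memzone, laid out in the order computed
+        * above: stats ctx, cp/nq/tx/rx/agg vmem, then the cp, nq, tx, rx
+        * and agg rings, the agg bitmap, and finally the optional TPA info.
+        */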
 
        snprintf(mz_name, RTE_MEMZONE_NAMESIZE,
-                "bnxt_%04x:%02x:%02x:%02x-%04x_%s", pdev->addr.domain,
+                "bnxt_" PCI_PRI_FMT "-%04x_%s", pdev->addr.domain,
                 pdev->addr.bus, pdev->addr.devid, pdev->addr.function, qidx,
                 suffix);
        mz_name[RTE_MEMZONE_NAMESIZE - 1] = 0;
        mz = rte_memzone_lookup(mz_name);
        if (!mz) {
                mz = rte_memzone_reserve_aligned(mz_name, total_alloc_len,
-                               SOCKET_ID_ANY,
+                               socket_id,
                                RTE_MEMZONE_2MB |
                                RTE_MEMZONE_SIZE_HINT_ONLY |
                                RTE_MEMZONE_IOVA_CONTIG,
@@ -176,23 +216,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
                        return -ENOMEM;
        }
        memset(mz->addr, 0, mz->len);
-       mz_phys_addr_base = mz->iova;
        mz_phys_addr = mz->iova;
-       if ((unsigned long)mz->addr == mz_phys_addr_base) {
-               PMD_DRV_LOG(WARNING,
-                       "Memzone physical address same as virtual.\n");
-               PMD_DRV_LOG(WARNING,
-                       "Using rte_mem_virt2iova()\n");
-               for (sz = 0; sz < total_alloc_len; sz += getpagesize())
-                       rte_mem_lock_page(((char *)mz->addr) + sz);
-               mz_phys_addr_base = rte_mem_virt2iova(mz->addr);
-               mz_phys_addr = rte_mem_virt2iova(mz->addr);
-               if (mz_phys_addr == 0) {
-                       PMD_DRV_LOG(ERR,
-                       "unable to map ring address to physical memory\n");
-                       return -ENOMEM;
-               }
-       }
 
        if (tx_ring_info) {
                txq->mz = mz;
@@ -210,7 +234,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
                        tx_ring->vmem =
                            (void **)((char *)mz->addr + tx_vmem_start);
                        tx_ring_info->tx_buf_ring =
-                           (struct bnxt_sw_tx_bd *)tx_ring->vmem;
+                           (struct rte_mbuf **)tx_ring->vmem;
                }
        }
 
@@ -231,37 +255,39 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
                        rx_ring->vmem =
                            (void **)((char *)mz->addr + rx_vmem_start);
                        rx_ring_info->rx_buf_ring =
-                           (struct bnxt_sw_rx_bd *)rx_ring->vmem;
+                           (struct rte_mbuf **)rx_ring->vmem;
                }
 
-               rx_ring = rx_ring_info->ag_ring_struct;
-
-               rx_ring->bd = ((char *)mz->addr + ag_ring_start);
-               rx_ring_info->ag_desc_ring =
-                   (struct rx_prod_pkt_bd *)rx_ring->bd;
-               rx_ring->bd_dma = mz->iova + ag_ring_start;
-               rx_ring_info->ag_desc_mapping = rx_ring->bd_dma;
-               rx_ring->mem_zone = (const void *)mz;
-
-               if (!rx_ring->bd)
-                       return -ENOMEM;
-               if (rx_ring->vmem_size) {
-                       rx_ring->vmem =
-                           (void **)((char *)mz->addr + ag_vmem_start);
-                       rx_ring_info->ag_buf_ring =
-                           (struct bnxt_sw_rx_bd *)rx_ring->vmem;
+               if (bnxt_need_agg_ring(bp->eth_dev)) {
+                       rx_ring = rx_ring_info->ag_ring_struct;
+
+                       rx_ring->bd = ((char *)mz->addr + ag_ring_start);
+                       rx_ring_info->ag_desc_ring =
+                           (struct rx_prod_pkt_bd *)rx_ring->bd;
+                       rx_ring->bd_dma = mz->iova + ag_ring_start;
+                       rx_ring_info->ag_desc_mapping = rx_ring->bd_dma;
+                       rx_ring->mem_zone = (const void *)mz;
+
+                       if (!rx_ring->bd)
+                               return -ENOMEM;
+                       if (rx_ring->vmem_size) {
+                               rx_ring->vmem =
+                                   (void **)((char *)mz->addr + ag_vmem_start);
+                               rx_ring_info->ag_buf_ring =
+                                   (struct rte_mbuf **)rx_ring->vmem;
+                       }
+
+                       rx_ring_info->ag_bitmap =
+                           rte_bitmap_init(rx_ring_info->rx_ring_struct->ring_size *
+                                           AGG_RING_SIZE_FACTOR, (uint8_t *)mz->addr +
+                                           ag_bitmap_start, ag_bitmap_len);
+
+                       /* TPA info */
+                       if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)
+                               rx_ring_info->tpa_info =
+                                       ((struct bnxt_tpa_info *)
+                                        ((char *)mz->addr + tpa_info_start));
                }
-
-               rx_ring_info->ag_bitmap =
-                   rte_bitmap_init(rx_ring_info->rx_ring_struct->ring_size *
-                                   AGG_RING_SIZE_FACTOR, (uint8_t *)mz->addr +
-                                   ag_bitmap_start, ag_bitmap_len);
-
-               /* TPA info */
-               if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO)
-                       rx_ring_info->tpa_info =
-                               ((struct bnxt_tpa_info *)((char *)mz->addr +
-                                                         tpa_info_start));
        }
 
        cp_ring->bd = ((char *)mz->addr + cp_ring_start);
@@ -280,7 +306,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
        }
        cp_ring_info->hw_stats_ctx_id = HWRM_NA_SIGNATURE;
 
-       if (BNXT_HAS_NQ(bp)) {
+       if (nq_ring_info) {
                struct bnxt_ring *nq_ring = nq_ring_info->cp_ring_struct;
 
                nq_ring->bd = (char *)mz->addr + nq_ring_start;
@@ -322,13 +348,11 @@ static void bnxt_set_db(struct bnxt *bp,
                        struct bnxt_db_info *db,
                        uint32_t ring_type,
                        uint32_t map_idx,
-                       uint32_t fid)
+                       uint32_t fid,
+                       uint32_t ring_mask)
 {
-       if (BNXT_CHIP_THOR(bp)) {
-               if (BNXT_PF(bp))
-                       db->doorbell = (char *)bp->doorbell_base + 0x10000;
-               else
-                       db->doorbell = (char *)bp->doorbell_base + 0x4000;
+       if (BNXT_CHIP_P5(bp)) {
+               int db_offset = DB_PF_OFFSET;
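+
+               /* P5 doorbells are 64-bit: the key records the path, ring
+                * type and FW ring ID (xid); the producer index is OR'ed in
+                * by the doorbell write helpers.
+                */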
                switch (ring_type) {
                case HWRM_RING_ALLOC_INPUT_RING_TYPE_TX:
                        db->db_key64 = DBR_PATH_L2 | DBR_TYPE_SQ;
@@ -341,9 +365,17 @@ static void bnxt_set_db(struct bnxt *bp,
                        db->db_key64 = DBR_PATH_L2 | DBR_TYPE_CQ;
                        break;
                case HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ:
-                       db->db_key64 = DBR_PATH_L2 | DBR_TYPE_NQ;
+                       db->db_key64 = DBR_PATH_L2;
                        break;
                }
+               if (BNXT_CHIP_SR2(bp)) {
+                       db->db_key64 |= DBR_VALID;
+                       db_offset = bp->legacy_db_size;
+               } else if (BNXT_VF(bp)) {
+                       db_offset = DB_VF_OFFSET;
+               }
+
+               db->doorbell = (char *)bp->doorbell_base + db_offset;
                db->db_key64 |= (uint64_t)fid << DBR_XID_SFT;
                db->db_64 = true;
        } else {
@@ -361,14 +393,22 @@ static void bnxt_set_db(struct bnxt *bp,
                }
                db->db_64 = false;
        }
+       db->db_ring_mask = ring_mask;
+
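+       /* SR2 doorbells carry an epoch bit that toggles each time the ring
+        * index wraps; record where that bit sits relative to the ring mask
+        * so the doorbell write helpers can compute it cheaply.
+        */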
+       if (BNXT_CHIP_SR2(bp)) {
+               db->db_epoch_mask = db->db_ring_mask + 1;
+               db->db_epoch_shift = DBR_EPOCH_SFT -
+                                       rte_log2_u32(db->db_epoch_mask);
+       }
 }
 
 static int bnxt_alloc_cmpl_ring(struct bnxt *bp, int queue_index,
-                               struct bnxt_cp_ring_info *cpr,
-                               struct bnxt_cp_ring_info *nqr)
+                               struct bnxt_cp_ring_info *cpr)
 {
        struct bnxt_ring *cp_ring = cpr->cp_ring_struct;
        uint32_t nq_ring_id = HWRM_NA_SIGNATURE;
+       int cp_ring_index = queue_index + BNXT_RX_VEC_START;
+       struct bnxt_cp_ring_info *nqr = bp->rxtx_nq_ring;
        uint8_t ring_type;
        int rc = 0;
 
@@ -383,47 +423,99 @@ static int bnxt_alloc_cmpl_ring(struct bnxt *bp, int queue_index,
                }
        }
 
-       rc = bnxt_hwrm_ring_alloc(bp, cp_ring, ring_type, queue_index,
-                                 HWRM_NA_SIGNATURE, nq_ring_id);
+       rc = bnxt_hwrm_ring_alloc(bp, cp_ring, ring_type, cp_ring_index,
+                                 HWRM_NA_SIGNATURE, nq_ring_id, 0);
        if (rc)
                return rc;
 
-       cpr->cp_cons = 0;
-       bnxt_set_db(bp, &cpr->cp_db, ring_type, queue_index,
-                   cp_ring->fw_ring_id);
+       cpr->cp_raw_cons = 0;
+       bnxt_set_db(bp, &cpr->cp_db, ring_type, cp_ring_index,
+                   cp_ring->fw_ring_id, cp_ring->ring_mask);
        bnxt_db_cq(cpr);
 
        return 0;
 }
 
-static int bnxt_alloc_nq_ring(struct bnxt *bp, int queue_index,
-                             struct bnxt_cp_ring_info *nqr,
-                             bool rx)
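+/* Allocate a single notification queue (NQ) shared by all Rx/Tx rings;
+ * only P5-family devices, which use NQs, need this.
+ */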
+int bnxt_alloc_rxtx_nq_ring(struct bnxt *bp)
 {
-       struct bnxt_ring *nq_ring = nqr->cp_ring_struct;
+       struct bnxt_cp_ring_info *nqr;
+       struct bnxt_ring *ring;
+       int ring_index = BNXT_NUM_ASYNC_CPR(bp);
        uint8_t ring_type;
        int rc = 0;
 
-       if (!BNXT_HAS_NQ(bp))
-               return -EINVAL;
+       if (!BNXT_HAS_NQ(bp) || bp->rxtx_nq_ring)
+               return 0;
+
+       nqr = rte_zmalloc_socket("nqr",
+                                sizeof(struct bnxt_cp_ring_info),
+                                RTE_CACHE_LINE_SIZE,
+                                bp->eth_dev->device->numa_node);
+       if (nqr == NULL)
+               return -ENOMEM;
+
+       ring = rte_zmalloc_socket("bnxt_cp_ring_struct",
+                                 sizeof(struct bnxt_ring),
+                                 RTE_CACHE_LINE_SIZE,
+                                 bp->eth_dev->device->numa_node);
+       if (ring == NULL) {
+               rte_free(nqr);
+               return -ENOMEM;
+       }
+
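+       /* bd/bd_dma only record the (still empty) descriptor fields here;
+        * the backing memzone addresses are filled in by bnxt_alloc_rings()
+        * below.
+        */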
+       ring->bd = (void *)nqr->cp_desc_ring;
+       ring->bd_dma = nqr->cp_desc_mapping;
+       ring->ring_size = rte_align32pow2(DEFAULT_CP_RING_SIZE);
+       ring->ring_mask = ring->ring_size - 1;
+       ring->vmem_size = 0;
+       ring->vmem = NULL;
+       ring->fw_ring_id = INVALID_HW_RING_ID;
+
+       nqr->cp_ring_struct = ring;
+       rc = bnxt_alloc_rings(bp, bp->eth_dev->device->numa_node, 0, NULL,
+                             NULL, nqr, NULL, "l2_nqr");
+       if (rc) {
+               rte_free(ring);
+               rte_free(nqr);
+               return -ENOMEM;
+       }
 
        ring_type = HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ;
 
-       rc = bnxt_hwrm_ring_alloc(bp, nq_ring, ring_type, queue_index,
-                                 HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE);
-       if (rc)
+       rc = bnxt_hwrm_ring_alloc(bp, ring, ring_type, ring_index,
+                                 HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, 0);
+       if (rc) {
+               rte_free(ring);
+               rte_free(nqr);
                return rc;
+       }
 
-       if (rx)
-               bp->grp_info[queue_index].cp_fw_ring_id = nq_ring->fw_ring_id;
-
-       bnxt_set_db(bp, &nqr->cp_db, ring_type, queue_index,
-                   nq_ring->fw_ring_id);
+       bnxt_set_db(bp, &nqr->cp_db, ring_type, ring_index,
+                   ring->fw_ring_id, ring->ring_mask);
        bnxt_db_nq(nqr);
 
+       bp->rxtx_nq_ring = nqr;
+
        return 0;
 }
 
+/* Free the Rx/Tx NQ ring. */
+void bnxt_free_rxtx_nq_ring(struct bnxt *bp)
+{
+       struct bnxt_cp_ring_info *nqr = bp->rxtx_nq_ring;
+
+       if (!nqr)
+               return;
+
+       bnxt_free_nq_ring(bp, nqr);
+
+       bnxt_free_ring(nqr->cp_ring_struct);
+       rte_free(nqr->cp_ring_struct);
+       nqr->cp_ring_struct = NULL;
+       rte_free(nqr);
+       bp->rxtx_nq_ring = NULL;
+}
+
 static int bnxt_alloc_rx_ring(struct bnxt *bp, int queue_index)
 {
        struct bnxt_rx_queue *rxq = bp->rx_queues[queue_index];
@@ -438,14 +530,16 @@ static int bnxt_alloc_rx_ring(struct bnxt *bp, int queue_index)
 
        rc = bnxt_hwrm_ring_alloc(bp, ring, ring_type,
                                  queue_index, cpr->hw_stats_ctx_id,
-                                 cp_ring->fw_ring_id);
+                                 cp_ring->fw_ring_id, 0);
        if (rc)
                return rc;
 
-       rxr->rx_prod = 0;
-       bp->grp_info[queue_index].rx_fw_ring_id = ring->fw_ring_id;
-       bnxt_set_db(bp, &rxr->rx_db, ring_type, queue_index, ring->fw_ring_id);
-       bnxt_db_write(&rxr->rx_db, rxr->rx_prod);
+       rxr->rx_raw_prod = 0;
+       if (BNXT_HAS_RING_GRPS(bp))
+               bp->grp_info[queue_index].rx_fw_ring_id = ring->fw_ring_id;
+       bnxt_set_db(bp, &rxr->rx_db, ring_type, queue_index, ring->fw_ring_id,
+                   ring->ring_mask);
+       bnxt_db_write(&rxr->rx_db, rxr->rx_raw_prod);
 
        return 0;
 }
@@ -462,9 +556,12 @@ static int bnxt_alloc_rx_agg_ring(struct bnxt *bp, int queue_index)
        uint8_t ring_type;
        int rc = 0;
 
+       if (!bnxt_need_agg_ring(bp->eth_dev))
+               return 0;
+
        ring->fw_rx_ring_id = rxr->rx_ring_struct->fw_ring_id;
 
-       if (BNXT_CHIP_THOR(bp)) {
+       if (BNXT_CHIP_P5(bp)) {
                ring_type = HWRM_RING_ALLOC_INPUT_RING_TYPE_RX_AGG;
                hw_stats_ctx_id = cpr->hw_stats_ctx_id;
        } else {
@@ -472,15 +569,17 @@ static int bnxt_alloc_rx_agg_ring(struct bnxt *bp, int queue_index)
        }
 
        rc = bnxt_hwrm_ring_alloc(bp, ring, ring_type, map_idx,
-                                 hw_stats_ctx_id, cp_ring->fw_ring_id);
+                                 hw_stats_ctx_id, cp_ring->fw_ring_id, 0);
 
        if (rc)
                return rc;
 
-       rxr->ag_prod = 0;
-       bp->grp_info[queue_index].ag_fw_ring_id = ring->fw_ring_id;
-       bnxt_set_db(bp, &rxr->ag_db, ring_type, map_idx, ring->fw_ring_id);
-       bnxt_db_write(&rxr->ag_db, rxr->ag_prod);
+       rxr->ag_raw_prod = 0;
+       if (BNXT_HAS_RING_GRPS(bp))
+               bp->grp_info[queue_index].ag_fw_ring_id = ring->fw_ring_id;
+       bnxt_set_db(bp, &rxr->ag_db, ring_type, map_idx, ring->fw_ring_id,
+                   ring->ring_mask);
+       bnxt_db_write(&rxr->ag_db, rxr->ag_raw_prod);
 
        return 0;
 }
@@ -490,64 +589,127 @@ int bnxt_alloc_hwrm_rx_ring(struct bnxt *bp, int queue_index)
        struct bnxt_rx_queue *rxq = bp->rx_queues[queue_index];
        struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
        struct bnxt_ring *cp_ring = cpr->cp_ring_struct;
-       struct bnxt_cp_ring_info *nqr = rxq->nq_ring;
        struct bnxt_rx_ring_info *rxr = rxq->rx_ring;
-       int rc = 0;
+       struct bnxt_coal coal;
+       int rc;
 
-       if (BNXT_HAS_NQ(bp)) {
-               if (bnxt_alloc_nq_ring(bp, queue_index, nqr, true))
-                       goto err_out;
-       }
+       /*
+        * Storage for the cp ring is allocated based on worst-case
+        * usage; the actual size used by the hardware is computed here.
+        * It is sized so the completion ring cannot overflow even if
+        * every posted Rx and aggregation buffer generates completions.
+        */
+       cp_ring->ring_size = rxr->rx_ring_struct->ring_size * 2;
+
+       if (bnxt_need_agg_ring(bp->eth_dev))
+               cp_ring->ring_size *= AGG_RING_SIZE_FACTOR;
 
-       if (bnxt_alloc_cmpl_ring(bp, queue_index, cpr, nqr))
+       cp_ring->ring_mask = cp_ring->ring_size - 1;
+
+       rc = bnxt_alloc_cmpl_ring(bp, queue_index, cpr);
+       if (rc)
+               goto err_out;
+
+       rc = bnxt_hwrm_stat_ctx_alloc(bp, cpr);
+       if (rc)
                goto err_out;
 
-       bp->grp_info[queue_index].fw_stats_ctx = cpr->hw_stats_ctx_id;
-       bp->grp_info[queue_index].cp_fw_ring_id = cp_ring->fw_ring_id;
+       if (BNXT_HAS_RING_GRPS(bp)) {
+               bp->grp_info[queue_index].fw_stats_ctx = cpr->hw_stats_ctx_id;
+               bp->grp_info[queue_index].cp_fw_ring_id = cp_ring->fw_ring_id;
+       }
+
+       bnxt_init_dflt_coal(&coal);
+       bnxt_hwrm_set_ring_coal(bp, &coal, cp_ring->fw_ring_id);
 
-       if (!queue_index) {
+       if (!BNXT_NUM_ASYNC_CPR(bp) && !queue_index) {
                /*
-                * In order to save completion resources, use the first
-                * completion ring from PF or VF as the default completion ring
-                * for async event and HWRM forward response handling.
+                * If a dedicated async event completion ring is not enabled,
+                * use the first completion ring from PF or VF as the default
+                * completion ring for async event handling.
                 */
-               bp->def_cp_ring = cpr;
+               bp->async_cp_ring = cpr;
                rc = bnxt_hwrm_set_async_event_cr(bp);
                if (rc)
                        goto err_out;
        }
 
-       if (bnxt_alloc_rx_ring(bp, queue_index))
+       rc = bnxt_alloc_rx_ring(bp, queue_index);
+       if (rc)
                goto err_out;
 
-       if (bnxt_alloc_rx_agg_ring(bp, queue_index))
+       rc = bnxt_alloc_rx_agg_ring(bp, queue_index);
+       if (rc)
                goto err_out;
 
-       rxq->rx_buf_use_size = BNXT_MAX_MTU + RTE_ETHER_HDR_LEN +
-               RTE_ETHER_CRC_LEN + (2 * VLAN_TAG_SIZE);
+       if (BNXT_HAS_RING_GRPS(bp)) {
+               rc = bnxt_hwrm_ring_grp_alloc(bp, queue_index);
+               if (rc)
+                       goto err_out;
+       }
 
-       if (bp->eth_dev->data->rx_queue_state[queue_index] ==
-           RTE_ETH_QUEUE_STATE_STARTED) {
+       if (rxq->rx_started) {
                if (bnxt_init_one_rx_ring(rxq)) {
-                       RTE_LOG(ERR, PMD,
-                               "bnxt_init_one_rx_ring failed!\n");
-                       bnxt_rx_queue_release_op(rxq);
+                       PMD_DRV_LOG(ERR, "bnxt_init_one_rx_ring failed!\n");
+                       bnxt_rx_queue_release_op(bp->eth_dev, queue_index);
                        rc = -ENOMEM;
                        goto err_out;
                }
-               bnxt_db_write(&rxr->rx_db, rxr->rx_prod);
-               bnxt_db_write(&rxr->ag_db, rxr->ag_prod);
+               bnxt_db_write(&rxr->rx_db, rxr->rx_raw_prod);
+               if (bnxt_need_agg_ring(bp->eth_dev))
+                       bnxt_db_write(&rxr->ag_db, rxr->ag_raw_prod);
        }
        rxq->index = queue_index;
-       PMD_DRV_LOG(INFO,
-                   "queue %d, rx_deferred_start %d, state %d!\n",
-                   queue_index, rxq->rx_deferred_start,
-                   bp->eth_dev->data->rx_queue_state[queue_index]);
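+       /* (Re)initialize the bookkeeping used by the vector-mode Rx path
+        * on architectures that support it.
+        */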
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+       bnxt_rxq_vec_setup(rxq);
+#endif
+
+       return 0;
 
 err_out:
+       PMD_DRV_LOG(ERR,
+                   "Failed to allocate receive queue %d, rc %d.\n",
+                   queue_index, rc);
        return rc;
 }
 
+/* Initialize all ring IDs to INVALID_HW_RING_ID (-1); this lets the error
+ * path free only the rings that were actually allocated if allocation of
+ * some rings fails.
+ */
+static void bnxt_init_all_rings(struct bnxt *bp)
+{
+       unsigned int i = 0;
+       struct bnxt_rx_queue *rxq;
+       struct bnxt_ring *cp_ring;
+       struct bnxt_ring *ring;
+       struct bnxt_rx_ring_info *rxr;
+       struct bnxt_tx_queue *txq;
+
+       for (i = 0; i < bp->rx_cp_nr_rings; i++) {
+               rxq = bp->rx_queues[i];
+               /* Rx-compl */
+               cp_ring = rxq->cp_ring->cp_ring_struct;
+               cp_ring->fw_ring_id = INVALID_HW_RING_ID;
+               /* Rx-Reg */
+               rxr = rxq->rx_ring;
+               ring = rxr->rx_ring_struct;
+               ring->fw_ring_id = INVALID_HW_RING_ID;
+               /* Rx-AGG */
+               if (bnxt_need_agg_ring(bp->eth_dev)) {
+                       ring = rxr->ag_ring_struct;
+                       if (ring != NULL)
+                               ring->fw_ring_id = INVALID_HW_RING_ID;
+               }
+       }
+       for (i = 0; i < bp->tx_cp_nr_rings; i++) {
+               txq = bp->tx_queues[i];
+               /* Tx cmpl */
+               cp_ring = txq->cp_ring->cp_ring_struct;
+               cp_ring->fw_ring_id = INVALID_HW_RING_ID;
+               /* Tx ring */
+               ring = txq->tx_ring->tx_ring_struct;
+               ring->fw_ring_id = INVALID_HW_RING_ID;
+       }
+}
+
 /* ring_grp usage:
  * [0] = default completion ring
  * [1 -> +rx_cp_nr_rings] = rx_cp, rx rings
@@ -557,93 +719,188 @@ int bnxt_alloc_hwrm_rings(struct bnxt *bp)
 {
        struct bnxt_coal coal;
        unsigned int i;
-       uint8_t ring_type;
        int rc = 0;
 
        bnxt_init_dflt_coal(&coal);
+       bnxt_init_all_rings(bp);
 
        for (i = 0; i < bp->rx_cp_nr_rings; i++) {
-               struct bnxt_rx_queue *rxq = bp->rx_queues[i];
-               struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
-               struct bnxt_cp_ring_info *nqr = rxq->nq_ring;
-               struct bnxt_ring *cp_ring = cpr->cp_ring_struct;
+               unsigned int soc_id = bp->eth_dev->device->numa_node;
+               struct bnxt_rx_queue *rxq  = bp->rx_queues[i];
                struct bnxt_rx_ring_info *rxr = rxq->rx_ring;
-
-               if (BNXT_HAS_NQ(bp)) {
-                       if (bnxt_alloc_nq_ring(bp, i, nqr, true))
-                               goto err_out;
+               struct bnxt_ring *ring;
+
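+               /* The agg ring may not exist yet if aggregation became
+                * necessary only after the queue was set up (e.g. scattered
+                * Rx enabled by a later reconfiguration); if so, recreate
+                * the queue's rings with the agg ring included.
+                */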
+               if (bnxt_need_agg_ring(bp->eth_dev)) {
+                       ring = rxr->ag_ring_struct;
+                       if (ring == NULL) {
+                               bnxt_free_rxq_mem(rxq);
+
+                               rc = bnxt_init_rx_ring_struct(rxq, soc_id);
+                               if (rc)
+                                       goto err_out;
+
+                               rc = bnxt_alloc_rings(bp, soc_id,
+                                                     i, NULL, rxq,
+                                                     rxq->cp_ring, NULL,
+                                                     "rxr");
+                               if (rc)
+                                       goto err_out;
+                       }
                }
 
-               if (bnxt_alloc_cmpl_ring(bp, i, cpr, nqr))
+               rc = bnxt_alloc_hwrm_rx_ring(bp, i);
+               if (rc)
                        goto err_out;
+       }
 
-               bp->grp_info[i].fw_stats_ctx = cpr->hw_stats_ctx_id;
-               bp->grp_info[i].cp_fw_ring_id = cp_ring->fw_ring_id;
-
-               bnxt_hwrm_set_ring_coal(bp, &coal, cp_ring->fw_ring_id);
-
-               if (!i) {
-                       /*
-                        * In order to save completion resource, use the first
-                        * completion ring from PF or VF as the default
-                        * completion ring for async event & HWRM
-                        * forward response handling.
-                        */
-                       bp->def_cp_ring = cpr;
-                       rc = bnxt_hwrm_set_async_event_cr(bp);
-                       if (rc)
-                               goto err_out;
-               }
-
-               if (bnxt_alloc_rx_ring(bp, i))
+       /* If something is wrong with Rx ring alloc, skip Tx ring alloc */
+       for (i = 0; i < bp->tx_cp_nr_rings; i++) {
+               rc = bnxt_alloc_hwrm_tx_ring(bp, i);
+               if (rc)
                        goto err_out;
+       }
 
-               if (bnxt_alloc_rx_agg_ring(bp, i))
-                       goto err_out;
+err_out:
+       return rc;
+}
 
-               rxq->rx_buf_use_size = BNXT_MAX_MTU + RTE_ETHER_HDR_LEN +
-                                       RTE_ETHER_CRC_LEN + (2 * VLAN_TAG_SIZE);
-               if (bnxt_init_one_rx_ring(rxq)) {
-                       PMD_DRV_LOG(ERR, "bnxt_init_one_rx_ring failed!\n");
-                       bnxt_rx_queue_release_op(rxq);
-                       return -ENOMEM;
-               }
-               bnxt_db_write(&rxr->rx_db, rxr->rx_prod);
-               bnxt_db_write(&rxr->ag_db, rxr->ag_prod);
-               rxq->index = i;
-       }
+/* Allocate dedicated async completion ring. */
+int bnxt_alloc_async_cp_ring(struct bnxt *bp)
+{
+       struct bnxt_cp_ring_info *cpr = bp->async_cp_ring;
+       struct bnxt_ring *cp_ring;
+       uint8_t ring_type;
+       int rc;
 
-       for (i = 0; i < bp->tx_cp_nr_rings; i++) {
-               struct bnxt_tx_queue *txq = bp->tx_queues[i];
-               struct bnxt_cp_ring_info *cpr = txq->cp_ring;
-               struct bnxt_ring *cp_ring = cpr->cp_ring_struct;
-               struct bnxt_cp_ring_info *nqr = txq->nq_ring;
-               struct bnxt_tx_ring_info *txr = txq->tx_ring;
-               struct bnxt_ring *ring = txr->tx_ring_struct;
-               unsigned int idx = i + bp->rx_cp_nr_rings;
-
-               if (BNXT_HAS_NQ(bp)) {
-                       if (bnxt_alloc_nq_ring(bp, idx, nqr, false))
-                               goto err_out;
-               }
+       if (BNXT_NUM_ASYNC_CPR(bp) == 0 || cpr == NULL)
+               return 0;
 
-               if (bnxt_alloc_cmpl_ring(bp, idx, cpr, nqr))
-                       goto err_out;
+       cp_ring = cpr->cp_ring_struct;
 
-               /* Tx ring */
-               ring_type = HWRM_RING_ALLOC_INPUT_RING_TYPE_TX;
-               rc = bnxt_hwrm_ring_alloc(bp, ring,
-                                         ring_type,
-                                         idx, cpr->hw_stats_ctx_id,
-                                         cp_ring->fw_ring_id);
-               if (rc)
-                       goto err_out;
+       if (BNXT_HAS_NQ(bp))
+               ring_type = HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ;
+       else
+               ring_type = HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL;
+
+       rc = bnxt_hwrm_ring_alloc(bp, cp_ring, ring_type, 0,
+                                 HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, 0);
+
+       if (rc)
+               return rc;
+
+       cpr->cp_raw_cons = 0;
+       bnxt_set_db(bp, &cpr->cp_db, ring_type, 0,
+                   cp_ring->fw_ring_id, cp_ring->ring_mask);
 
-               bnxt_set_db(bp, &txr->tx_db, ring_type, idx, ring->fw_ring_id);
-               txq->index = idx;
-               bnxt_hwrm_set_ring_coal(bp, &coal, cp_ring->fw_ring_id);
+       if (BNXT_HAS_NQ(bp))
+               bnxt_db_nq(cpr);
+       else
+               bnxt_db_cq(cpr);
+
+       return bnxt_hwrm_set_async_event_cr(bp);
+}
+
+/* Free dedicated async completion ring. */
+void bnxt_free_async_cp_ring(struct bnxt *bp)
+{
+       struct bnxt_cp_ring_info *cpr = bp->async_cp_ring;
+
+       if (BNXT_NUM_ASYNC_CPR(bp) == 0 || cpr == NULL)
+               return;
+
+       if (BNXT_HAS_NQ(bp))
+               bnxt_free_nq_ring(bp, cpr);
+       else
+               bnxt_free_cp_ring(bp, cpr);
+
+       bnxt_free_ring(cpr->cp_ring_struct);
+       rte_free(cpr->cp_ring_struct);
+       cpr->cp_ring_struct = NULL;
+       rte_free(cpr);
+       bp->async_cp_ring = NULL;
+}
+
+int bnxt_alloc_async_ring_struct(struct bnxt *bp)
+{
+       struct bnxt_cp_ring_info *cpr = NULL;
+       struct bnxt_ring *ring = NULL;
+
+       if (BNXT_NUM_ASYNC_CPR(bp) == 0)
+               return 0;
+
+       cpr = rte_zmalloc_socket("cpr",
+                                sizeof(struct bnxt_cp_ring_info),
+                                RTE_CACHE_LINE_SIZE,
+                                bp->eth_dev->device->numa_node);
+       if (cpr == NULL)
+               return -ENOMEM;
+
+       ring = rte_zmalloc_socket("bnxt_cp_ring_struct",
+                                 sizeof(struct bnxt_ring),
+                                 RTE_CACHE_LINE_SIZE,
+                                 bp->eth_dev->device->numa_node);
+       if (ring == NULL) {
+               rte_free(cpr);
+               return -ENOMEM;
        }
 
+       ring->bd = (void *)cpr->cp_desc_ring;
+       ring->bd_dma = cpr->cp_desc_mapping;
+       ring->ring_size = rte_align32pow2(DEFAULT_CP_RING_SIZE);
+       ring->ring_mask = ring->ring_size - 1;
+       ring->vmem_size = 0;
+       ring->vmem = NULL;
+
+       bp->async_cp_ring = cpr;
+       cpr->cp_ring_struct = ring;
+
+       return bnxt_alloc_rings(bp, bp->eth_dev->device->numa_node, 0, NULL,
+                               NULL, bp->async_cp_ring, NULL, "def_cp");
+}
+
+int bnxt_alloc_hwrm_tx_ring(struct bnxt *bp, int queue_index)
+{
+       struct bnxt_tx_queue *txq = bp->tx_queues[queue_index];
+       struct bnxt_cp_ring_info *cpr = txq->cp_ring;
+       struct bnxt_ring *cp_ring = cpr->cp_ring_struct;
+       struct bnxt_tx_ring_info *txr = txq->tx_ring;
+       struct bnxt_ring *ring = txr->tx_ring_struct;
+       unsigned int idx = queue_index + bp->rx_cp_nr_rings;
+       uint16_t tx_cosq_id = 0;
+       struct bnxt_coal coal;
+       int rc = 0;
+
+       rc = bnxt_alloc_cmpl_ring(bp, idx, cpr);
+       if (rc)
+               goto err_out;
+
+       bnxt_init_dflt_coal(&coal);
+       bnxt_hwrm_set_ring_coal(bp, &coal, cp_ring->fw_ring_id);
+
+       rc = bnxt_hwrm_stat_ctx_alloc(bp, cpr);
+       if (rc)
+               goto err_out;
+
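+       /* With CoS classification enabled, map the Tx queue to its CoS
+        * queue ID, falling back to the first entry when the queue index
+        * exceeds the number of lossless TCs; otherwise use the default
+        * CoS queue.
+        */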
+       if (bp->vnic_cap_flags & BNXT_VNIC_CAP_COS_CLASSIFY)
+               tx_cosq_id = bp->tx_cosq_id[queue_index < bp->max_lltc ? queue_index : 0];
+       else
+               tx_cosq_id = bp->tx_cosq_id[0];
+
+       rc = bnxt_hwrm_ring_alloc(bp, ring,
+                                 HWRM_RING_ALLOC_INPUT_RING_TYPE_TX,
+                                 queue_index, cpr->hw_stats_ctx_id,
+                                 cp_ring->fw_ring_id,
+                                 tx_cosq_id);
+       if (rc)
+               goto err_out;
+
+       bnxt_set_db(bp, &txr->tx_db, HWRM_RING_ALLOC_INPUT_RING_TYPE_TX,
+                   queue_index, ring->fw_ring_id,
+                   ring->ring_mask);
+       txq->index = idx;
+
+       return rc;
 err_out:
+       bnxt_free_hwrm_tx_ring(bp, queue_index);
        return rc;
 }