log: introduce logtype register macro
[dpdk.git] / drivers / raw / dpaa2_qdma / dpaa2_qdma.c
index 1d15c30..0b9c4e3 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018 NXP
+ * Copyright 2018-2019 NXP
  */
 
 #include <string.h>
 #include <rte_malloc.h>
 #include <rte_ring.h>
 #include <rte_mempool.h>
+#include <rte_prefetch.h>
+#include <rte_kvargs.h>
 
 #include <mc/fsl_dpdmai.h>
 #include <portal/dpaa2_hw_pvt.h>
 #include <portal/dpaa2_hw_dpio.h>
 
+#include "rte_pmd_dpaa2_qdma.h"
 #include "dpaa2_qdma.h"
 #include "dpaa2_qdma_logs.h"
-#include "rte_pmd_dpaa2_qdma.h"
 
-/* Dynamic log type identifier */
-int dpaa2_qdma_logtype;
+#define DPAA2_QDMA_NO_PREFETCH "no_prefetch"
+
+uint32_t dpaa2_coherent_no_alloc_cache;
+uint32_t dpaa2_coherent_alloc_cache;
 
 /* QDMA device */
 static struct qdma_device qdma_dev;
@@ -34,10 +38,279 @@ static struct qdma_hw_queue_list qdma_queue_list
        = TAILQ_HEAD_INITIALIZER(qdma_queue_list);
 
 /* QDMA Virtual Queues */
-struct qdma_virt_queue *qdma_vqs;
+static struct qdma_virt_queue *qdma_vqs;
 
 /* QDMA per core data */
-struct qdma_per_core_info qdma_core_info[RTE_MAX_LCORE];
+static struct qdma_per_core_info qdma_core_info[RTE_MAX_LCORE];
+
+typedef int (dpdmai_dev_dequeue_multijob_t)(struct dpaa2_dpdmai_dev *dpdmai_dev,
+                                           uint16_t rxq_id,
+                                           uint16_t *vq_id,
+                                           struct rte_qdma_job **job,
+                                           uint16_t nb_jobs);
+
+dpdmai_dev_dequeue_multijob_t *dpdmai_dev_dequeue_multijob;
+
+typedef uint16_t (dpdmai_dev_get_job_t)(const struct qbman_fd *fd,
+                                       struct rte_qdma_job **job);
+typedef int (dpdmai_dev_set_fd_t)(struct qbman_fd *fd,
+                                 struct rte_qdma_job *job,
+                                 struct rte_qdma_rbp *rbp,
+                                 uint16_t vq_id);
+dpdmai_dev_get_job_t *dpdmai_dev_get_job;
+dpdmai_dev_set_fd_t *dpdmai_dev_set_fd;
+
+static inline int
+qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest,
+                       uint32_t len, struct qbman_fd *fd,
+                       struct rte_qdma_rbp *rbp)
+{
+       fd->simple_pci.saddr_lo = lower_32_bits((uint64_t) (src));
+       fd->simple_pci.saddr_hi = upper_32_bits((uint64_t) (src));
+
+       fd->simple_pci.len_sl = len;
+
+       fd->simple_pci.bmt = 1;
+       fd->simple_pci.fmt = 3;
+       fd->simple_pci.sl = 1;
+       fd->simple_pci.ser = 1;
+
+       fd->simple_pci.sportid = rbp->sportid;  /* pcie 3 */
+       fd->simple_pci.srbp = rbp->srbp;
+       if (rbp->srbp)
+               fd->simple_pci.rdttype = 0;
+       else
+               fd->simple_pci.rdttype = dpaa2_coherent_alloc_cache;
+
+       /* dest is pcie memory */
+       fd->simple_pci.dportid = rbp->dportid;  /* pcie 3 */
+       fd->simple_pci.drbp = rbp->drbp;
+       if (rbp->drbp)
+               fd->simple_pci.wrttype = 0;
+       else
+               fd->simple_pci.wrttype = dpaa2_coherent_no_alloc_cache;
+
+       fd->simple_pci.daddr_lo = lower_32_bits((uint64_t) (dest));
+       fd->simple_pci.daddr_hi = upper_32_bits((uint64_t) (dest));
+
+       return 0;
+}
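
For reference, the lower_32_bits()/upper_32_bits() helpers used here behave
like the sketch below; this is an assumption about the DPAA2 platform
headers, where the double shift is the usual trick to stay well defined for
32-bit inputs:

    #define lower_32_bits(x) ((uint32_t)(x))
    #define upper_32_bits(x) ((uint32_t)(((x) >> 16) >> 16))
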
+
+static inline int
+qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest,
+                       uint32_t len, struct qbman_fd *fd)
+{
+       fd->simple_ddr.saddr_lo = lower_32_bits((uint64_t) (src));
+       fd->simple_ddr.saddr_hi = upper_32_bits((uint64_t) (src));
+
+       fd->simple_ddr.len = len;
+
+       fd->simple_ddr.bmt = 1;
+       fd->simple_ddr.fmt = 3;
+       fd->simple_ddr.sl = 1;
+       fd->simple_ddr.ser = 1;
+       /**
+        * src: if RBP=0, {NS, RDTTYPE[3:0]} = 0_1011,
+        * i.e. coherent copy of cacheable memory; lookup
+        * in downstream cache, no allocate on miss.
+        */
+       fd->simple_ddr.rns = 0;
+       fd->simple_ddr.rdttype = dpaa2_coherent_alloc_cache;
+       /**
+        * dest: if RBP=0, {NS, WRTTYPE[3:0]} = 0_0111,
+        * i.e. coherent write of cacheable memory; lookup
+        * in downstream cache, no allocate on miss.
+        */
+       fd->simple_ddr.wns = 0;
+       fd->simple_ddr.wrttype = dpaa2_coherent_no_alloc_cache;
+
+       fd->simple_ddr.daddr_lo = lower_32_bits((uint64_t) (dest));
+       fd->simple_ddr.daddr_hi = upper_32_bits((uint64_t) (dest));
+
+       return 0;
+}
+
+static void
+dpaa2_qdma_populate_fle(struct qbman_fle *fle,
+                       struct rte_qdma_rbp *rbp,
+                       uint64_t src, uint64_t dest,
+                       size_t len, uint32_t flags)
+{
+       struct qdma_sdd *sdd;
+
+       sdd = (struct qdma_sdd *)((uint8_t *)(fle) +
+               (DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle)));
+
+       /* first frame list to source descriptor */
+       DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(sdd));
+       DPAA2_SET_FLE_LEN(fle, (2 * (sizeof(struct qdma_sdd))));
+
+       /* source and destination descriptor */
+       if (rbp && rbp->enable) {
+               /* source */
+               sdd->read_cmd.portid = rbp->sportid;
+               sdd->rbpcmd_simple.pfid = rbp->spfid;
+               sdd->rbpcmd_simple.vfid = rbp->svfid;
+
+               if (rbp->srbp) {
+                       sdd->read_cmd.rbp = rbp->srbp;
+                       sdd->read_cmd.rdtype = DPAA2_RBP_MEM_RW;
+               } else {
+                       sdd->read_cmd.rdtype = dpaa2_coherent_no_alloc_cache;
+               }
+               sdd++;
+               /* destination */
+               sdd->write_cmd.portid = rbp->dportid;
+               sdd->rbpcmd_simple.pfid = rbp->dpfid;
+               sdd->rbpcmd_simple.vfid = rbp->dvfid;
+
+               if (rbp->drbp) {
+                       sdd->write_cmd.rbp = rbp->drbp;
+                       sdd->write_cmd.wrttype = DPAA2_RBP_MEM_RW;
+               } else {
+                       sdd->write_cmd.wrttype = dpaa2_coherent_alloc_cache;
+               }
+
+       } else {
+               sdd->read_cmd.rdtype = dpaa2_coherent_no_alloc_cache;
+               sdd++;
+               sdd->write_cmd.wrttype = dpaa2_coherent_alloc_cache;
+       }
+       fle++;
+       /* source frame list to source buffer */
+       if (flags & RTE_QDMA_JOB_SRC_PHY) {
+               DPAA2_SET_FLE_ADDR(fle, src);
+               DPAA2_SET_FLE_BMT(fle);
+       } else {
+               DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(src));
+       }
+       DPAA2_SET_FLE_LEN(fle, len);
+
+       fle++;
+       /* destination frame list to destination buffer */
+       if (flags & RTE_QDMA_JOB_DEST_PHY) {
+               DPAA2_SET_FLE_BMT(fle);
+               DPAA2_SET_FLE_ADDR(fle, dest);
+       } else {
+               DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(dest));
+       }
+       DPAA2_SET_FLE_LEN(fle, len);
+
+       /* Final bit: 1, for last frame list */
+       DPAA2_SET_FLE_FIN(fle);
+}
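
dpaa2_qdma_populate_fle() relies on each FLE-pool element carrying the job
back-pointer, the frame-list entries and the two descriptors contiguously.
A sketch of the assumed layout (the struct name is illustrative; the real
sizes come from dpaa2_qdma.h):

    struct qdma_fle_elem_sketch {
        struct rte_qdma_job *job;                 /* metadata, set at enqueue */
        struct qbman_fle fle[DPAA2_QDMA_MAX_FLE]; /* FD list: SDD, src, dest */
        struct qdma_sdd sdd[2];                   /* read and write commands */
    };
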
+
+static inline int dpdmai_dev_set_fd_us(struct qbman_fd *fd,
+                                       struct rte_qdma_job *job,
+                                       struct rte_qdma_rbp *rbp,
+                                       uint16_t vq_id)
+{
+       struct rte_qdma_job **ppjob;
+       size_t iova;
+       int ret = 0;
+
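+       /* The job back-pointer is stashed in the word just before
+        * whichever buffer sits in local memory: if src carries the
+        * PCIe upper-address mask, the local side is dest.
+        */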
+       if (job->src & QDMA_RBP_UPPER_ADDRESS_MASK)
+               iova = (size_t)job->dest;
+       else
+               iova = (size_t)job->src;
+
+       /* Set the metadata */
+       job->vq_id = vq_id;
+       ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
+       *ppjob = job;
+
+       if ((rbp->drbp == 1) || (rbp->srbp == 1))
+               ret = qdma_populate_fd_pci((phys_addr_t) job->src,
+                                          (phys_addr_t) job->dest,
+                                          job->len, fd, rbp);
+       else
+               ret = qdma_populate_fd_ddr((phys_addr_t) job->src,
+                                          (phys_addr_t) job->dest,
+                                          job->len, fd);
+       return ret;
+}
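
Because the ultra-short path stores that back-pointer immediately before the
local buffer, callers must leave headroom for it. A caller-side sketch
(pcie_iova and the length are placeholder assumptions, not part of this
patch):

    size_t hdr = sizeof(struct rte_qdma_job *);
    size_t len = 4096;                     /* placeholder length */
    uint64_t pcie_iova = 0;                /* placeholder remote address */
    uint8_t *buf = rte_zmalloc(NULL, hdr + len, RTE_CACHE_LINE_SIZE);

    struct rte_qdma_job job = {
        .src  = (uint64_t)(size_t)(buf + hdr), /* headroom precedes src */
        .dest = pcie_iova,
        .len  = len,
    };
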
+static inline int dpdmai_dev_set_fd_lf(struct qbman_fd *fd,
+                                       struct rte_qdma_job *job,
+                                       struct rte_qdma_rbp *rbp,
+                                       uint16_t vq_id)
+{
+       struct rte_qdma_job **ppjob;
+       struct qbman_fle *fle;
+       int ret = 0;
+       /*
+        * Get an FLE/SDD from FLE pool.
+        * Note: IO metadata is before the FLE and SDD memory.
+        */
+       ret = rte_mempool_get(qdma_dev.fle_pool, (void **)(&ppjob));
+       if (ret) {
+               DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
+               return ret;
+       }
+
+       /* Set the metadata */
+       job->vq_id = vq_id;
+       *ppjob = job;
+
+       fle = (struct qbman_fle *)(ppjob + 1);
+
+       DPAA2_SET_FD_ADDR(fd, DPAA2_VADDR_TO_IOVA(fle));
+       DPAA2_SET_FD_COMPOUND_FMT(fd);
+       DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
+
+       /* Populate FLE */
+       memset(fle, 0, QDMA_FLE_POOL_SIZE);
+       dpaa2_qdma_populate_fle(fle, rbp, job->src, job->dest,
+                               job->len, job->flags);
+
+       return 0;
+}
+
+static inline uint16_t dpdmai_dev_get_job_us(const struct qbman_fd *fd,
+                                       struct rte_qdma_job **job)
+{
+       uint16_t vqid;
+       size_t iova;
+       struct rte_qdma_job **ppjob;
+
+       if (fd->simple_pci.saddr_hi & (QDMA_RBP_UPPER_ADDRESS_MASK >> 32))
+               iova = (size_t) (((uint64_t)fd->simple_pci.daddr_hi) << 32
+                               | (uint64_t)fd->simple_pci.daddr_lo);
+       else
+               iova = (size_t)(((uint64_t)fd->simple_pci.saddr_hi) << 32
+                               | (uint64_t)fd->simple_pci.saddr_lo);
+
+       ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
+       *job = (struct rte_qdma_job *)*ppjob;
+       (*job)->status = (fd->simple_pci.acc_err << 8) | (fd->simple_pci.error);
+       vqid = (*job)->vq_id;
+
+       return vqid;
+}
+
+static inline uint16_t dpdmai_dev_get_job_lf(const struct qbman_fd *fd,
+                                       struct rte_qdma_job **job)
+{
+       struct rte_qdma_job **ppjob;
+       uint16_t vqid;
+       /*
+        * Fetch metadata from FLE. job and vq_id were set
+        * in metadata in the enqueue operation.
+        */
+       ppjob = (struct rte_qdma_job **)
+                       DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
+       ppjob -= 1;
+
+       *job = (struct rte_qdma_job *)*ppjob;
+       (*job)->status = (DPAA2_GET_FD_ERR(fd) << 8) |
+                        (DPAA2_GET_FD_FRC(fd) & 0xFF);
+       vqid = (*job)->vq_id;
+
+       /* Free FLE to the pool */
+       rte_mempool_put(qdma_dev.fle_pool, (void *)ppjob);
+
+       return vqid;
+}
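
The 16-bit job status packs the FD error bits above the low FRC byte, so a
completion handler can split it as in this sketch:

    uint8_t fd_err  = job->status >> 8;    /* DPAA2_GET_FD_ERR() bits */
    uint8_t frc_err = job->status & 0xFF;  /* low byte of the FRC */

    if (fd_err || frc_err)
        printf("job failed: fd_err=%u frc_err=%u\n", fd_err, frc_err);
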
 
 static struct qdma_hw_queue *
 alloc_hw_queue(uint32_t lcore_id)
@@ -143,7 +416,7 @@ put_hw_queue(struct qdma_hw_queue *queue)
        }
 }
 
-int __rte_experimental
+int
 rte_qdma_init(void)
 {
        DPAA2_QDMA_FUNC_TRACE();
@@ -153,7 +426,7 @@ rte_qdma_init(void)
        return 0;
 }
 
-void __rte_experimental
+void
 rte_qdma_attr_get(struct rte_qdma_attr *qdma_attr)
 {
        DPAA2_QDMA_FUNC_TRACE();
@@ -161,7 +434,7 @@ rte_qdma_attr_get(struct rte_qdma_attr *qdma_attr)
        qdma_attr->num_hw_queues = qdma_dev.num_hw_queues;
 }
 
-int __rte_experimental
+int
 rte_qdma_reset(void)
 {
        struct qdma_hw_queue *queue;
@@ -215,10 +488,11 @@ rte_qdma_reset(void)
        return 0;
 }
 
-int __rte_experimental
+int
 rte_qdma_configure(struct rte_qdma_config *qdma_config)
 {
        int ret;
+       char fle_pool_name[32]; /* RTE_MEMZONE_NAMESIZE = 32 */
 
        DPAA2_QDMA_FUNC_TRACE();
 
@@ -258,8 +532,12 @@ rte_qdma_configure(struct rte_qdma_config *qdma_config)
        }
        qdma_dev.max_vqs = qdma_config->max_vqs;
 
-       /* Allocate FLE pool */
-       qdma_dev.fle_pool = rte_mempool_create("qdma_fle_pool",
+       /* Allocate FLE pool; append the PID so that, in the
+        * multi-process case, the pool names don't collide.
+        */
+       snprintf(fle_pool_name, sizeof(fle_pool_name), "qdma_fle_pool%u",
+                getpid());
+       qdma_dev.fle_pool = rte_mempool_create(fle_pool_name,
                        qdma_config->fle_pool_count, QDMA_FLE_POOL_SIZE,
                        QDMA_FLE_CACHE_SIZE(qdma_config->fle_pool_count), 0,
                        NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
@@ -271,10 +549,17 @@ rte_qdma_configure(struct rte_qdma_config *qdma_config)
        }
        qdma_dev.fle_pool_count = qdma_config->fle_pool_count;
 
+       if (qdma_config->format == RTE_QDMA_ULTRASHORT_FORMAT) {
+               dpdmai_dev_get_job = dpdmai_dev_get_job_us;
+               dpdmai_dev_set_fd = dpdmai_dev_set_fd_us;
+       } else {
+               dpdmai_dev_get_job = dpdmai_dev_get_job_lf;
+               dpdmai_dev_set_fd = dpdmai_dev_set_fd_lf;
+       }
        return 0;
 }
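
Tying the new knobs together, an application selecting the ultra-short
descriptor format would configure roughly as below (a sketch; the counts are
illustrative and fields not shown are left at their defaults):

    struct rte_qdma_config cfg = {
        .max_vqs        = 4,
        .fle_pool_count = 4096,
        .format         = RTE_QDMA_ULTRASHORT_FORMAT,
    };

    rte_qdma_init();
    if (rte_qdma_configure(&cfg) == 0)
        rte_qdma_start();
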
 
-int __rte_experimental
+int
 rte_qdma_start(void)
 {
        DPAA2_QDMA_FUNC_TRACE();
@@ -284,7 +569,7 @@ rte_qdma_start(void)
        return 0;
 }
 
-int __rte_experimental
+int
 rte_qdma_vq_create(uint32_t lcore_id, uint32_t flags)
 {
        char ring_name[32];
@@ -303,6 +588,7 @@ rte_qdma_vq_create(uint32_t lcore_id, uint32_t flags)
        /* Return in case no VQ is free */
        if (i == qdma_dev.max_vqs) {
                rte_spinlock_unlock(&qdma_dev.lock);
+               DPAA2_QDMA_ERR("No free virtual queue available");
                return -ENODEV;
        }
 
@@ -313,7 +599,7 @@ rte_qdma_vq_create(uint32_t lcore_id, uint32_t flags)
                qdma_vqs[i].exclusive_hw_queue = 1;
        } else {
                /* Allocate a ring for the virtual queue in VQ mode */
-               sprintf(ring_name, "status ring %d", i);
+               snprintf(ring_name, sizeof(ring_name), "status ring %d", i);
                qdma_vqs[i].status_ring = rte_ring_create(ring_name,
                        qdma_dev.fle_pool_count, rte_socket_id(), 0);
                if (!qdma_vqs[i].status_ring) {
@@ -338,77 +624,48 @@ rte_qdma_vq_create(uint32_t lcore_id, uint32_t flags)
 
        qdma_vqs[i].in_use = 1;
        qdma_vqs[i].lcore_id = lcore_id;
-
+       memset(&qdma_vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp));
        rte_spinlock_unlock(&qdma_dev.lock);
 
        return i;
 }
 
-static void
-dpaa2_qdma_populate_fle(struct qbman_fle *fle,
-                       uint64_t src, uint64_t dest,
-                       size_t len, uint32_t flags)
+/* Create a virtual queue for route-by-port (RBP) */
+int
+rte_qdma_vq_create_rbp(uint32_t lcore_id, uint32_t flags,
+                       struct rte_qdma_rbp *rbp)
 {
-       struct qdma_sdd *sdd;
-
-       DPAA2_QDMA_FUNC_TRACE();
-
-       sdd = (struct qdma_sdd *)((uint8_t *)(fle) +
-               (DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle)));
+       int i;
 
-       /* first frame list to source descriptor */
-       DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(sdd));
-       DPAA2_SET_FLE_LEN(fle, (2 * (sizeof(struct qdma_sdd))));
+       i = rte_qdma_vq_create(lcore_id, flags);
 
-       /* source and destination descriptor */
-       DPAA2_SET_SDD_RD_COHERENT(sdd); /* source descriptor CMD */
-       sdd++;
-       DPAA2_SET_SDD_WR_COHERENT(sdd); /* dest descriptor CMD */
-
-       fle++;
-       /* source frame list to source buffer */
-       if (flags & RTE_QDMA_JOB_SRC_PHY) {
-               DPAA2_SET_FLE_ADDR(fle, src);
-               DPAA2_SET_FLE_BMT(fle);
-       } else {
-               DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(src));
-       }
-       DPAA2_SET_FLE_LEN(fle, len);
+       memcpy(&qdma_vqs[i].rbp, rbp, sizeof(struct rte_qdma_rbp));
 
-       fle++;
-       /* destination frame list to destination buffer */
-       if (flags & RTE_QDMA_JOB_DEST_PHY) {
-               DPAA2_SET_FLE_BMT(fle);
-               DPAA2_SET_FLE_ADDR(fle, dest);
-       } else {
-               DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(dest));
-       }
-       DPAA2_SET_FLE_LEN(fle, len);
-
-       /* Final bit: 1, for last frame list */
-       DPAA2_SET_FLE_FIN(fle);
+       return i;
 }
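
A minimal route-by-port setup touching only the fields this patch consumes
(the port and PF/VF ids are illustrative assumptions):

    struct rte_qdma_rbp rbp = {0};
    int vq_id;

    rbp.enable  = 1;
    rbp.drbp    = 1;   /* destination is routed out via PCIe */
    rbp.dportid = 3;   /* assumed port id */
    rbp.dpfid   = 0;   /* assumed PF id */
    rbp.dvfid   = 0;   /* assumed VF id */

    vq_id = rte_qdma_vq_create_rbp(rte_lcore_id(), 0, &rbp);
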
 
 static int
-dpdmai_dev_enqueue(struct dpaa2_dpdmai_dev *dpdmai_dev,
-                  uint16_t txq_id,
-                  uint16_t vq_id,
-                  struct rte_qdma_job *job)
+dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
+                       uint16_t txq_id,
+                       uint16_t vq_id,
+                       struct rte_qdma_rbp *rbp,
+                       struct rte_qdma_job **job,
+                       uint16_t nb_jobs)
 {
-       struct qdma_io_meta *io_meta;
-       struct qbman_fd fd;
+       struct qbman_fd fd[RTE_QDMA_BURST_NB_MAX];
        struct dpaa2_queue *txq;
-       struct qbman_fle *fle;
        struct qbman_eq_desc eqdesc;
        struct qbman_swp *swp;
        int ret;
-
-       DPAA2_QDMA_FUNC_TRACE();
+       uint32_t num_to_send = 0;
+       uint16_t num_tx = 0;
 
        if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
                ret = dpaa2_affine_qbman_swp();
                if (ret) {
-                       DPAA2_QDMA_ERR("Failure in affining portal");
+                       DPAA2_QDMA_ERR(
+                               "Failed to allocate IO portal, tid: %d",
+                               rte_gettid());
                        return 0;
                }
        }
@@ -422,73 +679,60 @@ dpdmai_dev_enqueue(struct dpaa2_dpdmai_dev *dpdmai_dev,
        qbman_eq_desc_set_no_orp(&eqdesc, 0);
        qbman_eq_desc_set_response(&eqdesc, 0, 0);
 
-       /*
-        * Get an FLE/SDD from FLE pool.
-        * Note: IO metadata is before the FLE and SDD memory.
-        */
-       ret = rte_mempool_get(qdma_dev.fle_pool, (void **)(&io_meta));
-       if (ret) {
-               DPAA2_QDMA_DP_WARN("Memory alloc failed for FLE");
-               return ret;
-       }
-
-       /* Set the metadata */
-       io_meta->cnxt = (size_t)job;
-       io_meta->id = vq_id;
+       memset(fd, 0, RTE_QDMA_BURST_NB_MAX * sizeof(struct qbman_fd));
 
-       fle = (struct qbman_fle *)(io_meta + 1);
+       while (nb_jobs > 0) {
+               uint32_t loop;
 
-       /* populate Frame descriptor */
-       memset(&fd, 0, sizeof(struct qbman_fd));
-       DPAA2_SET_FD_ADDR(&fd, DPAA2_VADDR_TO_IOVA(fle));
-       DPAA2_SET_FD_COMPOUND_FMT(&fd);
-       DPAA2_SET_FD_FRC(&fd, QDMA_SER_CTX);
-
-       /* Populate FLE */
-       memset(fle, 0, QDMA_FLE_POOL_SIZE);
-       dpaa2_qdma_populate_fle(fle, job->src, job->dest, job->len, job->flags);
+               num_to_send = (nb_jobs > dpaa2_eqcr_size) ?
+                       dpaa2_eqcr_size : nb_jobs;
 
-       /* Enqueue the packet to the QBMAN */
-       do {
-               ret = qbman_swp_enqueue_multiple(swp, &eqdesc, &fd, NULL, 1);
-               if (ret < 0 && ret != -EBUSY)
-                       DPAA2_QDMA_ERR("Transmit failure with err: %d", ret);
-       } while (ret == -EBUSY);
+               for (loop = 0; loop < num_to_send; loop++) {
+                       ret = dpdmai_dev_set_fd(&fd[loop],
+                                               job[num_tx], rbp, vq_id);
+                       if (ret < 0) {
+                               /* Set nb_jobs to loop so that the outer
+                                * while loop terminates.
+                                */
+                               nb_jobs = loop;
+                               break;
+                       }
 
-       DPAA2_QDMA_DP_DEBUG("Successfully transmitted a packet");
+                       num_tx++;
+               }
 
-       return ret;
+               /* Enqueue the packet to the QBMAN */
+               uint32_t enqueue_loop = 0, retry_count = 0;
+               while (enqueue_loop < loop) {
+                       ret = qbman_swp_enqueue_multiple(swp,
+                                               &eqdesc,
+                                               &fd[enqueue_loop],
+                                               NULL,
+                                               loop - enqueue_loop);
+                       if (unlikely(ret < 0)) {
+                               retry_count++;
+                               if (retry_count > DPAA2_MAX_TX_RETRY_COUNT)
+                                       return num_tx - (loop - enqueue_loop);
+                       } else {
+                               enqueue_loop += ret;
+                               retry_count = 0;
+                       }
+               }
+               nb_jobs -= loop;
+       }
+       return num_tx;
 }
 
-int __rte_experimental
+int
 rte_qdma_vq_enqueue_multi(uint16_t vq_id,
                          struct rte_qdma_job **job,
                          uint16_t nb_jobs)
-{
-       int i, ret;
-
-       DPAA2_QDMA_FUNC_TRACE();
-
-       for (i = 0; i < nb_jobs; i++) {
-               ret = rte_qdma_vq_enqueue(vq_id, job[i]);
-               if (ret < 0)
-                       break;
-       }
-
-       return i;
-}
-
-int __rte_experimental
-rte_qdma_vq_enqueue(uint16_t vq_id,
-                   struct rte_qdma_job *job)
 {
        struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
        struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
        struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
        int ret;
 
-       DPAA2_QDMA_FUNC_TRACE();
-
        /* Return error in case of wrong lcore_id */
        if (rte_lcore_id() != qdma_vq->lcore_id) {
                DPAA2_QDMA_ERR("QDMA enqueue for vqid %d on wrong core",
@@ -496,195 +740,364 @@ rte_qdma_vq_enqueue(uint16_t vq_id,
                return -EINVAL;
        }
 
-       ret = dpdmai_dev_enqueue(dpdmai_dev, qdma_pq->queue_id, vq_id, job);
+       ret = dpdmai_dev_enqueue_multi(dpdmai_dev,
+                                qdma_pq->queue_id,
+                                vq_id,
+                                &qdma_vq->rbp,
+                                job,
+                                nb_jobs);
        if (ret < 0) {
                DPAA2_QDMA_ERR("DPDMAI device enqueue failed: %d", ret);
                return ret;
        }
 
-       qdma_vq->num_enqueues++;
+       qdma_vq->num_enqueues += ret;
 
-       return 1;
+       return ret;
+}
+
+int
+rte_qdma_vq_enqueue(uint16_t vq_id,
+                   struct rte_qdma_job *job)
+{
+       return rte_qdma_vq_enqueue_multi(vq_id, &job, 1);
 }
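
With the single-job API now a thin wrapper, callers are expected to batch.
A burst-submission sketch (the IOVA arrays are placeholders filled by the
application):

    uint64_t src_iova[8], dst_iova[8];     /* filled by the application */
    struct rte_qdma_job jobs[8], *jptr[8];
    int i, sent;

    for (i = 0; i < 8; i++) {
        jobs[i].src   = src_iova[i];
        jobs[i].dest  = dst_iova[i];
        jobs[i].len   = 4096;
        jobs[i].flags = RTE_QDMA_JOB_SRC_PHY | RTE_QDMA_JOB_DEST_PHY;
        jptr[i] = &jobs[i];
    }
    sent = rte_qdma_vq_enqueue_multi(vq_id, jptr, 8);
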
 
 /* Function to receive a QDMA job for a given device and queue*/
 static int
-dpdmai_dev_dequeue(struct dpaa2_dpdmai_dev *dpdmai_dev,
-                  uint16_t rxq_id,
-                  uint16_t *vq_id,
-                  struct rte_qdma_job **job)
+dpdmai_dev_dequeue_multijob_prefetch(
+                       struct dpaa2_dpdmai_dev *dpdmai_dev,
+                       uint16_t rxq_id,
+                       uint16_t *vq_id,
+                       struct rte_qdma_job **job,
+                       uint16_t nb_jobs)
 {
-       struct qdma_io_meta *io_meta;
        struct dpaa2_queue *rxq;
-       struct qbman_result *dq_storage;
+       struct qbman_result *dq_storage, *dq_storage1 = NULL;
        struct qbman_pull_desc pulldesc;
-       const struct qbman_fd *fd;
        struct qbman_swp *swp;
-       struct qbman_fle *fle;
+       struct queue_storage_info_t *q_storage;
        uint32_t fqid;
-       uint8_t status;
-       int ret;
-
-       DPAA2_QDMA_FUNC_TRACE();
+       uint8_t status, pending;
+       uint8_t num_rx = 0;
+       const struct qbman_fd *fd;
+       uint16_t vqid;
+       int ret, pull_size;
 
        if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
                ret = dpaa2_affine_qbman_swp();
                if (ret) {
-                       DPAA2_QDMA_ERR("Failure in affining portal");
+                       DPAA2_QDMA_ERR(
+                               "Failed to allocate IO portal, tid: %d",
+                               rte_gettid());
                        return 0;
                }
        }
        swp = DPAA2_PER_LCORE_PORTAL;
 
+       pull_size = (nb_jobs > dpaa2_dqrr_size) ? dpaa2_dqrr_size : nb_jobs;
        rxq = &(dpdmai_dev->rx_queue[rxq_id]);
-       dq_storage = rxq->q_storage->dq_storage[0];
        fqid = rxq->fqid;
+       q_storage = rxq->q_storage;
+
+       if (unlikely(!q_storage->active_dqs)) {
+               q_storage->toggle = 0;
+               dq_storage = q_storage->dq_storage[q_storage->toggle];
+               q_storage->last_num_pkts = pull_size;
+               qbman_pull_desc_clear(&pulldesc);
+               qbman_pull_desc_set_numframes(&pulldesc,
+                                             q_storage->last_num_pkts);
+               qbman_pull_desc_set_fq(&pulldesc, fqid);
+               qbman_pull_desc_set_storage(&pulldesc, dq_storage,
+                               (size_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
+               if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
+                       while (!qbman_check_command_complete(
+                              get_swp_active_dqs(
+                              DPAA2_PER_LCORE_DPIO->index)))
+                               ;
+                       clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+               }
+               while (1) {
+                       if (qbman_swp_pull(swp, &pulldesc)) {
+                               DPAA2_QDMA_DP_WARN(
+                                       "VDQ command not issued. QBMAN busy");
+                               /* Portal was busy, try again */
+                               continue;
+                       }
+                       break;
+               }
+               q_storage->active_dqs = dq_storage;
+               q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
+               set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index,
+                                  dq_storage);
+       }
 
-       /* Prepare dequeue descriptor */
+       dq_storage = q_storage->active_dqs;
+       rte_prefetch0((void *)(size_t)(dq_storage));
+       rte_prefetch0((void *)(size_t)(dq_storage + 1));
+
+       /* Prepare the next pull descriptor. This gives room for the
+        * prefetching done on the DQRR entries.
+        */
+       q_storage->toggle ^= 1;
+       dq_storage1 = q_storage->dq_storage[q_storage->toggle];
        qbman_pull_desc_clear(&pulldesc);
+       qbman_pull_desc_set_numframes(&pulldesc, pull_size);
        qbman_pull_desc_set_fq(&pulldesc, fqid);
-       qbman_pull_desc_set_storage(&pulldesc, dq_storage,
-               (uint64_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
-       qbman_pull_desc_set_numframes(&pulldesc, 1);
+       qbman_pull_desc_set_storage(&pulldesc, dq_storage1,
+               (size_t)(DPAA2_VADDR_TO_IOVA(dq_storage1)), 1);
 
+       /* Check if the previously issued command has completed.
+        * The SWP also appears to be shared with the Ethernet
+        * and SEC drivers.
+        */
+       while (!qbman_check_command_complete(dq_storage))
+               ;
+       if (dq_storage == get_swp_active_dqs(q_storage->active_dpio_id))
+               clear_swp_active_dqs(q_storage->active_dpio_id);
+
+       pending = 1;
+
+       do {
+               /* Loop until the dq_storage is updated with
+                * a new token by QBMAN
+                */
+               while (!qbman_check_new_result(dq_storage))
+                       ;
+               rte_prefetch0((void *)((size_t)(dq_storage + 2)));
+               /* Check whether the last pull command has expired and,
+                * if so, set the loop-termination condition.
+                */
+               if (qbman_result_DQ_is_pull_complete(dq_storage)) {
+                       pending = 0;
+                       /* Check for valid frame. */
+                       status = qbman_result_DQ_flags(dq_storage);
+                       if (unlikely((status & QBMAN_DQ_STAT_VALIDFRAME) == 0))
+                               continue;
+               }
+               fd = qbman_result_DQ_fd(dq_storage);
+
+               vqid = dpdmai_dev_get_job(fd, &job[num_rx]);
+               if (vq_id)
+                       vq_id[num_rx] = vqid;
+
+               dq_storage++;
+               num_rx++;
+       } while (pending);
+
+       if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
+               while (!qbman_check_command_complete(
+                      get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
+                       ;
+               clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+       }
+       /* issue a volatile dequeue command for next pull */
        while (1) {
                if (qbman_swp_pull(swp, &pulldesc)) {
-                       DPAA2_QDMA_DP_WARN("VDQ command not issued. QBMAN busy");
+                       DPAA2_QDMA_DP_WARN("VDQ command not issued. "
+                                          "QBMAN busy (2)");
                        continue;
                }
                break;
        }
 
-       /* Check if previous issued command is completed. */
-       while (!qbman_check_command_complete(dq_storage))
-               ;
-       /* Loop until dq_storage is updated with new token by QBMAN */
-       while (!qbman_check_new_result(dq_storage))
-               ;
+       q_storage->active_dqs = dq_storage1;
+       q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
+       set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage1);
 
-       /* Check for valid frame. */
-       status = qbman_result_DQ_flags(dq_storage);
-       if (unlikely((status & QBMAN_DQ_STAT_VALIDFRAME) == 0)) {
-               DPAA2_QDMA_DP_DEBUG("No frame is delivered");
-               return 0;
-       }
+       return num_rx;
+}
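
The prefetch path ping-pongs between the two dq_storage areas: each call
drains the pull issued on the previous call while the next pull is already in
flight. In outline (drain_results() and issue_pull() are hypothetical helpers
standing in for the QBMAN calls above):

    int t = q_storage->toggle;

    drain_results(q_storage->dq_storage[t]);       /* previous call's pull */
    issue_pull(swp, q_storage->dq_storage[t ^ 1]); /* overlap the next one */
    q_storage->toggle = t ^ 1;
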
 
-       /* Get the FD */
-       fd = qbman_result_DQ_fd(dq_storage);
+static int
+dpdmai_dev_dequeue_multijob_no_prefetch(
+               struct dpaa2_dpdmai_dev *dpdmai_dev,
+               uint16_t rxq_id,
+               uint16_t *vq_id,
+               struct rte_qdma_job **job,
+               uint16_t nb_jobs)
+{
+       struct dpaa2_queue *rxq;
+       struct qbman_result *dq_storage;
+       struct qbman_pull_desc pulldesc;
+       struct qbman_swp *swp;
+       uint32_t fqid;
+       uint8_t status, pending;
+       uint8_t num_rx = 0;
+       const struct qbman_fd *fd;
+       uint16_t vqid;
+       int ret, next_pull = nb_jobs, num_pulled = 0;
 
-       /*
-        * Fetch metadata from FLE. job and vq_id were set
-        * in metadata in the enqueue operation.
-        */
-       fle = (struct qbman_fle *)DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
-       io_meta = (struct qdma_io_meta *)(fle) - 1;
-       if (vq_id)
-               *vq_id = io_meta->id;
+       if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+               ret = dpaa2_affine_qbman_swp();
+               if (ret) {
+                       DPAA2_QDMA_ERR(
+                               "Failed to allocate IO portal, tid: %d",
+                               rte_gettid());
+                       return 0;
+               }
+       }
+       swp = DPAA2_PER_LCORE_PORTAL;
 
-       *job = (struct rte_qdma_job *)(size_t)io_meta->cnxt;
-       (*job)->status = DPAA2_GET_FD_ERR(fd);
+       rxq = &(dpdmai_dev->rx_queue[rxq_id]);
+       fqid = rxq->fqid;
 
-       /* Free FLE to the pool */
-       rte_mempool_put(qdma_dev.fle_pool, io_meta);
+       do {
+               dq_storage = rxq->q_storage->dq_storage[0];
+               /* Prepare dequeue descriptor */
+               qbman_pull_desc_clear(&pulldesc);
+               qbman_pull_desc_set_fq(&pulldesc, fqid);
+               qbman_pull_desc_set_storage(&pulldesc, dq_storage,
+                       (uint64_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
+
+               if (next_pull > dpaa2_dqrr_size) {
+                       qbman_pull_desc_set_numframes(&pulldesc,
+                                       dpaa2_dqrr_size);
+                       next_pull -= dpaa2_dqrr_size;
+               } else {
+                       qbman_pull_desc_set_numframes(&pulldesc, next_pull);
+                       next_pull = 0;
+               }
 
-       DPAA2_QDMA_DP_DEBUG("packet received");
+               while (1) {
+                       if (qbman_swp_pull(swp, &pulldesc)) {
+                               DPAA2_QDMA_DP_WARN("VDQ command not issued. QBMAN busy");
+                               /* Portal was busy, try again */
+                               continue;
+                       }
+                       break;
+               }
 
-       return 1;
-}
+               rte_prefetch0((void *)((size_t)(dq_storage + 1)));
+               /* Check if the previous issued command is completed. */
+               while (!qbman_check_command_complete(dq_storage))
+                       ;
+
+               num_pulled = 0;
+               pending = 1;
+
+               do {
+                       /* Loop until dq_storage is updated
+                        * with a new token by QBMAN
+                        */
+                       while (!qbman_check_new_result(dq_storage))
+                               ;
+                       rte_prefetch0((void *)((size_t)(dq_storage + 2)));
+
+                       if (qbman_result_DQ_is_pull_complete(dq_storage)) {
+                               pending = 0;
+                               /* Check for valid frame. */
+                               status = qbman_result_DQ_flags(dq_storage);
+                               if (unlikely((status &
+                                       QBMAN_DQ_STAT_VALIDFRAME) == 0))
+                                       continue;
+                       }
+                       fd = qbman_result_DQ_fd(dq_storage);
 
-int __rte_experimental
-rte_qdma_vq_dequeue_multi(uint16_t vq_id,
-                         struct rte_qdma_job **job,
-                         uint16_t nb_jobs)
-{
-       int i;
+                       vqid = dpdmai_dev_get_job(fd, &job[num_rx]);
+                       if (vq_id)
+                               vq_id[num_rx] = vqid;
 
-       DPAA2_QDMA_FUNC_TRACE();
+                       dq_storage++;
+                       num_rx++;
+                       num_pulled++;
 
-       for (i = 0; i < nb_jobs; i++) {
-               job[i] = rte_qdma_vq_dequeue(vq_id);
-               if (!job[i])
-                       break;
-       }
+               } while (pending);
+       /* Last VDQ provided all packets and more packets are requested */
+       } while (next_pull && num_pulled == dpaa2_dqrr_size);
 
-       return i;
+       return num_rx;
 }
 
-struct rte_qdma_job * __rte_experimental
-rte_qdma_vq_dequeue(uint16_t vq_id)
+int
+rte_qdma_vq_dequeue_multi(uint16_t vq_id,
+                         struct rte_qdma_job **job,
+                         uint16_t nb_jobs)
 {
        struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
        struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
-       struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
-       struct rte_qdma_job *job = NULL;
        struct qdma_virt_queue *temp_qdma_vq;
-       int dequeue_budget = QDMA_DEQUEUE_BUDGET;
-       int ring_count, ret, i;
-       uint16_t temp_vq_id;
-
-       DPAA2_QDMA_FUNC_TRACE();
+       struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
+       int ring_count, ret = 0, i;
 
        /* Return error in case of wrong lcore_id */
        if (rte_lcore_id() != (unsigned int)(qdma_vq->lcore_id)) {
-               DPAA2_QDMA_ERR("QDMA dequeue for vqid %d on wrong core",
+               DPAA2_QDMA_WARN("QDMA dequeue for vqid %d on wrong core",
                                vq_id);
-               return NULL;
+               return -EINVAL;
        }
 
        /* Only dequeue when there are pending jobs on VQ */
        if (qdma_vq->num_enqueues == qdma_vq->num_dequeues)
-               return NULL;
+               return 0;
+
+       if (qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs))
+               nb_jobs = (qdma_vq->num_enqueues - qdma_vq->num_dequeues);
 
        if (qdma_vq->exclusive_hw_queue) {
                /* In case of exclusive queue directly fetch from HW queue */
-               ret = dpdmai_dev_dequeue(dpdmai_dev, qdma_pq->queue_id,
-                                        NULL, &job);
+               ret = dpdmai_dev_dequeue_multijob(dpdmai_dev, qdma_pq->queue_id,
+                                        NULL, job, nb_jobs);
                if (ret < 0) {
                        DPAA2_QDMA_ERR(
                                "Dequeue from DPDMAI device failed: %d", ret);
-                       return NULL;
+                       return ret;
                }
+               qdma_vq->num_dequeues += ret;
        } else {
+               uint16_t temp_vq_id[RTE_QDMA_BURST_NB_MAX];
                /*
                 * Get the QDMA completed jobs from the software ring.
                 * In case they are not available on the ring poke the HW
                 * to fetch completed jobs from corresponding HW queues
                 */
                ring_count = rte_ring_count(qdma_vq->status_ring);
-               if (ring_count == 0) {
+               if (ring_count < nb_jobs) {
                        /* TODO - how to choose the right budget */
-                       for (i = 0; i < dequeue_budget; i++) {
-                               ret = dpdmai_dev_dequeue(dpdmai_dev,
-                                       qdma_pq->queue_id, &temp_vq_id, &job);
-                               if (ret == 0)
-                                       break;
-                               temp_qdma_vq = &qdma_vqs[temp_vq_id];
+                       ret = dpdmai_dev_dequeue_multijob(dpdmai_dev,
+                                       qdma_pq->queue_id,
+                                       temp_vq_id, job, nb_jobs);
+                       for (i = 0; i < ret; i++) {
+                               temp_qdma_vq = &qdma_vqs[temp_vq_id[i]];
                                rte_ring_enqueue(temp_qdma_vq->status_ring,
-                                       (void *)(job));
-                               ring_count = rte_ring_count(
-                                       qdma_vq->status_ring);
-                               if (ring_count)
-                                       break;
+                                       (void *)(job[i]));
                        }
+                       ring_count = rte_ring_count(
+                                       qdma_vq->status_ring);
                }
 
-               /* Dequeue job from the software ring to provide to the user */
-               rte_ring_dequeue(qdma_vq->status_ring, (void **)&job);
-               if (job)
-                       qdma_vq->num_dequeues++;
+               if (ring_count) {
+                       /* Dequeue job from the software ring
+                        * to provide to the user
+                        */
+                       ret = rte_ring_dequeue_bulk(qdma_vq->status_ring,
+                                       (void **)job, ring_count, NULL);
+                       if (ret)
+                               qdma_vq->num_dequeues += ret;
+               }
        }
 
+       return ret;
+}
+
+struct rte_qdma_job *
+rte_qdma_vq_dequeue(uint16_t vq_id)
+{
+       int ret;
+       struct rte_qdma_job *job = NULL;
+
+       ret = rte_qdma_vq_dequeue_multi(vq_id, &job, 1);
+       if (ret < 0)
+               DPAA2_QDMA_DP_WARN("DPDMAI device dequeue failed: %d", ret);
+
        return job;
 }
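
Completion polling then pairs naturally with the burst enqueue (sketch):

    struct rte_qdma_job *done[8];
    int i, n;

    n = rte_qdma_vq_dequeue_multi(vq_id, done, 8);
    for (i = 0; i < n; i++) {
        if (done[i]->status)
            printf("job %d failed: 0x%x\n", i, done[i]->status);
    }
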
 
-void __rte_experimental
+void
 rte_qdma_vq_stats(uint16_t vq_id,
                  struct rte_qdma_vq_stats *vq_status)
 {
        struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
 
-       DPAA2_QDMA_FUNC_TRACE();
-
        if (qdma_vq->in_use) {
                vq_status->exclusive_hw_queue = qdma_vq->exclusive_hw_queue;
                vq_status->lcore_id = qdma_vq->lcore_id;
@@ -695,7 +1108,7 @@ rte_qdma_vq_stats(uint16_t vq_id,
        }
 }
 
-int __rte_experimental
+int
 rte_qdma_vq_destroy(uint16_t vq_id)
 {
        struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
@@ -719,12 +1132,41 @@ rte_qdma_vq_destroy(uint16_t vq_id)
 
        memset(qdma_vq, 0, sizeof(struct qdma_virt_queue));
 
+       rte_spinlock_unlock(&qdma_dev.lock);
+
+       return 0;
+}
+
+int
+rte_qdma_vq_destroy_rbp(uint16_t vq_id)
+{
+       struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
+
+       DPAA2_QDMA_FUNC_TRACE();
+
+       /* In case there are pending jobs on the VQ, return -EBUSY */
+       if (qdma_vq->num_enqueues != qdma_vq->num_dequeues)
+               return -EBUSY;
+
        rte_spinlock_lock(&qdma_dev.lock);
 
+       if (qdma_vq->exclusive_hw_queue) {
+               free_hw_queue(qdma_vq->hw_queue);
+       } else {
+               if (qdma_vq->status_ring)
+                       rte_ring_free(qdma_vq->status_ring);
+
+               put_hw_queue(qdma_vq->hw_queue);
+       }
+
+       memset(qdma_vq, 0, sizeof(struct qdma_virt_queue));
+
+       rte_spinlock_unlock(&qdma_dev.lock);
+
        return 0;
 }
 
-void __rte_experimental
+void
 rte_qdma_stop(void)
 {
        DPAA2_QDMA_FUNC_TRACE();
@@ -732,7 +1174,7 @@ rte_qdma_stop(void)
        qdma_dev.state = 0;
 }
 
-void __rte_experimental
+void
 rte_qdma_destroy(void)
 {
        DPAA2_QDMA_FUNC_TRACE();
@@ -793,9 +1235,6 @@ dpaa2_dpdmai_dev_uninit(struct rte_rawdev *rawdev)
 
        DPAA2_QDMA_FUNC_TRACE();
 
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-               return 0;
-
        /* Remove HW queues from global list */
        remove_hw_queues_from_list(dpdmai_dev);
 
@@ -805,7 +1244,7 @@ dpaa2_dpdmai_dev_uninit(struct rte_rawdev *rawdev)
                DPAA2_QDMA_ERR("dmdmai disable failed");
 
        /* Free the DQRR storage for Rx */
-       for (i = 0; i < DPDMAI_PRIO_NUM; i++) {
+       for (i = 0; i < dpdmai_dev->num_queues; i++) {
                struct dpaa2_queue *rxq = &(dpdmai_dev->rx_queue[i]);
 
                if (rxq->q_storage) {
@@ -822,6 +1261,43 @@ dpaa2_dpdmai_dev_uninit(struct rte_rawdev *rawdev)
        return 0;
 }
 
+static int
+check_devargs_handler(__rte_unused const char *key, const char *value,
+                     __rte_unused void *opaque)
+{
+       if (strcmp(value, "1"))
+               return -1;
+
+       return 0;
+}
+
+static int
+dpaa2_get_devargs(struct rte_devargs *devargs, const char *key)
+{
+       struct rte_kvargs *kvlist;
+
+       if (!devargs)
+               return 0;
+
+       kvlist = rte_kvargs_parse(devargs->args, NULL);
+       if (!kvlist)
+               return 0;
+
+       if (!rte_kvargs_count(kvlist, key)) {
+               rte_kvargs_free(kvlist);
+               return 0;
+       }
+
+       if (rte_kvargs_process(kvlist, key,
+                              check_devargs_handler, NULL) < 0) {
+               rte_kvargs_free(kvlist);
+               return 0;
+       }
+       rte_kvargs_free(kvlist);
+
+       return 1;
+}
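
The handler recognizes no_prefetch=1 in the device argument string, e.g.
passed on the fslmc bus as something like fslmc:dpdmai.1,no_prefetch=1 (the
object name is an assumption). The kvargs plumbing reduces to this sketch:

    struct rte_kvargs *kvlist = rte_kvargs_parse("no_prefetch=1", NULL);

    if (kvlist && rte_kvargs_count(kvlist, "no_prefetch"))
        printf("no-prefetch Rx path will be selected\n");
    rte_kvargs_free(kvlist);
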
+
 static int
 dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
 {
@@ -834,13 +1310,9 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
 
        DPAA2_QDMA_FUNC_TRACE();
 
-       /* For secondary processes, the primary has done all the work */
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-               return 0;
-
        /* Open DPDMAI device */
        dpdmai_dev->dpdmai_id = dpdmai_id;
-       dpdmai_dev->dpdmai.regs = rte_mcp_ptr_list[MC_PORTAL_INDEX];
+       dpdmai_dev->dpdmai.regs = dpaa2_get_mcp_ptr(MC_PORTAL_INDEX);
        ret = dpdmai_open(&dpdmai_dev->dpdmai, CMD_PRI_LOW,
                          dpdmai_dev->dpdmai_id, &dpdmai_dev->token);
        if (ret) {
@@ -856,17 +1328,17 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
                               ret);
                goto init_err;
        }
-       dpdmai_dev->num_queues = attr.num_of_priorities;
+       dpdmai_dev->num_queues = attr.num_of_queues;
 
        /* Set up Rx Queues */
-       for (i = 0; i < attr.num_of_priorities; i++) {
+       for (i = 0; i < dpdmai_dev->num_queues; i++) {
                struct dpaa2_queue *rxq;
 
                memset(&rx_queue_cfg, 0, sizeof(struct dpdmai_rx_queue_cfg));
                ret = dpdmai_set_rx_queue(&dpdmai_dev->dpdmai,
                                          CMD_PRI_LOW,
                                          dpdmai_dev->token,
-                                         i, &rx_queue_cfg);
+                                         i, 0, &rx_queue_cfg);
                if (ret) {
                        DPAA2_QDMA_ERR("Setting Rx queue failed with err: %d",
                                       ret);
@@ -893,9 +1365,9 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
        }
 
        /* Get Rx and Tx queues FQID's */
-       for (i = 0; i < DPDMAI_PRIO_NUM; i++) {
+       for (i = 0; i < dpdmai_dev->num_queues; i++) {
                ret = dpdmai_get_rx_queue(&dpdmai_dev->dpdmai, CMD_PRI_LOW,
-                                         dpdmai_dev->token, i, &rx_attr);
+                                         dpdmai_dev->token, i, 0, &rx_attr);
                if (ret) {
                        DPAA2_QDMA_ERR("Reading device failed with err: %d",
                                       ret);
@@ -904,7 +1376,7 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
                dpdmai_dev->rx_queue[i].fqid = rx_attr.fqid;
 
                ret = dpdmai_get_tx_queue(&dpdmai_dev->dpdmai, CMD_PRI_LOW,
-                                         dpdmai_dev->token, i, &tx_attr);
+                                         dpdmai_dev->token, i, 0, &tx_attr);
                if (ret) {
                        DPAA2_QDMA_ERR("Reading device failed with err: %d",
                                       ret);
@@ -927,6 +1399,32 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
                DPAA2_QDMA_ERR("Adding H/W queue to list failed");
                goto init_err;
        }
+
+       if (dpaa2_get_devargs(rawdev->device->devargs,
+               DPAA2_QDMA_NO_PREFETCH)) {
+               /* No-prefetch Rx mode requested via devargs. */
+               dpdmai_dev_dequeue_multijob =
+                               dpdmai_dev_dequeue_multijob_no_prefetch;
+               DPAA2_QDMA_INFO("No Prefetch RX Mode enabled");
+       } else {
+               dpdmai_dev_dequeue_multijob =
+                       dpdmai_dev_dequeue_multijob_prefetch;
+       }
+
+       if (!dpaa2_coherent_no_alloc_cache) {
+               if (dpaa2_svr_family == SVR_LX2160A) {
+                       dpaa2_coherent_no_alloc_cache =
+                               DPAA2_LX2_COHERENT_NO_ALLOCATE_CACHE;
+                       dpaa2_coherent_alloc_cache =
+                               DPAA2_LX2_COHERENT_ALLOCATE_CACHE;
+               } else {
+                       dpaa2_coherent_no_alloc_cache =
+                               DPAA2_COHERENT_NO_ALLOCATE_CACHE;
+                       dpaa2_coherent_alloc_cache =
+                               DPAA2_COHERENT_ALLOCATE_CACHE;
+               }
+       }
+
        DPAA2_QDMA_DEBUG("Initialized dpdmai object successfully");
 
        return 0;
@@ -985,18 +1483,13 @@ rte_dpaa2_qdma_remove(struct rte_dpaa2_device *dpaa2_dev)
 }
 
 static struct rte_dpaa2_driver rte_dpaa2_qdma_pmd = {
+       .drv_flags = RTE_DPAA2_DRV_IOVA_AS_VA,
        .drv_type = DPAA2_QDMA,
        .probe = rte_dpaa2_qdma_probe,
        .remove = rte_dpaa2_qdma_remove,
 };
 
 RTE_PMD_REGISTER_DPAA2(dpaa2_qdma, rte_dpaa2_qdma_pmd);
-
-RTE_INIT(dpaa2_qdma_init_log);
-static void
-dpaa2_qdma_init_log(void)
-{
-       dpaa2_qdma_logtype = rte_log_register("pmd.raw.dpaa2.qdma");
-       if (dpaa2_qdma_logtype >= 0)
-               rte_log_set_level(dpaa2_qdma_logtype, RTE_LOG_INFO);
-}
+RTE_PMD_REGISTER_PARAM_STRING(dpaa2_qdma,
+       "no_prefetch=<int> ");
+RTE_LOG_REGISTER(dpaa2_qdma_logtype, pmd.raw.dpaa2.qdma, INFO);
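
RTE_LOG_REGISTER subsumes the constructor boilerplate deleted above; its
effect is roughly the sketch below (mirroring the removed code):

    int dpaa2_qdma_logtype;

    RTE_INIT(dpaa2_qdma_init_log)
    {
        dpaa2_qdma_logtype = rte_log_register("pmd.raw.dpaa2.qdma");
        if (dpaa2_qdma_logtype >= 0)
            rte_log_set_level(dpaa2_qdma_logtype, RTE_LOG_INFO);
    }
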