From 4f166de6584ec08b937d51d14cb5e3a82665b02e Mon Sep 17 00:00:00 2001
From: Jun Yang <jun.yang@nxp.com>
Date: Thu, 15 Oct 2020 17:47:04 +0800
Subject: [PATCH] raw/dpaa2_qdma: refactor the code

This patch moves qdma queue specific configurations from driver
global configuration to per-queue setup. This is required
as each queue can be configured differently.

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 1270 +++++++++----------
 drivers/raw/dpaa2_qdma/dpaa2_qdma.h         |   39 +-
 drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h |   13 +-
 3 files changed, 675 insertions(+), 647 deletions(-)

diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index 09e295fe84..6eef7a57ab 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -26,6 +26,9 @@
 
 #define DPAA2_QDMA_NO_PREFETCH "no_prefetch"
 
+/* Dynamic log type identifier */
+int dpaa2_qdma_logtype;
+
 uint32_t dpaa2_coherent_no_alloc_cache;
 uint32_t dpaa2_coherent_alloc_cache;
 
@@ -37,31 +40,9 @@ TAILQ_HEAD(qdma_hw_queue_list, qdma_hw_queue);
 static struct qdma_hw_queue_list qdma_queue_list
 	= TAILQ_HEAD_INITIALIZER(qdma_queue_list);
 
-/* QDMA Virtual Queues */
-static struct qdma_virt_queue *qdma_vqs;
-
 /* QDMA per core data */
 static struct qdma_per_core_info qdma_core_info[RTE_MAX_LCORE];
 
-typedef int (dpdmai_dev_dequeue_multijob_t)(struct dpaa2_dpdmai_dev *dpdmai_dev,
-					    uint16_t rxq_id,
-					    uint16_t *vq_id,
-					    struct rte_qdma_job **job,
-					    uint16_t nb_jobs);
-
-dpdmai_dev_dequeue_multijob_t *dpdmai_dev_dequeue_multijob;
-
-typedef uint16_t (dpdmai_dev_get_job_t)(struct qdma_device *qdma_dev,
-					const struct qbman_fd *fd,
-					struct rte_qdma_job **job);
-typedef int (dpdmai_dev_set_fd_t)(struct qdma_device *qdma_dev,
-				  struct qbman_fd *fd,
-				  struct rte_qdma_job *job,
-				  struct rte_qdma_rbp *rbp,
-				  uint16_t vq_id);
-dpdmai_dev_get_job_t *dpdmai_dev_get_job;
-dpdmai_dev_set_fd_t *dpdmai_dev_set_fd;
-
 static inline int
 qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest,
 			uint32_t len, struct qbman_fd *fd,
@@ -114,7 +95,7 @@ qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest,
 	/**
 	 * src If RBP=0 {NS,RDTTYPE[3:0]}: 0_1011
 	 * Coherent copy of cacheable memory,
-	 * lookup in downstream cache, no allocate
+	* lookup in downstream cache, no allocate
 	 * on miss
 	 */
 	fd->simple_ddr.rns = 0;
@@ -204,12 +185,11 @@ dpaa2_qdma_populate_fle(struct qbman_fle *fle,
 }
 
 static inline int dpdmai_dev_set_fd_us(
-				struct qdma_device *qdma_dev __rte_unused,
-				struct qbman_fd *fd,
-				struct rte_qdma_job *job,
-				struct rte_qdma_rbp *rbp,
-				uint16_t vq_id)
+		struct qdma_virt_queue *qdma_vq,
+		struct qbman_fd *fd,
+		struct rte_qdma_job *job)
 {
+	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
 	size_t iova;
 	int ret = 0;
@@ -220,7 +200,7 @@ static inline int dpdmai_dev_set_fd_us(
 		iova = (size_t)job->src;
 
 	/* Set the metadata */
-	job->vq_id = vq_id;
+	job->vq_id = qdma_vq->vq_id;
 	ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
 	*ppjob = job;
 
@@ -234,15 +214,17 @@ static inline int dpdmai_dev_set_fd_us(
 					   job->len, fd);
 	return ret;
 }
-static inline int dpdmai_dev_set_fd_lf(struct qdma_device *qdma_dev,
-					struct qbman_fd *fd,
-					struct rte_qdma_job *job,
-					struct rte_qdma_rbp *rbp,
-					uint16_t vq_id)
+static inline int dpdmai_dev_set_fd_lf(
+		struct qdma_virt_queue *qdma_vq,
+		struct qbman_fd *fd,
+		struct rte_qdma_job *job)
 {
+	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
 	struct qbman_fle *fle;
 	int ret = 0;
+	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
+
 	/*
 	 * Get an FLE/SDD from FLE pool.
 	 * Note: IO metadata is before the FLE and SDD memory.
@@ -254,7 +236,7 @@ static inline int dpdmai_dev_set_fd_lf(struct qdma_device *qdma_dev,
 	}
 
 	/* Set the metadata */
-	job->vq_id = vq_id;
+	job->vq_id = qdma_vq->vq_id;
 	*ppjob = job;
 
 	fle = (struct qbman_fle *)(ppjob + 1);
@@ -272,7 +254,7 @@ static inline int dpdmai_dev_set_fd_lf(struct qdma_device *qdma_dev,
 }
 
 static inline uint16_t dpdmai_dev_get_job_us(
-				struct qdma_device *qdma_dev __rte_unused,
+				struct qdma_virt_queue *qdma_vq __rte_unused,
 				const struct qbman_fd *fd,
 				struct rte_qdma_job **job)
 {
@@ -281,7 +263,7 @@ static inline uint16_t dpdmai_dev_get_job_us(
 	struct rte_qdma_job **ppjob;
 
 	if (fd->simple_pci.saddr_hi & (QDMA_RBP_UPPER_ADDRESS_MASK >> 32))
-		iova = (size_t) (((uint64_t)fd->simple_pci.daddr_hi) << 32
+		iova = (size_t)(((uint64_t)fd->simple_pci.daddr_hi) << 32
 				| (uint64_t)fd->simple_pci.daddr_lo);
 	else
 		iova = (size_t)(((uint64_t)fd->simple_pci.saddr_hi) << 32
@@ -289,18 +271,22 @@ static inline uint16_t dpdmai_dev_get_job_us(
 
 	ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
 	*job = (struct rte_qdma_job *)*ppjob;
-	(*job)->status = (fd->simple_pci.acc_err << 8) | (fd->simple_pci.error);
+	(*job)->status = (fd->simple_pci.acc_err << 8) |
+					(fd->simple_pci.error);
 	vqid = (*job)->vq_id;
 
 	return vqid;
 }
 
-static inline uint16_t dpdmai_dev_get_job_lf(struct qdma_device *qdma_dev,
-					     const struct qbman_fd *fd,
-					     struct rte_qdma_job **job)
+static inline uint16_t dpdmai_dev_get_job_lf(
+						struct qdma_virt_queue *qdma_vq,
+						const struct qbman_fd *fd,
+						struct rte_qdma_job **job)
 {
 	struct rte_qdma_job **ppjob;
 	uint16_t vqid;
+	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
+
 	/*
 	 * Fetch metadata from FLE. job and vq_id were set
 	 * in metadata in the enqueue operation.
@@ -320,351 +306,268 @@ static inline uint16_t dpdmai_dev_get_job_lf(struct qdma_device *qdma_dev,
 	return vqid;
 }
 
-static struct qdma_hw_queue *
-alloc_hw_queue(uint32_t lcore_id)
+/* Function to receive a QDMA job for a given device and queue*/
+static int
+dpdmai_dev_dequeue_multijob_prefetch(
+			struct qdma_virt_queue *qdma_vq,
+			uint16_t *vq_id,
+			struct rte_qdma_job **job,
+			uint16_t nb_jobs)
 {
-	struct qdma_hw_queue *queue = NULL;
+	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
+	uint16_t rxq_id = qdma_pq->queue_id;
 
-	DPAA2_QDMA_FUNC_TRACE();
+	struct dpaa2_queue *rxq;
+	struct qbman_result *dq_storage, *dq_storage1 = NULL;
+	struct qbman_pull_desc pulldesc;
+	struct qbman_swp *swp;
+	struct queue_storage_info_t *q_storage;
+	uint32_t fqid;
+	uint8_t status, pending;
+	uint8_t num_rx = 0;
+	const struct qbman_fd *fd;
+	uint16_t vqid;
+	int ret, pull_size;
 
-	/* Get a free queue from the list */
-	TAILQ_FOREACH(queue, &qdma_queue_list, next) {
-		if (queue->num_users == 0) {
-			queue->lcore_id = lcore_id;
-			queue->num_users++;
-			break;
+	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+		ret = dpaa2_affine_qbman_swp();
+		if (ret) {
+			DPAA2_QDMA_ERR(
+				"Failed to allocate IO portal, tid: %d\n",
+				rte_gettid());
+			return 0;
 		}
 	}
+	swp = DPAA2_PER_LCORE_PORTAL;
 
-	return queue;
-}
-
-static void
-free_hw_queue(struct qdma_hw_queue *queue)
-{
-	DPAA2_QDMA_FUNC_TRACE();
-
-	queue->num_users--;
-}
-
-
-static struct qdma_hw_queue *
-get_hw_queue(struct qdma_device *qdma_dev, uint32_t lcore_id)
-{
-	struct qdma_per_core_info *core_info;
-	struct qdma_hw_queue *queue, *temp;
-	uint32_t least_num_users;
-	int num_hw_queues, i;
-
-	DPAA2_QDMA_FUNC_TRACE();
-
-	core_info = &qdma_core_info[lcore_id];
-	num_hw_queues = core_info->num_hw_queues;
+	pull_size = (nb_jobs > dpaa2_dqrr_size) ? dpaa2_dqrr_size : nb_jobs;
+	rxq = &(dpdmai_dev->rx_queue[rxq_id]);
+	fqid = rxq->fqid;
+	q_storage = rxq->q_storage;
 
-	/*
-	 * Allocate a HW queue if there are less queues
-	 * than maximum per core queues configured
-	 */
-	if (num_hw_queues < qdma_dev->max_hw_queues_per_core) {
-		queue = alloc_hw_queue(lcore_id);
-		if (queue) {
-			core_info->hw_queues[num_hw_queues] = queue;
-			core_info->num_hw_queues++;
-			return queue;
+	if (unlikely(!q_storage->active_dqs)) {
+		q_storage->toggle = 0;
+		dq_storage = q_storage->dq_storage[q_storage->toggle];
+		q_storage->last_num_pkts = pull_size;
+		qbman_pull_desc_clear(&pulldesc);
+		qbman_pull_desc_set_numframes(&pulldesc,
+					      q_storage->last_num_pkts);
+		qbman_pull_desc_set_fq(&pulldesc, fqid);
+		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
+				(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
+		if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
+			while (!qbman_check_command_complete(
+				get_swp_active_dqs(
+				DPAA2_PER_LCORE_DPIO->index)))
+				;
+			clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+		}
+		while (1) {
+			if (qbman_swp_pull(swp, &pulldesc)) {
+				DPAA2_QDMA_DP_WARN(
+					"VDQ command not issued.QBMAN busy\n");
+					/* Portal was busy, try again */
+				continue;
+			}
+			break;
 		}
+		q_storage->active_dqs = dq_storage;
+		q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
+		set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index,
+				   dq_storage);
 	}
 
-	queue = core_info->hw_queues[0];
-	/* In case there is no queue associated with the core return NULL */
-	if (!queue)
-		return NULL;
+	dq_storage = q_storage->active_dqs;
+	rte_prefetch0((void *)(size_t)(dq_storage));
+	rte_prefetch0((void *)(size_t)(dq_storage + 1));
 
-	/* Fetch the least loaded H/W queue */
-	least_num_users = core_info->hw_queues[0]->num_users;
-	for (i = 0; i < num_hw_queues; i++) {
-		temp = core_info->hw_queues[i];
-		if (temp->num_users < least_num_users)
-			queue = temp;
-	}
+	/* Prepare next pull descriptor. This will give space for the
+	 * prefething done on DQRR entries
+	 */
+	q_storage->toggle ^= 1;
+	dq_storage1 = q_storage->dq_storage[q_storage->toggle];
+	qbman_pull_desc_clear(&pulldesc);
+	qbman_pull_desc_set_numframes(&pulldesc, pull_size);
+	qbman_pull_desc_set_fq(&pulldesc, fqid);
+	qbman_pull_desc_set_storage(&pulldesc, dq_storage1,
+		(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage1)), 1);
 
-	if (queue)
-		queue->num_users++;
+	/* Check if the previous issued command is completed.
+	 * Also seems like the SWP is shared between the Ethernet Driver
+	 * and the SEC driver.
+	 */
+	while (!qbman_check_command_complete(dq_storage))
+		;
+	if (dq_storage == get_swp_active_dqs(q_storage->active_dpio_id))
+		clear_swp_active_dqs(q_storage->active_dpio_id);
 
-	return queue;
-}
+	pending = 1;
 
-static void
-put_hw_queue(struct qdma_hw_queue *queue)
-{
-	struct qdma_per_core_info *core_info;
-	int lcore_id, num_hw_queues, i;
+	do {
+		/* Loop until the dq_storage is updated with
+		 * new token by QBMAN
+		 */
+		while (!qbman_check_new_result(dq_storage))
+			;
+		rte_prefetch0((void *)((size_t)(dq_storage + 2)));
+		/* Check whether Last Pull command is Expired and
+		 * setting Condition for Loop termination
+		 */
+		if (qbman_result_DQ_is_pull_complete(dq_storage)) {
+			pending = 0;
+			/* Check for valid frame. */
+			status = qbman_result_DQ_flags(dq_storage);
+			if (unlikely((status & QBMAN_DQ_STAT_VALIDFRAME) == 0))
+				continue;
+		}
+		fd = qbman_result_DQ_fd(dq_storage);
 
-	DPAA2_QDMA_FUNC_TRACE();
+		vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx]);
+		if (vq_id)
+			vq_id[num_rx] = vqid;
 
-	/*
-	 * If this is the last user of the queue free it.
-	 * Also remove it from QDMA core info.
-	 */
-	if (queue->num_users == 1) {
-		free_hw_queue(queue);
+		dq_storage++;
+		num_rx++;
+	} while (pending);
 
-		/* Remove the physical queue from core info */
-		lcore_id = queue->lcore_id;
-		core_info = &qdma_core_info[lcore_id];
-		num_hw_queues = core_info->num_hw_queues;
-		for (i = 0; i < num_hw_queues; i++) {
-			if (queue == core_info->hw_queues[i])
-				break;
+	if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
+		while (!qbman_check_command_complete(
+			get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
+			;
+		clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+	}
+	/* issue a volatile dequeue command for next pull */
+	while (1) {
+		if (qbman_swp_pull(swp, &pulldesc)) {
+			DPAA2_QDMA_DP_WARN(
+				"VDQ command is not issued. QBMAN is busy (2)\n");
+			continue;
 		}
-		for (; i < num_hw_queues - 1; i++)
-			core_info->hw_queues[i] = core_info->hw_queues[i + 1];
-		core_info->hw_queues[i] = NULL;
-	} else {
-		queue->num_users--;
+		break;
 	}
-}
-
-static int
-dpaa2_qdma_attr_get(struct rte_rawdev *rawdev,
-		    __rte_unused const char *attr_name,
-		    uint64_t *attr_value)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct rte_qdma_attr *qdma_attr = (struct rte_qdma_attr *)attr_value;
-
-	DPAA2_QDMA_FUNC_TRACE();
 
-	qdma_attr->num_hw_queues = qdma_dev->num_hw_queues;
+	q_storage->active_dqs = dq_storage1;
+	q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
+	set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage1);
 
-	return 0;
+	return num_rx;
 }
 
 static int
-dpaa2_qdma_reset(struct rte_rawdev *rawdev)
+dpdmai_dev_dequeue_multijob_no_prefetch(
+		struct qdma_virt_queue *qdma_vq,
+		uint16_t *vq_id,
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
 {
-	struct qdma_hw_queue *queue;
-	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	int i;
+	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
+	uint16_t rxq_id = qdma_pq->queue_id;
 
-	DPAA2_QDMA_FUNC_TRACE();
+	struct dpaa2_queue *rxq;
+	struct qbman_result *dq_storage;
+	struct qbman_pull_desc pulldesc;
+	struct qbman_swp *swp;
+	uint32_t fqid;
+	uint8_t status, pending;
+	uint8_t num_rx = 0;
+	const struct qbman_fd *fd;
+	uint16_t vqid;
+	int ret, next_pull = nb_jobs, num_pulled = 0;
 
-	/* In case QDMA device is not in stopped state, return -EBUSY */
-	if (qdma_dev->state == 1) {
-		DPAA2_QDMA_ERR(
-			"Device is in running state. Stop before reset.");
-		return -EBUSY;
+	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+		ret = dpaa2_affine_qbman_swp();
+		if (ret) {
+			DPAA2_QDMA_ERR(
+				"Failed to allocate IO portal, tid: %d\n",
+				rte_gettid());
+			return 0;
+		}
 	}
+	swp = DPAA2_PER_LCORE_PORTAL;
 
-	/* In case there are pending jobs on any VQ, return -EBUSY */
-	for (i = 0; i < qdma_dev->max_vqs; i++) {
-		if (qdma_vqs[i].in_use && (qdma_vqs[i].num_enqueues !=
-		    qdma_vqs[i].num_dequeues)) {
-			DPAA2_QDMA_ERR("Jobs are still pending on VQ: %d", i);
-			return -EBUSY;
-		}
-	}
-
-	/* Reset HW queues */
-	TAILQ_FOREACH(queue, &qdma_queue_list, next)
-		queue->num_users = 0;
-
-	/* Reset and free virtual queues */
-	for (i = 0; i < qdma_dev->max_vqs; i++) {
-		if (qdma_vqs[i].status_ring)
-			rte_ring_free(qdma_vqs[i].status_ring);
-	}
-	if (qdma_vqs)
-		rte_free(qdma_vqs);
-	qdma_vqs = NULL;
-
-	/* Reset per core info */
-	memset(&qdma_core_info, 0,
-		sizeof(struct qdma_per_core_info) * RTE_MAX_LCORE);
-
-	/* Free the FLE pool */
-	if (qdma_dev->fle_pool)
-		rte_mempool_free(qdma_dev->fle_pool);
-
-	/* Reset QDMA device structure */
-	qdma_dev->mode = RTE_QDMA_MODE_HW;
-	qdma_dev->max_hw_queues_per_core = 0;
-	qdma_dev->fle_pool = NULL;
-	qdma_dev->fle_pool_count = 0;
-	qdma_dev->max_vqs = 0;
-
-	return 0;
-}
-
-static int
-dpaa2_qdma_configure(const struct rte_rawdev *rawdev,
-			 rte_rawdev_obj_t config,
-			 size_t config_size)
-{
-	char fle_pool_name[32]; /* RTE_MEMZONE_NAMESIZE = 32 */
-	struct rte_qdma_config *qdma_config = (struct rte_qdma_config *)config;
-	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-
-	DPAA2_QDMA_FUNC_TRACE();
-
-	if (config_size != sizeof(*qdma_config))
-		return -EINVAL;
-
-	/* In case QDMA device is not in stopped state, return -EBUSY */
-	if (qdma_dev->state == 1) {
-		DPAA2_QDMA_ERR(
-			"Device is in running state. Stop before config.");
-		return -1;
-	}
-
-	/* Set mode */
-	qdma_dev->mode = qdma_config->mode;
-
-	/* Set max HW queue per core */
-	if (qdma_config->max_hw_queues_per_core > MAX_HW_QUEUE_PER_CORE) {
-		DPAA2_QDMA_ERR("H/W queues per core is more than: %d",
-			       MAX_HW_QUEUE_PER_CORE);
-		return -EINVAL;
-	}
-	qdma_dev->max_hw_queues_per_core =
-		qdma_config->max_hw_queues_per_core;
-
-	/* Allocate Virtual Queues */
-	qdma_vqs = rte_malloc("qdma_virtual_queues",
-			(sizeof(struct qdma_virt_queue) * qdma_config->max_vqs),
-			RTE_CACHE_LINE_SIZE);
-	if (!qdma_vqs) {
-		DPAA2_QDMA_ERR("qdma_virtual_queues allocation failed");
-		return -ENOMEM;
-	}
-	qdma_dev->max_vqs = qdma_config->max_vqs;
-
-	/* Allocate FLE pool; just append PID so that in case of
-	 * multiprocess, the pool's don't collide.
-	 */
-	snprintf(fle_pool_name, sizeof(fle_pool_name), "qdma_fle_pool%u",
-		 getpid());
-	qdma_dev->fle_pool = rte_mempool_create(fle_pool_name,
-			qdma_config->fle_pool_count, QDMA_FLE_POOL_SIZE,
-			QDMA_FLE_CACHE_SIZE(qdma_config->fle_pool_count), 0,
-			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
-	if (!qdma_dev->fle_pool) {
-		DPAA2_QDMA_ERR("qdma_fle_pool create failed");
-		rte_free(qdma_vqs);
-		qdma_vqs = NULL;
-		return -ENOMEM;
-	}
-	qdma_dev->fle_pool_count = qdma_config->fle_pool_count;
-
-	if (qdma_config->format == RTE_QDMA_ULTRASHORT_FORMAT) {
-		dpdmai_dev_get_job = dpdmai_dev_get_job_us;
-		dpdmai_dev_set_fd = dpdmai_dev_set_fd_us;
-	} else {
-		dpdmai_dev_get_job = dpdmai_dev_get_job_lf;
-		dpdmai_dev_set_fd = dpdmai_dev_set_fd_lf;
-	}
-	return 0;
-}
-
-static int
-dpaa2_qdma_start(struct rte_rawdev *rawdev)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-
-	DPAA2_QDMA_FUNC_TRACE();
-
-	qdma_dev->state = 1;
-
-	return 0;
-}
-
-static int
-dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
-			  __rte_unused uint16_t queue_id,
-			  rte_rawdev_obj_t queue_conf,
-			  size_t conf_size)
-{
-	char ring_name[32];
-	int i;
-	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct rte_qdma_queue_config *q_config =
-		(struct rte_qdma_queue_config *)queue_conf;
-
-	DPAA2_QDMA_FUNC_TRACE();
+	rxq = &(dpdmai_dev->rx_queue[rxq_id]);
+	fqid = rxq->fqid;
 
-	if (conf_size != sizeof(*q_config))
-		return -EINVAL;
+	do {
+		dq_storage = rxq->q_storage->dq_storage[0];
+		/* Prepare dequeue descriptor */
+		qbman_pull_desc_clear(&pulldesc);
+		qbman_pull_desc_set_fq(&pulldesc, fqid);
+		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
+			(uint64_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
 
-	rte_spinlock_lock(&qdma_dev->lock);
+		if (next_pull > dpaa2_dqrr_size) {
+			qbman_pull_desc_set_numframes(&pulldesc,
+					dpaa2_dqrr_size);
+			next_pull -= dpaa2_dqrr_size;
+		} else {
+			qbman_pull_desc_set_numframes(&pulldesc, next_pull);
+			next_pull = 0;
+		}
 
-	/* Get a free Virtual Queue */
-	for (i = 0; i < qdma_dev->max_vqs; i++) {
-		if (qdma_vqs[i].in_use == 0)
+		while (1) {
+			if (qbman_swp_pull(swp, &pulldesc)) {
+				DPAA2_QDMA_DP_WARN(
+					"VDQ command not issued. QBMAN busy");
+				/* Portal was busy, try again */
+				continue;
+			}
 			break;
-	}
+		}
 
-	/* Return in case no VQ is free */
-	if (i == qdma_dev->max_vqs) {
-		rte_spinlock_unlock(&qdma_dev->lock);
-		DPAA2_QDMA_ERR("Unable to get lock on QDMA device");
-		return -ENODEV;
-	}
+		rte_prefetch0((void *)((size_t)(dq_storage + 1)));
+		/* Check if the previous issued command is completed. */
+		while (!qbman_check_command_complete(dq_storage))
+			;
 
-	if (qdma_dev->mode == RTE_QDMA_MODE_HW ||
-			(q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ)) {
-		/* Allocate HW queue for a VQ */
-		qdma_vqs[i].hw_queue = alloc_hw_queue(q_config->lcore_id);
-		qdma_vqs[i].exclusive_hw_queue = 1;
-	} else {
-		/* Allocate a Ring for Virutal Queue in VQ mode */
-		snprintf(ring_name, sizeof(ring_name), "status ring %d", i);
-		qdma_vqs[i].status_ring = rte_ring_create(ring_name,
-			qdma_dev->fle_pool_count, rte_socket_id(), 0);
-		if (!qdma_vqs[i].status_ring) {
-			DPAA2_QDMA_ERR("Status ring creation failed for vq");
-			rte_spinlock_unlock(&qdma_dev->lock);
-			return rte_errno;
-		}
+		num_pulled = 0;
+		pending = 1;
 
-		/* Get a HW queue (shared) for a VQ */
-		qdma_vqs[i].hw_queue = get_hw_queue(qdma_dev,
-						    q_config->lcore_id);
-		qdma_vqs[i].exclusive_hw_queue = 0;
-	}
+		do {
+			/* Loop until dq_storage is updated
+			 * with new token by QBMAN
+			 */
+			while (!qbman_check_new_result(dq_storage))
+				;
+			rte_prefetch0((void *)((size_t)(dq_storage + 2)));
 
-	if (qdma_vqs[i].hw_queue == NULL) {
-		DPAA2_QDMA_ERR("No H/W queue available for VQ");
-		if (qdma_vqs[i].status_ring)
-			rte_ring_free(qdma_vqs[i].status_ring);
-		qdma_vqs[i].status_ring = NULL;
-		rte_spinlock_unlock(&qdma_dev->lock);
-		return -ENODEV;
-	}
+			if (qbman_result_DQ_is_pull_complete(dq_storage)) {
+				pending = 0;
+				/* Check for valid frame. */
+				status = qbman_result_DQ_flags(dq_storage);
+				if (unlikely((status &
+					QBMAN_DQ_STAT_VALIDFRAME) == 0))
+					continue;
+			}
+			fd = qbman_result_DQ_fd(dq_storage);
 
-	qdma_vqs[i].in_use = 1;
-	qdma_vqs[i].lcore_id = q_config->lcore_id;
-	memset(&qdma_vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp));
-	rte_spinlock_unlock(&qdma_dev->lock);
+			vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx]);
+			if (vq_id)
+				vq_id[num_rx] = vqid;
 
-	if (q_config->rbp != NULL)
-		memcpy(&qdma_vqs[i].rbp, q_config->rbp,
-		       sizeof(struct rte_qdma_rbp));
+			dq_storage++;
+			num_rx++;
+			num_pulled++;
 
-	return i;
+		} while (pending);
+	/* Last VDQ provided all packets and more packets are requested */
+	} while (next_pull && num_pulled == dpaa2_dqrr_size);
+
+	return num_rx;
 }
 
 static int
-dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
-			uint16_t txq_id,
-			uint16_t vq_id,
-			struct rte_qdma_rbp *rbp,
+dpdmai_dev_enqueue_multi(
+			struct qdma_virt_queue *qdma_vq,
 			struct rte_qdma_job **job,
 			uint16_t nb_jobs)
 {
+	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
+	uint16_t txq_id = qdma_pq->queue_id;
+
 	struct qbman_fd fd[RTE_QDMA_BURST_NB_MAX];
 	struct dpaa2_queue *txq;
 	struct qbman_eq_desc eqdesc;
@@ -701,8 +604,7 @@ dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
 			dpaa2_eqcr_size : nb_jobs;
 
 		for (loop = 0; loop < num_to_send; loop++) {
-			ret = dpdmai_dev_set_fd(dpdmai_dev->qdma_dev, &fd[loop],
-						job[num_tx], rbp, vq_id);
+			ret = qdma_vq->set_fd(qdma_vq, &fd[loop], job[num_tx]);
 			if (ret < 0) {
 				/* Set nb_jobs to loop, so outer while loop
 				 * breaks out.
@@ -716,6 +618,7 @@ dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
 
 		/* Enqueue the packet to the QBMAN */
 		uint32_t enqueue_loop = 0, retry_count = 0;
+
 		while (enqueue_loop < loop) {
 			ret = qbman_swp_enqueue_multiple(swp,
 						&eqdesc,
@@ -736,299 +639,434 @@ dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
 	return num_tx;
 }
 
-static int
-dpaa2_qdma_enqueue(__rte_unused struct rte_rawdev *rawdev,
-		  __rte_unused struct rte_rawdev_buf **buffers,
-		  unsigned int nb_jobs,
-		  rte_rawdev_obj_t context)
+static struct qdma_hw_queue *
+alloc_hw_queue(uint32_t lcore_id)
 {
-	struct rte_qdma_enqdeq *e_context = (struct rte_qdma_enqdeq *)context;
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[e_context->vq_id];
-	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
-	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
-	int ret;
+	struct qdma_hw_queue *queue = NULL;
 
-	/* Return error in case of wrong lcore_id */
-	if (rte_lcore_id() != qdma_vq->lcore_id) {
-		DPAA2_QDMA_ERR("QDMA enqueue for vqid %d on wrong core",
-				e_context->vq_id);
-		return -EINVAL;
-	}
+	DPAA2_QDMA_FUNC_TRACE();
 
-	ret = dpdmai_dev_enqueue_multi(dpdmai_dev,
-				 qdma_pq->queue_id,
-				 e_context->vq_id,
-				 &qdma_vq->rbp,
-				 e_context->job,
-				 nb_jobs);
-	if (ret < 0) {
-		DPAA2_QDMA_ERR("DPDMAI device enqueue failed: %d", ret);
-		return ret;
+	/* Get a free queue from the list */
+	TAILQ_FOREACH(queue, &qdma_queue_list, next) {
+		if (queue->num_users == 0) {
+			queue->lcore_id = lcore_id;
+			queue->num_users++;
+			break;
+		}
 	}
 
-	qdma_vq->num_enqueues += ret;
+	return queue;
+}
 
-	return ret;
+static void
+free_hw_queue(struct qdma_hw_queue *queue)
+{
+	DPAA2_QDMA_FUNC_TRACE();
+
+	queue->num_users--;
 }
 
-/* Function to receive a QDMA job for a given device and queue*/
-static int
-dpdmai_dev_dequeue_multijob_prefetch(
-			struct dpaa2_dpdmai_dev *dpdmai_dev,
-			uint16_t rxq_id,
-			uint16_t *vq_id,
-			struct rte_qdma_job **job,
-			uint16_t nb_jobs)
+
+static struct qdma_hw_queue *
+get_hw_queue(struct qdma_device *qdma_dev, uint32_t lcore_id)
 {
-	struct dpaa2_queue *rxq;
-	struct qbman_result *dq_storage, *dq_storage1 = NULL;
-	struct qbman_pull_desc pulldesc;
-	struct qbman_swp *swp;
-	struct queue_storage_info_t *q_storage;
-	uint32_t fqid;
-	uint8_t status, pending;
-	uint8_t num_rx = 0;
-	const struct qbman_fd *fd;
-	uint16_t vqid;
-	int ret, pull_size;
+	struct qdma_per_core_info *core_info;
+	struct qdma_hw_queue *queue, *temp;
+	uint32_t least_num_users;
+	int num_hw_queues, i;
 
-	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
-		ret = dpaa2_affine_qbman_swp();
-		if (ret) {
-			DPAA2_QDMA_ERR(
-				"Failed to allocate IO portal, tid: %d\n",
-				rte_gettid());
-			return 0;
+	DPAA2_QDMA_FUNC_TRACE();
+
+	core_info = &qdma_core_info[lcore_id];
+	num_hw_queues = core_info->num_hw_queues;
+
+	/*
+	 * Allocate a HW queue if there are less queues
+	 * than maximum per core queues configured
+	 */
+	if (num_hw_queues < qdma_dev->max_hw_queues_per_core) {
+		queue = alloc_hw_queue(lcore_id);
+		if (queue) {
+			core_info->hw_queues[num_hw_queues] = queue;
+			core_info->num_hw_queues++;
+			return queue;
 		}
 	}
-	swp = DPAA2_PER_LCORE_PORTAL;
 
-	pull_size = (nb_jobs > dpaa2_dqrr_size) ? dpaa2_dqrr_size : nb_jobs;
-	rxq = &(dpdmai_dev->rx_queue[rxq_id]);
-	fqid = rxq->fqid;
-	q_storage = rxq->q_storage;
+	queue = core_info->hw_queues[0];
+	/* In case there is no queue associated with the core return NULL */
+	if (!queue)
+		return NULL;
 
-	if (unlikely(!q_storage->active_dqs)) {
-		q_storage->toggle = 0;
-		dq_storage = q_storage->dq_storage[q_storage->toggle];
-		q_storage->last_num_pkts = pull_size;
-		qbman_pull_desc_clear(&pulldesc);
-		qbman_pull_desc_set_numframes(&pulldesc,
-					      q_storage->last_num_pkts);
-		qbman_pull_desc_set_fq(&pulldesc, fqid);
-		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
-				(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
-		if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
-			while (!qbman_check_command_complete(
-			       get_swp_active_dqs(
-			       DPAA2_PER_LCORE_DPIO->index)))
-				;
-			clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+	/* Fetch the least loaded H/W queue */
+	least_num_users = core_info->hw_queues[0]->num_users;
+	for (i = 0; i < num_hw_queues; i++) {
+		temp = core_info->hw_queues[i];
+		if (temp->num_users < least_num_users)
+			queue = temp;
+	}
+
+	if (queue)
+		queue->num_users++;
+
+	return queue;
+}
+
+static void
+put_hw_queue(struct qdma_hw_queue *queue)
+{
+	struct qdma_per_core_info *core_info;
+	int lcore_id, num_hw_queues, i;
+
+	DPAA2_QDMA_FUNC_TRACE();
+
+	/*
+	 * If this is the last user of the queue free it.
+	 * Also remove it from QDMA core info.
+	 */
+	if (queue->num_users == 1) {
+		free_hw_queue(queue);
+
+		/* Remove the physical queue from core info */
+		lcore_id = queue->lcore_id;
+		core_info = &qdma_core_info[lcore_id];
+		num_hw_queues = core_info->num_hw_queues;
+		for (i = 0; i < num_hw_queues; i++) {
+			if (queue == core_info->hw_queues[i])
+				break;
 		}
-		while (1) {
-			if (qbman_swp_pull(swp, &pulldesc)) {
-				DPAA2_QDMA_DP_WARN(
-					"VDQ command not issued.QBMAN busy\n");
-					/* Portal was busy, try again */
-				continue;
-			}
-			break;
+		for (; i < num_hw_queues - 1; i++)
+			core_info->hw_queues[i] = core_info->hw_queues[i + 1];
+		core_info->hw_queues[i] = NULL;
+	} else {
+		queue->num_users--;
+	}
+}
+
+static int
+dpaa2_qdma_attr_get(struct rte_rawdev *rawdev,
+		    __rte_unused const char *attr_name,
+		    uint64_t *attr_value)
+{
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct rte_qdma_attr *qdma_attr = (struct rte_qdma_attr *)attr_value;
+
+	DPAA2_QDMA_FUNC_TRACE();
+
+	qdma_attr->num_hw_queues = qdma_dev->num_hw_queues;
+
+	return 0;
+}
+
+static int
+dpaa2_qdma_reset(struct rte_rawdev *rawdev)
+{
+	struct qdma_hw_queue *queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	int i;
+
+	DPAA2_QDMA_FUNC_TRACE();
+
+	/* In case QDMA device is not in stopped state, return -EBUSY */
+	if (qdma_dev->state == 1) {
+		DPAA2_QDMA_ERR(
+			"Device is in running state. Stop before reset.");
+		return -EBUSY;
+	}
+
+	/* In case there are pending jobs on any VQ, return -EBUSY */
+	for (i = 0; i < qdma_dev->max_vqs; i++) {
+		if (qdma_dev->vqs[i].in_use && (qdma_dev->vqs[i].num_enqueues !=
+		    qdma_dev->vqs[i].num_dequeues)) {
+			DPAA2_QDMA_ERR("Jobs are still pending on VQ: %d", i);
+			return -EBUSY;
 		}
-		q_storage->active_dqs = dq_storage;
-		q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
-		set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index,
-				   dq_storage);
 	}
 
-	dq_storage = q_storage->active_dqs;
-	rte_prefetch0((void *)(size_t)(dq_storage));
-	rte_prefetch0((void *)(size_t)(dq_storage + 1));
+	/* Reset HW queues */
+	TAILQ_FOREACH(queue, &qdma_queue_list, next)
+		queue->num_users = 0;
 
-	/* Prepare next pull descriptor. This will give space for the
-	 * prefething done on DQRR entries
-	 */
-	q_storage->toggle ^= 1;
-	dq_storage1 = q_storage->dq_storage[q_storage->toggle];
-	qbman_pull_desc_clear(&pulldesc);
-	qbman_pull_desc_set_numframes(&pulldesc, pull_size);
-	qbman_pull_desc_set_fq(&pulldesc, fqid);
-	qbman_pull_desc_set_storage(&pulldesc, dq_storage1,
-		(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage1)), 1);
+	/* Reset and free virtual queues */
+	for (i = 0; i < qdma_dev->max_vqs; i++) {
+		if (qdma_dev->vqs[i].status_ring)
+			rte_ring_free(qdma_dev->vqs[i].status_ring);
+	}
+	if (qdma_dev->vqs)
+		rte_free(qdma_dev->vqs);
+	qdma_dev->vqs = NULL;
 
-	/* Check if the previous issued command is completed.
-	 * Also seems like the SWP is shared between the Ethernet Driver
-	 * and the SEC driver.
+	/* Reset per core info */
+	memset(&qdma_core_info, 0,
+		sizeof(struct qdma_per_core_info) * RTE_MAX_LCORE);
+
+	/* Free the FLE pool */
+	if (qdma_dev->fle_pool)
+		rte_mempool_free(qdma_dev->fle_pool);
+
+	/* Reset QDMA device structure */
+	qdma_dev->max_hw_queues_per_core = 0;
+	qdma_dev->fle_pool = NULL;
+	qdma_dev->fle_pool_count = 0;
+	qdma_dev->max_vqs = 0;
+
+	return 0;
+}
+
+static int
+dpaa2_qdma_configure(const struct rte_rawdev *rawdev,
+			 rte_rawdev_obj_t config,
+			 size_t config_size)
+{
+	char name[32]; /* RTE_MEMZONE_NAMESIZE = 32 */
+	struct rte_qdma_config *qdma_config = (struct rte_qdma_config *)config;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+
+	DPAA2_QDMA_FUNC_TRACE();
+
+	if (config_size != sizeof(*qdma_config))
+		return -EINVAL;
+
+	/* In case QDMA device is not in stopped state, return -EBUSY */
+	if (qdma_dev->state == 1) {
+		DPAA2_QDMA_ERR(
+			"Device is in running state. Stop before config.");
+		return -1;
+	}
+
+	/* Set max HW queue per core */
+	if (qdma_config->max_hw_queues_per_core > MAX_HW_QUEUE_PER_CORE) {
+		DPAA2_QDMA_ERR("H/W queues per core is more than: %d",
+			       MAX_HW_QUEUE_PER_CORE);
+		return -EINVAL;
+	}
+	qdma_dev->max_hw_queues_per_core =
+		qdma_config->max_hw_queues_per_core;
+
+	/* Allocate Virtual Queues */
+	sprintf(name, "qdma_%d_vq", rawdev->dev_id);
+	qdma_dev->vqs = rte_malloc(name,
+			(sizeof(struct qdma_virt_queue) * qdma_config->max_vqs),
+			RTE_CACHE_LINE_SIZE);
+	if (!qdma_dev->vqs) {
+		DPAA2_QDMA_ERR("qdma_virtual_queues allocation failed");
+		return -ENOMEM;
+	}
+	qdma_dev->max_vqs = qdma_config->max_vqs;
+
+	/* Allocate FLE pool; just append PID so that in case of
+	 * multiprocess, the pool's don't collide.
 	 */
-	while (!qbman_check_command_complete(dq_storage))
-		;
-	if (dq_storage == get_swp_active_dqs(q_storage->active_dpio_id))
-		clear_swp_active_dqs(q_storage->active_dpio_id);
+	snprintf(name, sizeof(name), "qdma_fle_pool%u",
+		 getpid());
+	qdma_dev->fle_pool = rte_mempool_create(name,
+			qdma_config->fle_pool_count, QDMA_FLE_POOL_SIZE,
+			QDMA_FLE_CACHE_SIZE(qdma_config->fle_pool_count), 0,
+			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
+	if (!qdma_dev->fle_pool) {
+		DPAA2_QDMA_ERR("qdma_fle_pool create failed");
+		rte_free(qdma_dev->vqs);
+		qdma_dev->vqs = NULL;
+		return -ENOMEM;
+	}
+	qdma_dev->fle_pool_count = qdma_config->fle_pool_count;
 
-	pending = 1;
+	return 0;
+}
 
-	do {
-		/* Loop until the dq_storage is updated with
-		 * new token by QBMAN
-		 */
-		while (!qbman_check_new_result(dq_storage))
-			;
-		rte_prefetch0((void *)((size_t)(dq_storage + 2)));
-		/* Check whether Last Pull command is Expired and
-		 * setting Condition for Loop termination
-		 */
-		if (qbman_result_DQ_is_pull_complete(dq_storage)) {
-			pending = 0;
-			/* Check for valid frame. */
-			status = qbman_result_DQ_flags(dq_storage);
-			if (unlikely((status & QBMAN_DQ_STAT_VALIDFRAME) == 0))
-				continue;
-		}
-		fd = qbman_result_DQ_fd(dq_storage);
+static int
+dpaa2_qdma_start(struct rte_rawdev *rawdev)
+{
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+
+	DPAA2_QDMA_FUNC_TRACE();
+
+	qdma_dev->state = 1;
 
-		vqid = dpdmai_dev_get_job(dpdmai_dev->qdma_dev, fd,
-					  &job[num_rx]);
-		if (vq_id)
-			vq_id[num_rx] = vqid;
+	return 0;
+}
 
-		dq_storage++;
-		num_rx++;
-	} while (pending);
+static int
+check_devargs_handler(__rte_unused const char *key, const char *value,
+		      __rte_unused void *opaque)
+{
+	if (strcmp(value, "1"))
+		return -1;
 
-	if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
-		while (!qbman_check_command_complete(
-		       get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
-			;
-		clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
-	}
-	/* issue a volatile dequeue command for next pull */
-	while (1) {
-		if (qbman_swp_pull(swp, &pulldesc)) {
-			DPAA2_QDMA_DP_WARN("VDQ command is not issued."
-					  "QBMAN is busy (2)\n");
-			continue;
-		}
-		break;
+	return 0;
+}
+
+static int
+dpaa2_get_devargs(struct rte_devargs *devargs, const char *key)
+{
+	struct rte_kvargs *kvlist;
+
+	if (!devargs)
+		return 0;
+
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (!kvlist)
+		return 0;
+
+	if (!rte_kvargs_count(kvlist, key)) {
+		rte_kvargs_free(kvlist);
+		return 0;
 	}
 
-	q_storage->active_dqs = dq_storage1;
-	q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
-	set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage1);
+	if (rte_kvargs_process(kvlist, key,
+			       check_devargs_handler, NULL) < 0) {
+		rte_kvargs_free(kvlist);
+		return 0;
+	}
+	rte_kvargs_free(kvlist);
 
-	return num_rx;
+	return 1;
 }
 
 static int
-dpdmai_dev_dequeue_multijob_no_prefetch(
-		struct dpaa2_dpdmai_dev *dpdmai_dev,
-		uint16_t rxq_id,
-		uint16_t *vq_id,
-		struct rte_qdma_job **job,
-		uint16_t nb_jobs)
+dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
+			  __rte_unused uint16_t queue_id,
+			  rte_rawdev_obj_t queue_conf,
+			  size_t conf_size)
 {
-	struct dpaa2_queue *rxq;
-	struct qbman_result *dq_storage;
-	struct qbman_pull_desc pulldesc;
-	struct qbman_swp *swp;
-	uint32_t fqid;
-	uint8_t status, pending;
-	uint8_t num_rx = 0;
-	const struct qbman_fd *fd;
-	uint16_t vqid;
-	int ret, next_pull = nb_jobs, num_pulled = 0;
+	char ring_name[32];
+	int i;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct rte_qdma_queue_config *q_config =
+		(struct rte_qdma_queue_config *)queue_conf;
 
-	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
-		ret = dpaa2_affine_qbman_swp();
-		if (ret) {
-			DPAA2_QDMA_ERR(
-				"Failed to allocate IO portal, tid: %d\n",
-				rte_gettid());
-			return 0;
+	DPAA2_QDMA_FUNC_TRACE();
+
+	if (conf_size != sizeof(*q_config))
+		return -EINVAL;
+
+	rte_spinlock_lock(&qdma_dev->lock);
+
+	/* Get a free Virtual Queue */
+	for (i = 0; i < qdma_dev->max_vqs; i++) {
+		if (qdma_dev->vqs[i].in_use == 0)
+			break;
+	}
+
+	/* Return in case no VQ is free */
+	if (i == qdma_dev->max_vqs) {
+		rte_spinlock_unlock(&qdma_dev->lock);
+		DPAA2_QDMA_ERR("Unable to get lock on QDMA device");
+		return -ENODEV;
+	}
+
+	if (q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ) {
+		/* Allocate HW queue for a VQ */
+		qdma_dev->vqs[i].hw_queue = alloc_hw_queue(q_config->lcore_id);
+		qdma_dev->vqs[i].exclusive_hw_queue = 1;
+	} else {
+		/* Allocate a Ring for Virtual Queue in VQ mode */
+		snprintf(ring_name, sizeof(ring_name), "status ring %d", i);
+		qdma_dev->vqs[i].status_ring = rte_ring_create(ring_name,
+			qdma_dev->fle_pool_count, rte_socket_id(), 0);
+		if (!qdma_dev->vqs[i].status_ring) {
+			DPAA2_QDMA_ERR("Status ring creation failed for vq");
+			rte_spinlock_unlock(&qdma_dev->lock);
+			return rte_errno;
 		}
+
+		/* Get a HW queue (shared) for a VQ */
+		qdma_dev->vqs[i].hw_queue = get_hw_queue(qdma_dev,
+						    q_config->lcore_id);
+		qdma_dev->vqs[i].exclusive_hw_queue = 0;
 	}
-	swp = DPAA2_PER_LCORE_PORTAL;
 
-	rxq = &(dpdmai_dev->rx_queue[rxq_id]);
-	fqid = rxq->fqid;
+	if (qdma_dev->vqs[i].hw_queue == NULL) {
+		DPAA2_QDMA_ERR("No H/W queue available for VQ");
+		if (qdma_dev->vqs[i].status_ring)
+			rte_ring_free(qdma_dev->vqs[i].status_ring);
+		qdma_dev->vqs[i].status_ring = NULL;
+		rte_spinlock_unlock(&qdma_dev->lock);
+		return -ENODEV;
+	}
 
-	do {
-		dq_storage = rxq->q_storage->dq_storage[0];
-		/* Prepare dequeue descriptor */
-		qbman_pull_desc_clear(&pulldesc);
-		qbman_pull_desc_set_fq(&pulldesc, fqid);
-		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
-			(uint64_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
+	qdma_dev->vqs[i].in_use = 1;
+	qdma_dev->vqs[i].lcore_id = q_config->lcore_id;
+	memset(&qdma_dev->vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp));
 
-		if (next_pull > dpaa2_dqrr_size) {
-			qbman_pull_desc_set_numframes(&pulldesc,
-					dpaa2_dqrr_size);
-			next_pull -= dpaa2_dqrr_size;
-		} else {
-			qbman_pull_desc_set_numframes(&pulldesc, next_pull);
-			next_pull = 0;
-		}
+	if (q_config->flags & RTE_QDMA_VQ_FD_LONG_FORMAT) {
+		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_lf;
+		qdma_dev->vqs[i].get_job = dpdmai_dev_get_job_lf;
+	} else {
+		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_us;
+		qdma_dev->vqs[i].get_job = dpdmai_dev_get_job_us;
+	}
+	if (dpaa2_get_devargs(rawdev->device->devargs,
+			DPAA2_QDMA_NO_PREFETCH) ||
+			(getenv("DPAA2_NO_QDMA_PREFETCH_RX"))) {
+		/* If no prefetch is configured. */
+		qdma_dev->vqs[i].dequeue_job =
+				dpdmai_dev_dequeue_multijob_no_prefetch;
+		DPAA2_QDMA_INFO("No Prefetch RX Mode enabled");
+	} else {
+		qdma_dev->vqs[i].dequeue_job =
+			dpdmai_dev_dequeue_multijob_prefetch;
+	}
 
-		while (1) {
-			if (qbman_swp_pull(swp, &pulldesc)) {
-				DPAA2_QDMA_DP_WARN("VDQ command not issued. QBMAN busy");
-				/* Portal was busy, try again */
-				continue;
-			}
-			break;
-		}
+	qdma_dev->vqs[i].enqueue_job = dpdmai_dev_enqueue_multi;
 
-		rte_prefetch0((void *)((size_t)(dq_storage + 1)));
-		/* Check if the previous issued command is completed. */
-		while (!qbman_check_command_complete(dq_storage))
-			;
+	if (q_config->rbp != NULL)
+		memcpy(&qdma_dev->vqs[i].rbp, q_config->rbp,
+				sizeof(struct rte_qdma_rbp));
 
-		num_pulled = 0;
-		pending = 1;
+	rte_spinlock_unlock(&qdma_dev->lock);
 
-		do {
-			/* Loop until dq_storage is updated
-			 * with new token by QBMAN
-			 */
-			while (!qbman_check_new_result(dq_storage))
-				;
-			rte_prefetch0((void *)((size_t)(dq_storage + 2)));
+	return i;
+}
 
-			if (qbman_result_DQ_is_pull_complete(dq_storage)) {
-				pending = 0;
-				/* Check for valid frame. */
-				status = qbman_result_DQ_flags(dq_storage);
-				if (unlikely((status &
-					QBMAN_DQ_STAT_VALIDFRAME) == 0))
-					continue;
-			}
-			fd = qbman_result_DQ_fd(dq_storage);
+static int
+dpaa2_qdma_enqueue(struct rte_rawdev *rawdev,
+		  __rte_unused struct rte_rawdev_buf **buffers,
+		  unsigned int nb_jobs,
+		  rte_rawdev_obj_t context)
+{
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct rte_qdma_enqdeq *e_context =
+		(struct rte_qdma_enqdeq *)context;
+	struct qdma_virt_queue *qdma_vq =
+		&dpdmai_dev->qdma_dev->vqs[e_context->vq_id];
+	int ret;
 
-			vqid = dpdmai_dev_get_job(dpdmai_dev->qdma_dev, fd,
-						  &job[num_rx]);
-			if (vq_id)
-				vq_id[num_rx] = vqid;
+	/* Return error in case of wrong lcore_id */
+	if (rte_lcore_id() != qdma_vq->lcore_id) {
+		DPAA2_QDMA_ERR("QDMA enqueue for vqid %d on wrong core",
+				e_context->vq_id);
+		return -EINVAL;
+	}
 
-			dq_storage++;
-			num_rx++;
-			num_pulled++;
+	ret = qdma_vq->enqueue_job(qdma_vq, e_context->job, nb_jobs);
+	if (ret < 0) {
+		DPAA2_QDMA_ERR("DPDMAI device enqueue failed: %d", ret);
+		return ret;
+	}
 
-		} while (pending);
-	/* Last VDQ provided all packets and more packets are requested */
-	} while (next_pull && num_pulled == dpaa2_dqrr_size);
+	qdma_vq->num_enqueues += ret;
 
-	return num_rx;
+	return ret;
 }
 
 static int
-dpaa2_qdma_dequeue(__rte_unused struct rte_rawdev *rawdev,
+dpaa2_qdma_dequeue(struct rte_rawdev *rawdev,
 		   __rte_unused struct rte_rawdev_buf **buffers,
 		   unsigned int nb_jobs,
 		   rte_rawdev_obj_t cntxt)
 {
-	struct rte_qdma_enqdeq *context = (struct rte_qdma_enqdeq *)cntxt;
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[context->vq_id];
-	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct rte_qdma_enqdeq *context =
+		(struct rte_qdma_enqdeq *)cntxt;
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[context->vq_id];
 	struct qdma_virt_queue *temp_qdma_vq;
-	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
 	int ret = 0, i;
 	unsigned int ring_count;
 
@@ -1044,12 +1082,12 @@ dpaa2_qdma_dequeue(__rte_unused struct rte_rawdev *rawdev,
 		return 0;
 
 	if (qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs))
-		nb_jobs = (qdma_vq->num_enqueues -  qdma_vq->num_dequeues);
+		nb_jobs = (qdma_vq->num_enqueues - qdma_vq->num_dequeues);
 
 	if (qdma_vq->exclusive_hw_queue) {
 		/* In case of exclusive queue directly fetch from HW queue */
-		ret = dpdmai_dev_dequeue_multijob(dpdmai_dev, qdma_pq->queue_id,
-					 NULL, context->job, nb_jobs);
+		ret = qdma_vq->dequeue_job(qdma_vq, NULL,
+					context->job, nb_jobs);
 		if (ret < 0) {
 			DPAA2_QDMA_ERR(
 				"Dequeue from DPDMAI device failed: %d", ret);
@@ -1066,11 +1104,10 @@ dpaa2_qdma_dequeue(__rte_unused struct rte_rawdev *rawdev,
 		ring_count = rte_ring_count(qdma_vq->status_ring);
 		if (ring_count < nb_jobs) {
 			/* TODO - How to have right budget */
-			ret = dpdmai_dev_dequeue_multijob(dpdmai_dev,
-					qdma_pq->queue_id,
+			ret = qdma_vq->dequeue_job(qdma_vq,
 					temp_vq_id, context->job, nb_jobs);
 			for (i = 0; i < ret; i++) {
-				temp_qdma_vq = &qdma_vqs[temp_vq_id[i]];
+				temp_qdma_vq = &qdma_dev->vqs[temp_vq_id[i]];
 				rte_ring_enqueue(temp_qdma_vq->status_ring,
 					(void *)(context->job[i]));
 			}
@@ -1094,10 +1131,13 @@ dpaa2_qdma_dequeue(__rte_unused struct rte_rawdev *rawdev,
 }
 
 void
-rte_qdma_vq_stats(uint16_t vq_id,
-		  struct rte_qdma_vq_stats *vq_status)
+rte_qdma_vq_stats(struct rte_rawdev *rawdev,
+		uint16_t vq_id,
+		struct rte_qdma_vq_stats *vq_status)
 {
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vq_id];
 
 	if (qdma_vq->in_use) {
 		vq_status->exclusive_hw_queue = qdma_vq->exclusive_hw_queue;
@@ -1116,7 +1156,7 @@ dpaa2_qdma_queue_release(struct rte_rawdev *rawdev,
 	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
 	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
 
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vq_id];
 
 	DPAA2_QDMA_FUNC_TRACE();
 
@@ -1129,8 +1169,8 @@ dpaa2_qdma_queue_release(struct rte_rawdev *rawdev,
 	if (qdma_vq->exclusive_hw_queue)
 		free_hw_queue(qdma_vq->hw_queue);
 	else {
-		if (qdma_vqs->status_ring)
-			rte_ring_free(qdma_vqs->status_ring);
+		if (qdma_vq->status_ring)
+			rte_ring_free(qdma_vq->status_ring);
 
 		put_hw_queue(qdma_vq->hw_queue);
 	}
@@ -1253,43 +1293,6 @@ dpaa2_dpdmai_dev_uninit(struct rte_rawdev *rawdev)
 	return 0;
 }
 
-static int
-check_devargs_handler(__rte_unused const char *key, const char *value,
-		      __rte_unused void *opaque)
-{
-	if (strcmp(value, "1"))
-		return -1;
-
-	return 0;
-}
-
-static int
-dpaa2_get_devargs(struct rte_devargs *devargs, const char *key)
-{
-	struct rte_kvargs *kvlist;
-
-	if (!devargs)
-		return 0;
-
-	kvlist = rte_kvargs_parse(devargs->args, NULL);
-	if (!kvlist)
-		return 0;
-
-	if (!rte_kvargs_count(kvlist, key)) {
-		rte_kvargs_free(kvlist);
-		return 0;
-	}
-
-	if (rte_kvargs_process(kvlist, key,
-			       check_devargs_handler, NULL) < 0) {
-		rte_kvargs_free(kvlist);
-		return 0;
-	}
-	rte_kvargs_free(kvlist);
-
-	return 1;
-}
-
 static int
 dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
 {
@@ -1393,17 +1396,6 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
 		goto init_err;
 	}
 
-	if (dpaa2_get_devargs(rawdev->device->devargs,
-		DPAA2_QDMA_NO_PREFETCH)) {
-		/* If no prefetch is configured. */
-		dpdmai_dev_dequeue_multijob =
-				dpdmai_dev_dequeue_multijob_no_prefetch;
-		DPAA2_QDMA_INFO("No Prefetch RX Mode enabled");
-	} else {
-		dpdmai_dev_dequeue_multijob =
-			dpdmai_dev_dequeue_multijob_prefetch;
-	}
-
 	if (!dpaa2_coherent_no_alloc_cache) {
 		if (dpaa2_svr_family == SVR_LX2160A) {
 			dpaa2_coherent_no_alloc_cache =
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
index 3c112d28fc..4265ee8283 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
@@ -52,10 +52,11 @@ struct qdma_device {
 	 * This is limited by MAX_HW_QUEUE_PER_CORE
 	 */
 	uint16_t max_hw_queues_per_core;
+
+	/** VQ's of this device */
+	struct qdma_virt_queue *vqs;
 	/** Maximum number of VQ's */
 	uint16_t max_vqs;
-	/** mode of operation - physical(h/w) or virtual */
-	uint8_t mode;
 	/** Device state - started or stopped */
 	uint8_t state;
 	/** FLE pool for the device */
@@ -80,6 +81,26 @@ struct qdma_hw_queue {
 	uint32_t num_users;
 };
 
+struct qdma_virt_queue;
+
+typedef uint16_t (qdma_get_job_t)(struct qdma_virt_queue *qdma_vq,
+					const struct qbman_fd *fd,
+					struct rte_qdma_job **job);
+typedef int (qdma_set_fd_t)(struct qdma_virt_queue *qdma_vq,
+					struct qbman_fd *fd,
+					struct rte_qdma_job *job);
+
+typedef int (qdma_dequeue_multijob_t)(
+				struct qdma_virt_queue *qdma_vq,
+				uint16_t *vq_id,
+				struct rte_qdma_job **job,
+				uint16_t nb_jobs);
+
+typedef int (qdma_enqueue_multijob_t)(
+			struct qdma_virt_queue *qdma_vq,
+			struct rte_qdma_job **job,
+			uint16_t nb_jobs);
+
 /** Represents a QDMA virtual queue */
 struct qdma_virt_queue {
 	/** Status ring of the virtual queue */
@@ -98,6 +119,14 @@ struct qdma_virt_queue {
 	uint64_t num_enqueues;
 	/* Total number of dequeues from this VQ */
 	uint64_t num_dequeues;
+
+	uint16_t vq_id;
+
+	qdma_set_fd_t *set_fd;
+	qdma_get_job_t *get_job;
+
+	qdma_dequeue_multijob_t *dequeue_job;
+	qdma_enqueue_multijob_t *enqueue_job;
 };
 
 /** Represents a QDMA per core hw queues allocation in virtual mode */
@@ -176,4 +205,10 @@ struct dpaa2_dpdmai_dev {
 	struct qdma_device *qdma_dev;
 };
 
+static inline struct qdma_device *
+QDMA_DEV_OF_VQ(struct qdma_virt_queue *vq)
+{
+	return vq->hw_queue->dpdmai_dev->qdma_dev;
+}
+
 #endif /* __DPAA2_QDMA_H__ */
diff --git a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
index 71894d35e9..ff4fc1d6c3 100644
--- a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
@@ -50,6 +50,8 @@ enum {
  */
 #define RTE_QDMA_VQ_EXCLUSIVE_PQ	(1ULL)
 
+#define RTE_QDMA_VQ_FD_LONG_FORMAT		(1ULL << 1)
+
 /** States if the source addresses is physical. */
 #define RTE_QDMA_JOB_SRC_PHY		(1ULL)
 
@@ -68,10 +70,6 @@ struct rte_qdma_config {
 	uint16_t max_hw_queues_per_core;
 	/** Maximum number of VQ's to be used. */
 	uint16_t max_vqs;
-	/** mode of operation - physical(h/w) or virtual */
-	uint8_t mode;
-	/** FD format */
-	uint8_t format;
 	/**
 	 * User provides this as input to the driver as a size of the FLE pool.
 	 * FLE's (and corresponding source/destination descriptors) are
@@ -182,13 +180,16 @@ struct rte_qdma_queue_config {
 /**
  * Get a Virtual Queue statistics.
  *
+ * @param rawdev
+ *   Raw Device.
  * @param vq_id
  *   Virtual Queue ID.
  * @param vq_stats
  *   VQ statistics structure which will be filled in by the driver.
  */
 void
-rte_qdma_vq_stats(uint16_t vq_id,
-		  struct rte_qdma_vq_stats *vq_stats);
+rte_qdma_vq_stats(struct rte_rawdev *rawdev,
+		uint16_t vq_id,
+		struct rte_qdma_vq_stats *vq_stats);
 
 #endif /* __RTE_PMD_DPAA2_QDMA_H__*/
-- 
2.39.5