X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;ds=sidebyside;f=drivers%2Fraw%2Fdpaa2_qdma%2Fdpaa2_qdma.c;h=c9059540042003097080d7a2f8afe466acecf46a;hb=91c0c9789c44bd478bfad6fc1b134741eab98987;hp=b38a282581fcc6976a8d791ea6e5df76fef7646c;hpb=c22fab9a6c349ba5fcf3f809c1d78a81424719f4;p=dpdk.git diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c index b38a282581..c905954004 100644 --- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c +++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2018 NXP + * Copyright 2018-2019 NXP */ #include @@ -13,18 +13,25 @@ #include #include #include +#include +#include #include #include #include +#include "rte_pmd_dpaa2_qdma.h" #include "dpaa2_qdma.h" #include "dpaa2_qdma_logs.h" -#include "rte_pmd_dpaa2_qdma.h" + +#define DPAA2_QDMA_NO_PREFETCH "no_prefetch" /* Dynamic log type identifier */ int dpaa2_qdma_logtype; +uint32_t dpaa2_coherent_no_alloc_cache; +uint32_t dpaa2_coherent_alloc_cache; + /* QDMA device */ static struct qdma_device qdma_dev; @@ -34,10 +41,279 @@ static struct qdma_hw_queue_list qdma_queue_list = TAILQ_HEAD_INITIALIZER(qdma_queue_list); /* QDMA Virtual Queues */ -struct qdma_virt_queue *qdma_vqs; +static struct qdma_virt_queue *qdma_vqs; /* QDMA per core data */ -struct qdma_per_core_info qdma_core_info[RTE_MAX_LCORE]; +static struct qdma_per_core_info qdma_core_info[RTE_MAX_LCORE]; + +typedef int (dpdmai_dev_dequeue_multijob_t)(struct dpaa2_dpdmai_dev *dpdmai_dev, + uint16_t rxq_id, + uint16_t *vq_id, + struct rte_qdma_job **job, + uint16_t nb_jobs); + +dpdmai_dev_dequeue_multijob_t *dpdmai_dev_dequeue_multijob; + +typedef uint16_t (dpdmai_dev_get_job_t)(const struct qbman_fd *fd, + struct rte_qdma_job **job); +typedef int (dpdmai_dev_set_fd_t)(struct qbman_fd *fd, + struct rte_qdma_job *job, + struct rte_qdma_rbp *rbp, + uint16_t vq_id); +dpdmai_dev_get_job_t *dpdmai_dev_get_job; +dpdmai_dev_set_fd_t *dpdmai_dev_set_fd; + +static inline int +qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest, + uint32_t len, struct qbman_fd *fd, + struct rte_qdma_rbp *rbp) +{ + fd->simple_pci.saddr_lo = lower_32_bits((uint64_t) (src)); + fd->simple_pci.saddr_hi = upper_32_bits((uint64_t) (src)); + + fd->simple_pci.len_sl = len; + + fd->simple_pci.bmt = 1; + fd->simple_pci.fmt = 3; + fd->simple_pci.sl = 1; + fd->simple_pci.ser = 1; + + fd->simple_pci.sportid = rbp->sportid; /*pcie 3 */ + fd->simple_pci.srbp = rbp->srbp; + if (rbp->srbp) + fd->simple_pci.rdttype = 0; + else + fd->simple_pci.rdttype = dpaa2_coherent_alloc_cache; + + /*dest is pcie memory */ + fd->simple_pci.dportid = rbp->dportid; /*pcie 3 */ + fd->simple_pci.drbp = rbp->drbp; + if (rbp->drbp) + fd->simple_pci.wrttype = 0; + else + fd->simple_pci.wrttype = dpaa2_coherent_no_alloc_cache; + + fd->simple_pci.daddr_lo = lower_32_bits((uint64_t) (dest)); + fd->simple_pci.daddr_hi = upper_32_bits((uint64_t) (dest)); + + return 0; +} + +static inline int +qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest, + uint32_t len, struct qbman_fd *fd) +{ + fd->simple_ddr.saddr_lo = lower_32_bits((uint64_t) (src)); + fd->simple_ddr.saddr_hi = upper_32_bits((uint64_t) (src)); + + fd->simple_ddr.len = len; + + fd->simple_ddr.bmt = 1; + fd->simple_ddr.fmt = 3; + fd->simple_ddr.sl = 1; + fd->simple_ddr.ser = 1; + /** + * src If RBP=0 {NS,RDTTYPE[3:0]}: 0_1011 + * Coherent copy of cacheable memory, + * lookup in downstream cache, no allocate + * on miss + */ + fd->simple_ddr.rns = 0; + fd->simple_ddr.rdttype = dpaa2_coherent_alloc_cache; + /** + * dest If RBP=0 {NS,WRTTYPE[3:0]}: 0_0111 + * Coherent write of cacheable memory, + * lookup in downstream cache, no allocate on miss + */ + fd->simple_ddr.wns = 0; + fd->simple_ddr.wrttype = dpaa2_coherent_no_alloc_cache; + + fd->simple_ddr.daddr_lo = lower_32_bits((uint64_t) (dest)); + fd->simple_ddr.daddr_hi = upper_32_bits((uint64_t) (dest)); + + return 0; +} + +static void +dpaa2_qdma_populate_fle(struct qbman_fle *fle, + struct rte_qdma_rbp *rbp, + uint64_t src, uint64_t dest, + size_t len, uint32_t flags) +{ + struct qdma_sdd *sdd; + + sdd = (struct qdma_sdd *)((uint8_t *)(fle) + + (DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle))); + + /* first frame list to source descriptor */ + DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(sdd)); + DPAA2_SET_FLE_LEN(fle, (2 * (sizeof(struct qdma_sdd)))); + + /* source and destination descriptor */ + if (rbp && rbp->enable) { + /* source */ + sdd->read_cmd.portid = rbp->sportid; + sdd->rbpcmd_simple.pfid = rbp->spfid; + sdd->rbpcmd_simple.vfid = rbp->svfid; + + if (rbp->srbp) { + sdd->read_cmd.rbp = rbp->srbp; + sdd->read_cmd.rdtype = DPAA2_RBP_MEM_RW; + } else { + sdd->read_cmd.rdtype = dpaa2_coherent_no_alloc_cache; + } + sdd++; + /* destination */ + sdd->write_cmd.portid = rbp->dportid; + sdd->rbpcmd_simple.pfid = rbp->dpfid; + sdd->rbpcmd_simple.vfid = rbp->dvfid; + + if (rbp->drbp) { + sdd->write_cmd.rbp = rbp->drbp; + sdd->write_cmd.wrttype = DPAA2_RBP_MEM_RW; + } else { + sdd->write_cmd.wrttype = dpaa2_coherent_alloc_cache; + } + + } else { + sdd->read_cmd.rdtype = dpaa2_coherent_no_alloc_cache; + sdd++; + sdd->write_cmd.wrttype = dpaa2_coherent_alloc_cache; + } + fle++; + /* source frame list to source buffer */ + if (flags & RTE_QDMA_JOB_SRC_PHY) { + DPAA2_SET_FLE_ADDR(fle, src); + DPAA2_SET_FLE_BMT(fle); + } else { + DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(src)); + } + DPAA2_SET_FLE_LEN(fle, len); + + fle++; + /* destination frame list to destination buffer */ + if (flags & RTE_QDMA_JOB_DEST_PHY) { + DPAA2_SET_FLE_BMT(fle); + DPAA2_SET_FLE_ADDR(fle, dest); + } else { + DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(dest)); + } + DPAA2_SET_FLE_LEN(fle, len); + + /* Final bit: 1, for last frame list */ + DPAA2_SET_FLE_FIN(fle); +} + +static inline int dpdmai_dev_set_fd_us(struct qbman_fd *fd, + struct rte_qdma_job *job, + struct rte_qdma_rbp *rbp, + uint16_t vq_id) +{ + struct rte_qdma_job **ppjob; + size_t iova; + int ret = 0; + + if (job->src & QDMA_RBP_UPPER_ADDRESS_MASK) + iova = (size_t)job->dest; + else + iova = (size_t)job->src; + + /* Set the metadata */ + job->vq_id = vq_id; + ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1; + *ppjob = job; + + if ((rbp->drbp == 1) || (rbp->srbp == 1)) + ret = qdma_populate_fd_pci((phys_addr_t) job->src, + (phys_addr_t) job->dest, + job->len, fd, rbp); + else + ret = qdma_populate_fd_ddr((phys_addr_t) job->src, + (phys_addr_t) job->dest, + job->len, fd); + return ret; +} +static inline int dpdmai_dev_set_fd_lf(struct qbman_fd *fd, + struct rte_qdma_job *job, + struct rte_qdma_rbp *rbp, + uint16_t vq_id) +{ + struct rte_qdma_job **ppjob; + struct qbman_fle *fle; + int ret = 0; + /* + * Get an FLE/SDD from FLE pool. + * Note: IO metadata is before the FLE and SDD memory. + */ + ret = rte_mempool_get(qdma_dev.fle_pool, (void **)(&ppjob)); + if (ret) { + DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE"); + return ret; + } + + /* Set the metadata */ + job->vq_id = vq_id; + *ppjob = job; + + fle = (struct qbman_fle *)(ppjob + 1); + + DPAA2_SET_FD_ADDR(fd, DPAA2_VADDR_TO_IOVA(fle)); + DPAA2_SET_FD_COMPOUND_FMT(fd); + DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX); + + /* Populate FLE */ + memset(fle, 0, QDMA_FLE_POOL_SIZE); + dpaa2_qdma_populate_fle(fle, rbp, job->src, job->dest, + job->len, job->flags); + + return 0; +} + +static inline uint16_t dpdmai_dev_get_job_us(const struct qbman_fd *fd, + struct rte_qdma_job **job) +{ + uint16_t vqid; + size_t iova; + struct rte_qdma_job **ppjob; + + if (fd->simple_pci.saddr_hi & (QDMA_RBP_UPPER_ADDRESS_MASK >> 32)) + iova = (size_t) (((uint64_t)fd->simple_pci.daddr_hi) << 32 + | (uint64_t)fd->simple_pci.daddr_lo); + else + iova = (size_t)(((uint64_t)fd->simple_pci.saddr_hi) << 32 + | (uint64_t)fd->simple_pci.saddr_lo); + + ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1; + *job = (struct rte_qdma_job *)*ppjob; + (*job)->status = (fd->simple_pci.acc_err << 8) | (fd->simple_pci.error); + vqid = (*job)->vq_id; + + return vqid; +} + +static inline uint16_t dpdmai_dev_get_job_lf(const struct qbman_fd *fd, + struct rte_qdma_job **job) +{ + struct rte_qdma_job **ppjob; + uint16_t vqid; + /* + * Fetch metadata from FLE. job and vq_id were set + * in metadata in the enqueue operation. + */ + ppjob = (struct rte_qdma_job **) + DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd)); + ppjob -= 1; + + *job = (struct rte_qdma_job *)*ppjob; + (*job)->status = (DPAA2_GET_FD_ERR(fd) << 8) | + (DPAA2_GET_FD_FRC(fd) & 0xFF); + vqid = (*job)->vq_id; + + /* Free FLE to the pool */ + rte_mempool_put(qdma_dev.fle_pool, (void *)ppjob); + + return vqid; +} static struct qdma_hw_queue * alloc_hw_queue(uint32_t lcore_id) @@ -143,7 +419,7 @@ put_hw_queue(struct qdma_hw_queue *queue) } } -int __rte_experimental +int rte_qdma_init(void) { DPAA2_QDMA_FUNC_TRACE(); @@ -153,7 +429,7 @@ rte_qdma_init(void) return 0; } -void __rte_experimental +void rte_qdma_attr_get(struct rte_qdma_attr *qdma_attr) { DPAA2_QDMA_FUNC_TRACE(); @@ -161,7 +437,7 @@ rte_qdma_attr_get(struct rte_qdma_attr *qdma_attr) qdma_attr->num_hw_queues = qdma_dev.num_hw_queues; } -int __rte_experimental +int rte_qdma_reset(void) { struct qdma_hw_queue *queue; @@ -215,10 +491,11 @@ rte_qdma_reset(void) return 0; } -int __rte_experimental +int rte_qdma_configure(struct rte_qdma_config *qdma_config) { int ret; + char fle_pool_name[32]; /* RTE_MEMZONE_NAMESIZE = 32 */ DPAA2_QDMA_FUNC_TRACE(); @@ -258,8 +535,12 @@ rte_qdma_configure(struct rte_qdma_config *qdma_config) } qdma_dev.max_vqs = qdma_config->max_vqs; - /* Allocate FLE pool */ - qdma_dev.fle_pool = rte_mempool_create("qdma_fle_pool", + /* Allocate FLE pool; just append PID so that in case of + * multiprocess, the pool's don't collide. + */ + snprintf(fle_pool_name, sizeof(fle_pool_name), "qdma_fle_pool%u", + getpid()); + qdma_dev.fle_pool = rte_mempool_create(fle_pool_name, qdma_config->fle_pool_count, QDMA_FLE_POOL_SIZE, QDMA_FLE_CACHE_SIZE(qdma_config->fle_pool_count), 0, NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0); @@ -271,10 +552,17 @@ rte_qdma_configure(struct rte_qdma_config *qdma_config) } qdma_dev.fle_pool_count = qdma_config->fle_pool_count; + if (qdma_config->format == RTE_QDMA_ULTRASHORT_FORMAT) { + dpdmai_dev_get_job = dpdmai_dev_get_job_us; + dpdmai_dev_set_fd = dpdmai_dev_set_fd_us; + } else { + dpdmai_dev_get_job = dpdmai_dev_get_job_lf; + dpdmai_dev_set_fd = dpdmai_dev_set_fd_lf; + } return 0; } -int __rte_experimental +int rte_qdma_start(void) { DPAA2_QDMA_FUNC_TRACE(); @@ -284,7 +572,7 @@ rte_qdma_start(void) return 0; } -int __rte_experimental +int rte_qdma_vq_create(uint32_t lcore_id, uint32_t flags) { char ring_name[32]; @@ -303,6 +591,7 @@ rte_qdma_vq_create(uint32_t lcore_id, uint32_t flags) /* Return in case no VQ is free */ if (i == qdma_dev.max_vqs) { rte_spinlock_unlock(&qdma_dev.lock); + DPAA2_QDMA_ERR("Unable to get lock on QDMA device"); return -ENODEV; } @@ -313,7 +602,7 @@ rte_qdma_vq_create(uint32_t lcore_id, uint32_t flags) qdma_vqs[i].exclusive_hw_queue = 1; } else { /* Allocate a Ring for Virutal Queue in VQ mode */ - sprintf(ring_name, "status ring %d", i); + snprintf(ring_name, sizeof(ring_name), "status ring %d", i); qdma_vqs[i].status_ring = rte_ring_create(ring_name, qdma_dev.fle_pool_count, rte_socket_id(), 0); if (!qdma_vqs[i].status_ring) { @@ -338,20 +627,474 @@ rte_qdma_vq_create(uint32_t lcore_id, uint32_t flags) qdma_vqs[i].in_use = 1; qdma_vqs[i].lcore_id = lcore_id; - + memset(&qdma_vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp)); rte_spinlock_unlock(&qdma_dev.lock); return i; } -void __rte_experimental +/*create vq for route-by-port*/ +int +rte_qdma_vq_create_rbp(uint32_t lcore_id, uint32_t flags, + struct rte_qdma_rbp *rbp) +{ + int i; + + i = rte_qdma_vq_create(lcore_id, flags); + + memcpy(&qdma_vqs[i].rbp, rbp, sizeof(struct rte_qdma_rbp)); + + return i; +} + +static int +dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev, + uint16_t txq_id, + uint16_t vq_id, + struct rte_qdma_rbp *rbp, + struct rte_qdma_job **job, + uint16_t nb_jobs) +{ + struct qbman_fd fd[RTE_QDMA_BURST_NB_MAX]; + struct dpaa2_queue *txq; + struct qbman_eq_desc eqdesc; + struct qbman_swp *swp; + int ret; + uint32_t num_to_send = 0; + uint16_t num_tx = 0; + + if (unlikely(!DPAA2_PER_LCORE_DPIO)) { + ret = dpaa2_affine_qbman_swp(); + if (ret) { + DPAA2_QDMA_ERR("Failure in affining portal"); + return 0; + } + } + swp = DPAA2_PER_LCORE_PORTAL; + + txq = &(dpdmai_dev->tx_queue[txq_id]); + + /* Prepare enqueue descriptor */ + qbman_eq_desc_clear(&eqdesc); + qbman_eq_desc_set_fq(&eqdesc, txq->fqid); + qbman_eq_desc_set_no_orp(&eqdesc, 0); + qbman_eq_desc_set_response(&eqdesc, 0, 0); + + memset(fd, 0, RTE_QDMA_BURST_NB_MAX * sizeof(struct qbman_fd)); + + while (nb_jobs > 0) { + uint32_t loop; + + num_to_send = (nb_jobs > dpaa2_eqcr_size) ? + dpaa2_eqcr_size : nb_jobs; + + for (loop = 0; loop < num_to_send; loop++) { + ret = dpdmai_dev_set_fd(&fd[loop], + job[num_tx], rbp, vq_id); + if (ret < 0) { + /* Set nb_jobs to loop, so outer while loop + * breaks out. + */ + nb_jobs = loop; + break; + } + + num_tx++; + } + + /* Enqueue the packet to the QBMAN */ + uint32_t enqueue_loop = 0, retry_count = 0; + while (enqueue_loop < loop) { + ret = qbman_swp_enqueue_multiple(swp, + &eqdesc, + &fd[enqueue_loop], + NULL, + loop - enqueue_loop); + if (unlikely(ret < 0)) { + retry_count++; + if (retry_count > DPAA2_MAX_TX_RETRY_COUNT) + return num_tx - (loop - enqueue_loop); + } else { + enqueue_loop += ret; + retry_count = 0; + } + } + nb_jobs -= loop; + } + return num_tx; +} + +int +rte_qdma_vq_enqueue_multi(uint16_t vq_id, + struct rte_qdma_job **job, + uint16_t nb_jobs) +{ + struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id]; + struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue; + struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev; + int ret; + + /* Return error in case of wrong lcore_id */ + if (rte_lcore_id() != qdma_vq->lcore_id) { + DPAA2_QDMA_ERR("QDMA enqueue for vqid %d on wrong core", + vq_id); + return -EINVAL; + } + + ret = dpdmai_dev_enqueue_multi(dpdmai_dev, + qdma_pq->queue_id, + vq_id, + &qdma_vq->rbp, + job, + nb_jobs); + if (ret < 0) { + DPAA2_QDMA_ERR("DPDMAI device enqueue failed: %d", ret); + return ret; + } + + qdma_vq->num_enqueues += ret; + + return ret; +} + +int +rte_qdma_vq_enqueue(uint16_t vq_id, + struct rte_qdma_job *job) +{ + return rte_qdma_vq_enqueue_multi(vq_id, &job, 1); +} + +/* Function to receive a QDMA job for a given device and queue*/ +static int +dpdmai_dev_dequeue_multijob_prefetch( + struct dpaa2_dpdmai_dev *dpdmai_dev, + uint16_t rxq_id, + uint16_t *vq_id, + struct rte_qdma_job **job, + uint16_t nb_jobs) +{ + struct dpaa2_queue *rxq; + struct qbman_result *dq_storage, *dq_storage1 = NULL; + struct qbman_pull_desc pulldesc; + struct qbman_swp *swp; + struct queue_storage_info_t *q_storage; + uint32_t fqid; + uint8_t status, pending; + uint8_t num_rx = 0; + const struct qbman_fd *fd; + uint16_t vqid; + int ret, pull_size; + + if (unlikely(!DPAA2_PER_LCORE_DPIO)) { + ret = dpaa2_affine_qbman_swp(); + if (ret) { + DPAA2_QDMA_ERR("Failure in affining portal"); + return 0; + } + } + swp = DPAA2_PER_LCORE_PORTAL; + + pull_size = (nb_jobs > dpaa2_dqrr_size) ? dpaa2_dqrr_size : nb_jobs; + rxq = &(dpdmai_dev->rx_queue[rxq_id]); + fqid = rxq->fqid; + q_storage = rxq->q_storage; + + if (unlikely(!q_storage->active_dqs)) { + q_storage->toggle = 0; + dq_storage = q_storage->dq_storage[q_storage->toggle]; + q_storage->last_num_pkts = pull_size; + qbman_pull_desc_clear(&pulldesc); + qbman_pull_desc_set_numframes(&pulldesc, + q_storage->last_num_pkts); + qbman_pull_desc_set_fq(&pulldesc, fqid); + qbman_pull_desc_set_storage(&pulldesc, dq_storage, + (size_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1); + if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) { + while (!qbman_check_command_complete( + get_swp_active_dqs( + DPAA2_PER_LCORE_DPIO->index))) + ; + clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index); + } + while (1) { + if (qbman_swp_pull(swp, &pulldesc)) { + DPAA2_QDMA_DP_WARN( + "VDQ command not issued.QBMAN busy\n"); + /* Portal was busy, try again */ + continue; + } + break; + } + q_storage->active_dqs = dq_storage; + q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index; + set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, + dq_storage); + } + + dq_storage = q_storage->active_dqs; + rte_prefetch0((void *)(size_t)(dq_storage)); + rte_prefetch0((void *)(size_t)(dq_storage + 1)); + + /* Prepare next pull descriptor. This will give space for the + * prefething done on DQRR entries + */ + q_storage->toggle ^= 1; + dq_storage1 = q_storage->dq_storage[q_storage->toggle]; + qbman_pull_desc_clear(&pulldesc); + qbman_pull_desc_set_numframes(&pulldesc, pull_size); + qbman_pull_desc_set_fq(&pulldesc, fqid); + qbman_pull_desc_set_storage(&pulldesc, dq_storage1, + (size_t)(DPAA2_VADDR_TO_IOVA(dq_storage1)), 1); + + /* Check if the previous issued command is completed. + * Also seems like the SWP is shared between the Ethernet Driver + * and the SEC driver. + */ + while (!qbman_check_command_complete(dq_storage)) + ; + if (dq_storage == get_swp_active_dqs(q_storage->active_dpio_id)) + clear_swp_active_dqs(q_storage->active_dpio_id); + + pending = 1; + + do { + /* Loop until the dq_storage is updated with + * new token by QBMAN + */ + while (!qbman_check_new_result(dq_storage)) + ; + rte_prefetch0((void *)((size_t)(dq_storage + 2))); + /* Check whether Last Pull command is Expired and + * setting Condition for Loop termination + */ + if (qbman_result_DQ_is_pull_complete(dq_storage)) { + pending = 0; + /* Check for valid frame. */ + status = qbman_result_DQ_flags(dq_storage); + if (unlikely((status & QBMAN_DQ_STAT_VALIDFRAME) == 0)) + continue; + } + fd = qbman_result_DQ_fd(dq_storage); + + vqid = dpdmai_dev_get_job(fd, &job[num_rx]); + if (vq_id) + vq_id[num_rx] = vqid; + + dq_storage++; + num_rx++; + } while (pending); + + if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) { + while (!qbman_check_command_complete( + get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index))) + ; + clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index); + } + /* issue a volatile dequeue command for next pull */ + while (1) { + if (qbman_swp_pull(swp, &pulldesc)) { + DPAA2_QDMA_DP_WARN("VDQ command is not issued." + "QBMAN is busy (2)\n"); + continue; + } + break; + } + + q_storage->active_dqs = dq_storage1; + q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index; + set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage1); + + return num_rx; +} + +static int +dpdmai_dev_dequeue_multijob_no_prefetch( + struct dpaa2_dpdmai_dev *dpdmai_dev, + uint16_t rxq_id, + uint16_t *vq_id, + struct rte_qdma_job **job, + uint16_t nb_jobs) +{ + struct dpaa2_queue *rxq; + struct qbman_result *dq_storage; + struct qbman_pull_desc pulldesc; + struct qbman_swp *swp; + uint32_t fqid; + uint8_t status, pending; + uint8_t num_rx = 0; + const struct qbman_fd *fd; + uint16_t vqid; + int ret, next_pull = nb_jobs, num_pulled = 0; + + if (unlikely(!DPAA2_PER_LCORE_DPIO)) { + ret = dpaa2_affine_qbman_swp(); + if (ret) { + DPAA2_QDMA_ERR("Failure in affining portal"); + return 0; + } + } + swp = DPAA2_PER_LCORE_PORTAL; + + rxq = &(dpdmai_dev->rx_queue[rxq_id]); + fqid = rxq->fqid; + + do { + dq_storage = rxq->q_storage->dq_storage[0]; + /* Prepare dequeue descriptor */ + qbman_pull_desc_clear(&pulldesc); + qbman_pull_desc_set_fq(&pulldesc, fqid); + qbman_pull_desc_set_storage(&pulldesc, dq_storage, + (uint64_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1); + + if (next_pull > dpaa2_dqrr_size) { + qbman_pull_desc_set_numframes(&pulldesc, + dpaa2_dqrr_size); + next_pull -= dpaa2_dqrr_size; + } else { + qbman_pull_desc_set_numframes(&pulldesc, next_pull); + next_pull = 0; + } + + while (1) { + if (qbman_swp_pull(swp, &pulldesc)) { + DPAA2_QDMA_DP_WARN("VDQ command not issued. QBMAN busy"); + /* Portal was busy, try again */ + continue; + } + break; + } + + rte_prefetch0((void *)((size_t)(dq_storage + 1))); + /* Check if the previous issued command is completed. */ + while (!qbman_check_command_complete(dq_storage)) + ; + + num_pulled = 0; + pending = 1; + + do { + /* Loop until dq_storage is updated + * with new token by QBMAN + */ + while (!qbman_check_new_result(dq_storage)) + ; + rte_prefetch0((void *)((size_t)(dq_storage + 2))); + + if (qbman_result_DQ_is_pull_complete(dq_storage)) { + pending = 0; + /* Check for valid frame. */ + status = qbman_result_DQ_flags(dq_storage); + if (unlikely((status & + QBMAN_DQ_STAT_VALIDFRAME) == 0)) + continue; + } + fd = qbman_result_DQ_fd(dq_storage); + + vqid = dpdmai_dev_get_job(fd, &job[num_rx]); + if (vq_id) + vq_id[num_rx] = vqid; + + dq_storage++; + num_rx++; + num_pulled++; + + } while (pending); + /* Last VDQ provided all packets and more packets are requested */ + } while (next_pull && num_pulled == dpaa2_dqrr_size); + + return num_rx; +} + +int +rte_qdma_vq_dequeue_multi(uint16_t vq_id, + struct rte_qdma_job **job, + uint16_t nb_jobs) +{ + struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id]; + struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue; + struct qdma_virt_queue *temp_qdma_vq; + struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev; + int ring_count, ret = 0, i; + + /* Return error in case of wrong lcore_id */ + if (rte_lcore_id() != (unsigned int)(qdma_vq->lcore_id)) { + DPAA2_QDMA_WARN("QDMA dequeue for vqid %d on wrong core", + vq_id); + return -1; + } + + /* Only dequeue when there are pending jobs on VQ */ + if (qdma_vq->num_enqueues == qdma_vq->num_dequeues) + return 0; + + if (qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs)) + nb_jobs = (qdma_vq->num_enqueues - qdma_vq->num_dequeues); + + if (qdma_vq->exclusive_hw_queue) { + /* In case of exclusive queue directly fetch from HW queue */ + ret = dpdmai_dev_dequeue_multijob(dpdmai_dev, qdma_pq->queue_id, + NULL, job, nb_jobs); + if (ret < 0) { + DPAA2_QDMA_ERR( + "Dequeue from DPDMAI device failed: %d", ret); + return ret; + } + qdma_vq->num_dequeues += ret; + } else { + uint16_t temp_vq_id[RTE_QDMA_BURST_NB_MAX]; + /* + * Get the QDMA completed jobs from the software ring. + * In case they are not available on the ring poke the HW + * to fetch completed jobs from corresponding HW queues + */ + ring_count = rte_ring_count(qdma_vq->status_ring); + if (ring_count < nb_jobs) { + /* TODO - How to have right budget */ + ret = dpdmai_dev_dequeue_multijob(dpdmai_dev, + qdma_pq->queue_id, + temp_vq_id, job, nb_jobs); + for (i = 0; i < ret; i++) { + temp_qdma_vq = &qdma_vqs[temp_vq_id[i]]; + rte_ring_enqueue(temp_qdma_vq->status_ring, + (void *)(job[i])); + } + ring_count = rte_ring_count( + qdma_vq->status_ring); + } + + if (ring_count) { + /* Dequeue job from the software ring + * to provide to the user + */ + ret = rte_ring_dequeue_bulk(qdma_vq->status_ring, + (void **)job, ring_count, NULL); + if (ret) + qdma_vq->num_dequeues += ret; + } + } + + return ret; +} + +struct rte_qdma_job * +rte_qdma_vq_dequeue(uint16_t vq_id) +{ + int ret; + struct rte_qdma_job *job = NULL; + + ret = rte_qdma_vq_dequeue_multi(vq_id, &job, 1); + if (ret < 0) + DPAA2_QDMA_DP_WARN("DPDMAI device dequeue failed: %d", ret); + + return job; +} + +void rte_qdma_vq_stats(uint16_t vq_id, struct rte_qdma_vq_stats *vq_status) { struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id]; - DPAA2_QDMA_FUNC_TRACE(); - if (qdma_vq->in_use) { vq_status->exclusive_hw_queue = qdma_vq->exclusive_hw_queue; vq_status->lcore_id = qdma_vq->lcore_id; @@ -362,7 +1105,7 @@ rte_qdma_vq_stats(uint16_t vq_id, } } -int __rte_experimental +int rte_qdma_vq_destroy(uint16_t vq_id) { struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id]; @@ -386,12 +1129,41 @@ rte_qdma_vq_destroy(uint16_t vq_id) memset(qdma_vq, 0, sizeof(struct qdma_virt_queue)); + rte_spinlock_unlock(&qdma_dev.lock); + + return 0; +} + +int +rte_qdma_vq_destroy_rbp(uint16_t vq_id) +{ + struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id]; + + DPAA2_QDMA_FUNC_TRACE(); + + /* In case there are pending jobs on any VQ, return -EBUSY */ + if (qdma_vq->num_enqueues != qdma_vq->num_dequeues) + return -EBUSY; + rte_spinlock_lock(&qdma_dev.lock); + if (qdma_vq->exclusive_hw_queue) { + free_hw_queue(qdma_vq->hw_queue); + } else { + if (qdma_vqs->status_ring) + rte_ring_free(qdma_vqs->status_ring); + + put_hw_queue(qdma_vq->hw_queue); + } + + memset(qdma_vq, 0, sizeof(struct qdma_virt_queue)); + + rte_spinlock_unlock(&qdma_dev.lock); + return 0; } -void __rte_experimental +void rte_qdma_stop(void) { DPAA2_QDMA_FUNC_TRACE(); @@ -399,7 +1171,7 @@ rte_qdma_stop(void) qdma_dev.state = 0; } -void __rte_experimental +void rte_qdma_destroy(void) { DPAA2_QDMA_FUNC_TRACE(); @@ -460,9 +1232,6 @@ dpaa2_dpdmai_dev_uninit(struct rte_rawdev *rawdev) DPAA2_QDMA_FUNC_TRACE(); - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return 0; - /* Remove HW queues from global list */ remove_hw_queues_from_list(dpdmai_dev); @@ -472,7 +1241,7 @@ dpaa2_dpdmai_dev_uninit(struct rte_rawdev *rawdev) DPAA2_QDMA_ERR("dmdmai disable failed"); /* Set up the DQRR storage for Rx */ - for (i = 0; i < DPDMAI_PRIO_NUM; i++) { + for (i = 0; i < dpdmai_dev->num_queues; i++) { struct dpaa2_queue *rxq = &(dpdmai_dev->rx_queue[i]); if (rxq->q_storage) { @@ -489,6 +1258,43 @@ dpaa2_dpdmai_dev_uninit(struct rte_rawdev *rawdev) return 0; } +static int +check_devargs_handler(__rte_unused const char *key, const char *value, + __rte_unused void *opaque) +{ + if (strcmp(value, "1")) + return -1; + + return 0; +} + +static int +dpaa2_get_devargs(struct rte_devargs *devargs, const char *key) +{ + struct rte_kvargs *kvlist; + + if (!devargs) + return 0; + + kvlist = rte_kvargs_parse(devargs->args, NULL); + if (!kvlist) + return 0; + + if (!rte_kvargs_count(kvlist, key)) { + rte_kvargs_free(kvlist); + return 0; + } + + if (rte_kvargs_process(kvlist, key, + check_devargs_handler, NULL) < 0) { + rte_kvargs_free(kvlist); + return 0; + } + rte_kvargs_free(kvlist); + + return 1; +} + static int dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id) { @@ -501,10 +1307,6 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id) DPAA2_QDMA_FUNC_TRACE(); - /* For secondary processes, the primary has done all the work */ - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return 0; - /* Open DPDMAI device */ dpdmai_dev->dpdmai_id = dpdmai_id; dpdmai_dev->dpdmai.regs = rte_mcp_ptr_list[MC_PORTAL_INDEX]; @@ -523,17 +1325,17 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id) ret); goto init_err; } - dpdmai_dev->num_queues = attr.num_of_priorities; + dpdmai_dev->num_queues = attr.num_of_queues; /* Set up Rx Queues */ - for (i = 0; i < attr.num_of_priorities; i++) { + for (i = 0; i < dpdmai_dev->num_queues; i++) { struct dpaa2_queue *rxq; memset(&rx_queue_cfg, 0, sizeof(struct dpdmai_rx_queue_cfg)); ret = dpdmai_set_rx_queue(&dpdmai_dev->dpdmai, CMD_PRI_LOW, dpdmai_dev->token, - i, &rx_queue_cfg); + i, 0, &rx_queue_cfg); if (ret) { DPAA2_QDMA_ERR("Setting Rx queue failed with err: %d", ret); @@ -560,9 +1362,9 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id) } /* Get Rx and Tx queues FQID's */ - for (i = 0; i < DPDMAI_PRIO_NUM; i++) { + for (i = 0; i < dpdmai_dev->num_queues; i++) { ret = dpdmai_get_rx_queue(&dpdmai_dev->dpdmai, CMD_PRI_LOW, - dpdmai_dev->token, i, &rx_attr); + dpdmai_dev->token, i, 0, &rx_attr); if (ret) { DPAA2_QDMA_ERR("Reading device failed with err: %d", ret); @@ -571,7 +1373,7 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id) dpdmai_dev->rx_queue[i].fqid = rx_attr.fqid; ret = dpdmai_get_tx_queue(&dpdmai_dev->dpdmai, CMD_PRI_LOW, - dpdmai_dev->token, i, &tx_attr); + dpdmai_dev->token, i, 0, &tx_attr); if (ret) { DPAA2_QDMA_ERR("Reading device failed with err: %d", ret); @@ -594,6 +1396,32 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id) DPAA2_QDMA_ERR("Adding H/W queue to list failed"); goto init_err; } + + if (dpaa2_get_devargs(rawdev->device->devargs, + DPAA2_QDMA_NO_PREFETCH)) { + /* If no prefetch is configured. */ + dpdmai_dev_dequeue_multijob = + dpdmai_dev_dequeue_multijob_no_prefetch; + DPAA2_QDMA_INFO("No Prefetch RX Mode enabled"); + } else { + dpdmai_dev_dequeue_multijob = + dpdmai_dev_dequeue_multijob_prefetch; + } + + if (!dpaa2_coherent_no_alloc_cache) { + if (dpaa2_svr_family == SVR_LX2160A) { + dpaa2_coherent_no_alloc_cache = + DPAA2_LX2_COHERENT_NO_ALLOCATE_CACHE; + dpaa2_coherent_alloc_cache = + DPAA2_LX2_COHERENT_ALLOCATE_CACHE; + } else { + dpaa2_coherent_no_alloc_cache = + DPAA2_COHERENT_NO_ALLOCATE_CACHE; + dpaa2_coherent_alloc_cache = + DPAA2_COHERENT_ALLOCATE_CACHE; + } + } + DPAA2_QDMA_DEBUG("Initialized dpdmai object successfully"); return 0; @@ -652,16 +1480,17 @@ rte_dpaa2_qdma_remove(struct rte_dpaa2_device *dpaa2_dev) } static struct rte_dpaa2_driver rte_dpaa2_qdma_pmd = { + .drv_flags = RTE_DPAA2_DRV_IOVA_AS_VA, .drv_type = DPAA2_QDMA, .probe = rte_dpaa2_qdma_probe, .remove = rte_dpaa2_qdma_remove, }; RTE_PMD_REGISTER_DPAA2(dpaa2_qdma, rte_dpaa2_qdma_pmd); +RTE_PMD_REGISTER_PARAM_STRING(dpaa2_qdma, + "no_prefetch= "); -RTE_INIT(dpaa2_qdma_init_log); -static void -dpaa2_qdma_init_log(void) +RTE_INIT(dpaa2_qdma_init_log) { dpaa2_qdma_logtype = rte_log_register("pmd.raw.dpaa2.qdma"); if (dpaa2_qdma_logtype >= 0)