From 9124e65dd3eb513fd15317b1c5fd566c1a478627 Mon Sep 17 00:00:00 2001
From: Gagandeep Singh
Date: Tue, 7 Jul 2020 14:52:24 +0530
Subject: [PATCH] net/dpaa: enable Tx queue taildrop

Enable congestion handling/tail drop for TX queues.

Tx tail drop is disabled by default. Setting the environment variable
DPAA_TX_TAILDROP_THRESHOLD to a non-zero frame count creates one
congestion group per Tx queue and switches the Tx burst function to
dpaa_eth_queue_tx_slow(), which polls the message ring for ERNs and
frees the mbufs of the reported frames before transmitting. Values above
UINT16_MAX fall back to CGR_RX_PERFQ_THRESH.

Review fixes folded in:
- release the Tx CGRID range from cgr_tx[0] instead of indexing one past
  the array after the delete loop
- report Tx (not Rx) in the Tx CGR creation warning and print the
  unsigned threshold with %u
- do not call the ERN free callback when none has been registered

Signed-off-by: Gagandeep Singh
Acked-by: Hemant Agrawal
---
 drivers/bus/dpaa/base/qbman/qman.c        |  50 ++++++++++
 drivers/bus/dpaa/include/fsl_qman.h       |  17 ++++
 drivers/bus/dpaa/rte_bus_dpaa_version.map |   2 +
 drivers/net/dpaa/dpaa_ethdev.c            | 112 ++++++++++++++++++++--
 drivers/net/dpaa/dpaa_ethdev.h            |   1 +
 drivers/net/dpaa/dpaa_rxtx.c              |  82 ++++++++++++++++
 drivers/net/dpaa/dpaa_rxtx.h              |   3 +
 7 files changed, 261 insertions(+), 6 deletions(-)

diff --git a/drivers/bus/dpaa/base/qbman/qman.c b/drivers/bus/dpaa/base/qbman/qman.c
index b596e79c2f..447c091770 100644
--- a/drivers/bus/dpaa/base/qbman/qman.c
+++ b/drivers/bus/dpaa/base/qbman/qman.c
@@ -40,6 +40,9 @@
 		spin_unlock(&__fq478->fqlock); \
 	} while (0)
 
+/* Frees the mbuf behind a rejected frame; set via qman_ern_register_cb() */
+static qman_cb_free_mbuf qman_free_mbuf_cb;
+
 static inline void fq_set(struct qman_fq *fq, u32 mask)
 {
 	dpaa_set_bits(mask, &fq->flags);
@@ -790,6 +793,53 @@ static inline void fq_state_change(struct qman_portal *p, struct qman_fq *fq,
 	FQUNLOCK(fq);
 }
 
+/* Register the callback that qman_ern_poll_free() uses to free mbufs. */
+void
+qman_ern_register_cb(qman_cb_free_mbuf cb)
+{
+	qman_free_mbuf_cb = cb;
+}
+
+/*
+ * Drain the message ring of the affine portal. For every software ERN whose
+ * frame descriptor has a buffer pool id other than 0xff, pass the (byte
+ * swapped) frame descriptor to the registered callback so the mbuf is freed.
+ */
+void
+qman_ern_poll_free(void)
+{
+	struct qman_portal *p = get_affine_portal();
+	u8 verb, num = 0;
+	const struct qm_mr_entry *msg;
+	const struct qm_fd *fd;
+	struct qm_mr_entry swapped_msg;
+
+	qm_mr_pvb_update(&p->p);
+	msg = qm_mr_current(&p->p);
+
+	while (msg != NULL) {
+		swapped_msg = *msg;
+		hw_fd_to_cpu(&swapped_msg.ern.fd);
+		verb = msg->ern.verb & QM_MR_VERB_TYPE_MASK;
+		fd = &swapped_msg.ern.fd;
+
+		if (unlikely(verb & 0x20)) {
+			printf("HW ERN notification, Nothing to do\n");
+		} else {
+			/* bpid 0xff: nothing to hand to the callback */
+			if (qman_free_mbuf_cb && (fd->bpid & 0xff) != 0xff)
+				qman_free_mbuf_cb(fd);
+		}
+
+		num++;
+		qm_mr_next(&p->p);
+		qm_mr_pvb_update(&p->p);
+		msg = qm_mr_current(&p->p);
+	}
+
+	qm_mr_cci_consume(&p->p, num);
+}
+
 static u32 __poll_portal_slow(struct qman_portal *p, u32 is)
 {
 	const struct qm_mr_entry *msg;
diff --git a/drivers/bus/dpaa/include/fsl_qman.h b/drivers/bus/dpaa/include/fsl_qman.h
index 78b698f393..0d9cfc3395 100644
--- a/drivers/bus/dpaa/include/fsl_qman.h
+++ b/drivers/bus/dpaa/include/fsl_qman.h
@@ -1158,6 +1158,10 @@ typedef void (*qman_cb_mr)(struct qman_portal *qm, struct qman_fq *fq,
 /* This callback type is used when handling DCP ERNs */
 typedef void (*qman_cb_dc_ern)(struct qman_portal *qm,
 				const struct qm_mr_entry *msg);
+
+/* This callback function will be used to free mbufs of ERN */
+typedef uint16_t (*qman_cb_free_mbuf)(const struct qm_fd *fd);
+
 /*
  * s/w-visible states. Ie. tentatively scheduled + truly scheduled + active +
  * held-active + held-suspended are just "sched". Things like "retired" will not
@@ -1808,6 +1812,19 @@ __rte_internal
 int qman_enqueue_multi(struct qman_fq *fq, const struct qm_fd *fd, u32 *flags,
 		       int frames_to_send);
 
+/**
+ * qman_ern_poll_free - Polling on MR and calling a callback function to free
+ * mbufs when SW ERNs received.
+ */
+__rte_internal
+void qman_ern_poll_free(void);
+
+/**
+ * qman_ern_register_cb - Register a callback function to free buffers.
+ */
+__rte_internal
+void qman_ern_register_cb(qman_cb_free_mbuf cb);
+
 /**
  * qman_enqueue_multi_fq - Enqueue multiple frames to their respective frame
  * queues.
diff --git a/drivers/bus/dpaa/rte_bus_dpaa_version.map b/drivers/bus/dpaa/rte_bus_dpaa_version.map
index 491c507119..86e59f630e 100644
--- a/drivers/bus/dpaa/rte_bus_dpaa_version.map
+++ b/drivers/bus/dpaa/rte_bus_dpaa_version.map
@@ -57,6 +57,8 @@ INTERNAL {
 	qman_enqueue;
 	qman_enqueue_multi;
 	qman_enqueue_multi_fq;
+	qman_ern_poll_free;
+	qman_ern_register_cb;
 	qman_fq_fqid;
 	qman_fq_portal_irqsource_add;
 	qman_fq_portal_irqsource_remove;
diff --git a/drivers/net/dpaa/dpaa_ethdev.c b/drivers/net/dpaa/dpaa_ethdev.c
index f1c9a71515..fd2c0c681c 100644
--- a/drivers/net/dpaa/dpaa_ethdev.c
+++ b/drivers/net/dpaa/dpaa_ethdev.c
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  *   Copyright 2016 Freescale Semiconductor, Inc. All rights reserved.
- *   Copyright 2017-2019 NXP
+ *   Copyright 2017-2020 NXP
  *
  */
 /* System headers */
@@ -86,9 +86,12 @@ static int dpaa_push_mode_max_queue = DPAA_DEFAULT_PUSH_MODE_QUEUE;
 static int dpaa_push_queue_idx; /* Queue index which are in push mode*/
 
 
-/* Per FQ Taildrop in frame count */
+/* Per RX FQ Taildrop in frame count */
 static unsigned int td_threshold = CGR_RX_PERFQ_THRESH;
 
+/* Per TX FQ Taildrop in frame count, disabled by default */
+static unsigned int td_tx_threshold;
+
 struct rte_dpaa_xstats_name_off {
 	char name[RTE_ETH_XSTATS_NAME_SIZE];
 	uint32_t offset;
@@ -275,7 +278,11 @@ static int dpaa_eth_dev_start(struct rte_eth_dev *dev)
 	PMD_INIT_FUNC_TRACE();
 
 	/* Change tx callback to the real one */
-	dev->tx_pkt_burst = dpaa_eth_queue_tx;
+	if (dpaa_intf->cgr_tx)
+		dev->tx_pkt_burst = dpaa_eth_queue_tx_slow;
+	else
+		dev->tx_pkt_burst = dpaa_eth_queue_tx;
+
 	fman_if_enable_rx(dpaa_intf->fif);
 
 	return 0;
@@ -867,6 +874,7 @@ int dpaa_eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	DPAA_PMD_INFO("Tx queue setup for queue index: %d fq_id (0x%x)",
 			queue_idx, dpaa_intf->tx_queues[queue_idx].fqid);
 	dev->data->tx_queues[queue_idx] = &dpaa_intf->tx_queues[queue_idx];
+
 	return 0;
 }
 
@@ -1236,9 +1244,19 @@ without_cgr:
 
 /* Initialise a Tx FQ */
 static int dpaa_tx_queue_init(struct qman_fq *fq,
-			      struct fman_if *fman_intf)
+			      struct fman_if *fman_intf,
+			      struct qman_cgr *cgr_tx)
 {
 	struct qm_mcc_initfq opts = {0};
+	struct qm_mcc_initcgr cgr_opts = {
+		.we_mask = QM_CGR_WE_CS_THRES |
+				QM_CGR_WE_CSTD_EN |
+				QM_CGR_WE_MODE,
+		.cgr = {
+			.cstd_en = QM_CGR_EN,
+			.mode = QMAN_CGR_MODE_FRAME
+		}
+	};
 	int ret;
 
 	ret = qman_create_fq(0, QMAN_FQ_FLAG_DYNAMIC_FQID |
@@ -1257,6 +1275,27 @@ static int dpaa_tx_queue_init(struct qman_fq *fq,
 	opts.fqd.context_a.hi = 0x80000000 | fman_dealloc_bufs_mask_hi;
 	opts.fqd.context_a.lo = 0 | fman_dealloc_bufs_mask_lo;
 	DPAA_PMD_DEBUG("init tx fq %p, fqid 0x%x", fq, fq->fqid);
+
+	if (cgr_tx) {
+		/* Enable tail drop with cgr on this queue */
+		qm_cgr_cs_thres_set64(&cgr_opts.cgr.cs_thres,
+				      td_tx_threshold, 0);
+		cgr_tx->cb = NULL;
+		ret = qman_create_cgr(cgr_tx, QMAN_CGR_FLAG_USE_INIT,
+				      &cgr_opts);
+		if (ret) {
+			DPAA_PMD_WARN(
+				"tx taildrop init fail on tx fqid 0x%x(ret=%d)",
+				fq->fqid, ret);
+			goto without_cgr;
+		}
+		opts.we_mask |= QM_INITFQ_WE_CGID;
+		opts.fqd.cgid = cgr_tx->cgrid;
+		opts.fqd.fq_ctrl |= QM_FQCTRL_CGE;
+		DPAA_PMD_DEBUG("Tx FQ tail drop enabled, threshold = %u\n",
+				td_tx_threshold);
+	}
+without_cgr:
 	ret = qman_init_fq(fq, QMAN_INITFQ_FLAG_SCHED, &opts);
 	if (ret)
 		DPAA_PMD_ERR("init tx fqid 0x%x failed %d", fq->fqid, ret);
@@ -1309,6 +1348,7 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev)
 	struct fman_if *fman_intf;
 	struct fman_if_bpool *bp, *tmp_bp;
 	uint32_t cgrid[DPAA_MAX_NUM_PCD_QUEUES];
+	uint32_t cgrid_tx[MAX_DPAA_CORES];
 	char eth_buf[RTE_ETHER_ADDR_FMT_SIZE];
 
 	PMD_INIT_FUNC_TRACE();
@@ -1319,7 +1359,10 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev)
 		eth_dev->dev_ops = &dpaa_devops;
 		/* Plugging of UCODE burst API not supported in Secondary */
 		eth_dev->rx_pkt_burst = dpaa_eth_queue_rx;
-		eth_dev->tx_pkt_burst = dpaa_eth_queue_tx;
+		if (dpaa_intf->cgr_tx)
+			eth_dev->tx_pkt_burst = dpaa_eth_queue_tx_slow;
+		else
+			eth_dev->tx_pkt_burst = dpaa_eth_queue_tx;
 #ifdef CONFIG_FSL_QMAN_FQ_LOOKUP
 		qman_set_fq_lookup_table(
 				dpaa_intf->rx_queues->qman_fq_lookup_table);
@@ -1366,6 +1409,21 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev)
 		return -ENOMEM;
 	}
 
+	memset(cgrid, 0, sizeof(cgrid));
+	memset(cgrid_tx, 0, sizeof(cgrid_tx));
+
+	/* if DPAA_TX_TAILDROP_THRESHOLD is set, use that value; if 0, it means
+	 * Tx tail drop is disabled.
+	 */
+	if (getenv("DPAA_TX_TAILDROP_THRESHOLD")) {
+		td_tx_threshold = atoi(getenv("DPAA_TX_TAILDROP_THRESHOLD"));
+		DPAA_PMD_DEBUG("Tail drop threshold env configured: %u",
+			       td_tx_threshold);
+		/* if a very large value is being configured */
+		if (td_tx_threshold > UINT16_MAX)
+			td_tx_threshold = CGR_RX_PERFQ_THRESH;
+	}
+
 	/* If congestion control is enabled globally*/
 	if (td_threshold) {
 		dpaa_intf->cgr_rx = rte_zmalloc(NULL,
@@ -1414,9 +1472,36 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev)
 		goto free_rx;
 	}
 
+	/* If congestion control is enabled globally*/
+	if (td_tx_threshold) {
+		dpaa_intf->cgr_tx = rte_zmalloc(NULL,
+			sizeof(struct qman_cgr) * MAX_DPAA_CORES,
+			MAX_CACHELINE);
+		if (!dpaa_intf->cgr_tx) {
+			DPAA_PMD_ERR("Failed to alloc mem for cgr_tx\n");
+			ret = -ENOMEM;
+			goto free_rx;
+		}
+
+		ret = qman_alloc_cgrid_range(&cgrid_tx[0], MAX_DPAA_CORES,
+					     1, 0);
+		if (ret != MAX_DPAA_CORES) {
+			DPAA_PMD_WARN("insufficient CGRIDs available");
+			ret = -EINVAL;
+			goto free_rx;
+		}
+	} else {
+		dpaa_intf->cgr_tx = NULL;
+	}
+
+
 	for (loop = 0; loop < MAX_DPAA_CORES; loop++) {
+		if (dpaa_intf->cgr_tx)
+			dpaa_intf->cgr_tx[loop].cgrid = cgrid_tx[loop];
+
 		ret = dpaa_tx_queue_init(&dpaa_intf->tx_queues[loop],
-					 fman_intf);
+			fman_intf,
+			dpaa_intf->cgr_tx ? &dpaa_intf->cgr_tx[loop] : NULL);
 		if (ret)
 			goto free_tx;
 		dpaa_intf->tx_queues[loop].dpaa_intf = dpaa_intf;
@@ -1487,6 +1572,7 @@ free_tx:
 
 free_rx:
 	rte_free(dpaa_intf->cgr_rx);
+	rte_free(dpaa_intf->cgr_tx);
 	rte_free(dpaa_intf->rx_queues);
 	dpaa_intf->rx_queues = NULL;
 	dpaa_intf->nb_rx_queues = 0;
@@ -1527,6 +1613,18 @@ dpaa_dev_uninit(struct rte_eth_dev *dev)
 	rte_free(dpaa_intf->cgr_rx);
 	dpaa_intf->cgr_rx = NULL;
 
+	/* Release TX congestion Groups */
+	if (dpaa_intf->cgr_tx) {
+		for (loop = 0; loop < MAX_DPAA_CORES; loop++)
+			qman_delete_cgr(&dpaa_intf->cgr_tx[loop]);
+
+		/* The range base is the first entry; 'loop' is past the end */
+		qman_release_cgrid_range(dpaa_intf->cgr_tx[0].cgrid,
+					 MAX_DPAA_CORES);
+		rte_free(dpaa_intf->cgr_tx);
+		dpaa_intf->cgr_tx = NULL;
+	}
+
 	rte_free(dpaa_intf->rx_queues);
 	dpaa_intf->rx_queues = NULL;
 
@@ -1631,6 +1729,8 @@ rte_dpaa_probe(struct rte_dpaa_driver *dpaa_drv __rte_unused,
 	eth_dev->device = &dpaa_dev->device;
 	dpaa_dev->eth_dev = eth_dev;
 
+	qman_ern_register_cb(dpaa_free_mbuf);
+
 	/* Invoke PMD device initialization function */
 	diag = dpaa_dev_init(eth_dev);
 	if (diag == 0) {
diff --git a/drivers/net/dpaa/dpaa_ethdev.h b/drivers/net/dpaa/dpaa_ethdev.h
index 6a6477ac87..d4261f8854 100644
--- a/drivers/net/dpaa/dpaa_ethdev.h
+++ b/drivers/net/dpaa/dpaa_ethdev.h
@@ -111,6 +111,7 @@ struct dpaa_if {
 	struct qman_fq *rx_queues;
 	struct qman_cgr *cgr_rx;
 	struct qman_fq *tx_queues;
+	struct qman_cgr *cgr_tx;
 	struct qman_fq debug_queues[2];
 	uint16_t nb_rx_queues;
 	uint16_t nb_tx_queues;
diff --git a/drivers/net/dpaa/dpaa_rxtx.c b/drivers/net/dpaa/dpaa_rxtx.c
index 3aeecb7d2e..819cad7c6c 100644
--- a/drivers/net/dpaa/dpaa_rxtx.c
+++ b/drivers/net/dpaa/dpaa_rxtx.c
@@ -398,6 +398,76 @@ dpaa_eth_fd_to_mbuf(const struct qm_fd *fd, uint32_t ifid)
 	return mbuf;
 }
 
+/*
+ * Free the mbuf(s) behind a frame descriptor; registered as the ERN callback.
+ * For a scatter-gather frame, the buffer carrying the SG table and the first
+ * segment are released with rte_pktmbuf_free_seg().
+ * NOTE(review): later segments are only linked, not freed here - confirm.
+ * Returns 0, or a non-zero value when the FD address cannot be translated.
+ */
+uint16_t
+dpaa_free_mbuf(const struct qm_fd *fd)
+{
+	struct rte_mbuf *mbuf;
+	struct dpaa_bp_info *bp_info;
+	uint8_t format;
+	void *ptr;
+
+	bp_info = DPAA_BPID_TO_POOL_INFO(fd->bpid);
+	format = (fd->opaque & DPAA_FD_FORMAT_MASK) >> DPAA_FD_FORMAT_SHIFT;
+	if (unlikely(format == qm_fd_sg)) {
+		struct rte_mbuf *first_seg, *prev_seg, *cur_seg, *temp;
+		struct qm_sg_entry *sgt, *sg_temp;
+		void *vaddr, *sg_vaddr;
+		int i = 0;
+		uint16_t fd_offset = fd->offset;
+
+		vaddr = DPAA_MEMPOOL_PTOV(bp_info, qm_fd_addr(fd));
+		if (!vaddr) {
+			DPAA_PMD_ERR("unable to convert physical address");
+			return -1;
+		}
+		sgt = vaddr + fd_offset;
+		sg_temp = &sgt[i++];
+		hw_sg_to_cpu(sg_temp);
+		temp = (struct rte_mbuf *)
+			((char *)vaddr - bp_info->meta_data_size);
+		sg_vaddr = DPAA_MEMPOOL_PTOV(bp_info,
+						qm_sg_entry_get64(sg_temp));
+
+		first_seg = (struct rte_mbuf *)((char *)sg_vaddr -
+						bp_info->meta_data_size);
+		first_seg->nb_segs = 1;
+		prev_seg = first_seg;
+		while (i < DPAA_SGT_MAX_ENTRIES) {
+			sg_temp = &sgt[i++];
+			hw_sg_to_cpu(sg_temp);
+			sg_vaddr = DPAA_MEMPOOL_PTOV(bp_info,
+						qm_sg_entry_get64(sg_temp));
+			cur_seg = (struct rte_mbuf *)((char *)sg_vaddr -
+						      bp_info->meta_data_size);
+			first_seg->nb_segs += 1;
+			prev_seg->next = cur_seg;
+			if (sg_temp->final) {
+				cur_seg->next = NULL;
+				break;
+			}
+			prev_seg = cur_seg;
+		}
+
+		rte_pktmbuf_free_seg(temp);
+		rte_pktmbuf_free_seg(first_seg);
+		return 0;
+	}
+
+	ptr = DPAA_MEMPOOL_PTOV(bp_info, qm_fd_addr(fd));
+	mbuf = (struct rte_mbuf *)((char *)ptr - bp_info->meta_data_size);
+
+	rte_pktmbuf_free(mbuf);
+
+	return 0;
+}
+
 /* Specific for LS1043 */
 void
 dpaa_rx_cb_no_prefetch(struct qman_fq **fq, struct qm_dqrr_entry **dqrr,
@@ -1011,6 +1081,18 @@ send_pkts:
 	return sent;
 }
 
+/*
+ * Tx burst used when Tx tail drop is enabled: first free the mbufs of frames
+ * reported through ERNs, then transmit through the regular Tx burst.
+ */
+uint16_t
+dpaa_eth_queue_tx_slow(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
+{
+	qman_ern_poll_free();
+
+	return dpaa_eth_queue_tx(q, bufs, nb_bufs);
+}
+
 uint16_t dpaa_eth_tx_drop_all(void *q  __rte_unused,
 			      struct rte_mbuf **bufs __rte_unused,
 		uint16_t nb_bufs __rte_unused)
diff --git a/drivers/net/dpaa/dpaa_rxtx.h b/drivers/net/dpaa/dpaa_rxtx.h
index 4f896fba11..fe8eb6dc76 100644
--- a/drivers/net/dpaa/dpaa_rxtx.h
+++ b/drivers/net/dpaa/dpaa_rxtx.h
@@ -254,6 +254,8 @@ struct annotations_t {
 
 uint16_t dpaa_eth_queue_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs);
 
+uint16_t dpaa_eth_queue_tx_slow(void *q, struct rte_mbuf **bufs,
+				uint16_t nb_bufs);
 uint16_t dpaa_eth_queue_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs);
 
 uint16_t dpaa_eth_tx_drop_all(void *q  __rte_unused,
@@ -266,6 +268,7 @@ int dpaa_eth_mbuf_to_sg_fd(struct rte_mbuf *mbuf,
 			   struct qm_fd *fd,
 			   uint32_t bpid);
 
+uint16_t dpaa_free_mbuf(const struct qm_fd *fd);
 void dpaa_rx_cb(struct qman_fq **fq,
 		struct qm_dqrr_entry **dqrr, void **bufs, int num_bufs);
 
-- 
2.20.1