net/dpaa: enable Tx queue taildrop
author    Gagandeep Singh <g.singh@nxp.com>
          Tue, 7 Jul 2020 09:22:24 +0000 (14:52 +0530)
committer Ferruh Yigit <ferruh.yigit@intel.com>
          Sat, 11 Jul 2020 04:18:52 +0000 (06:18 +0200)
Enable congestion handling/tail drop for TX queues.

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
Acked-by: Hemant Agrawal <hemant.agrawal@nxp.com>
drivers/bus/dpaa/base/qbman/qman.c
drivers/bus/dpaa/include/fsl_qman.h
drivers/bus/dpaa/rte_bus_dpaa_version.map
drivers/net/dpaa/dpaa_ethdev.c
drivers/net/dpaa/dpaa_ethdev.h
drivers/net/dpaa/dpaa_rxtx.c
drivers/net/dpaa/dpaa_rxtx.h
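The mechanics of the patch in brief: when the DPAA_TX_TAILDROP_THRESHOLD environment variable is set to a non-zero frame count, dpaa_dev_init() allocates one congestion group (CGR) per Tx queue (one queue per core), initializes each Tx FQ with tail drop (CGE) enabled on that CGR, and selects the dpaa_eth_queue_tx_slow() burst function, which drains software enqueue rejection notifications (ERNs) and frees the rejected mbufs via the dpaa_free_mbuf() callback registered at probe time. A minimal application-side sketch, assuming only that the variable is set before the bus probe runs (illustrative, not part of the patch):

#include <stdlib.h>
#include <rte_eal.h>

int main(int argc, char **argv)
{
	/* Threshold is in frames per Tx FQ; 0 (or unset) keeps Tx tail drop
	 * disabled, and values above UINT16_MAX fall back to the Rx default
	 * (CGR_RX_PERFQ_THRESH) in dpaa_dev_init().
	 */
	setenv("DPAA_TX_TAILDROP_THRESHOLD", "512", 1);

	if (rte_eal_init(argc, argv) < 0)
		return -1;

	/* ... usual port/queue setup and Tx/Rx loop ... */
	return 0;
}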

drivers/bus/dpaa/base/qbman/qman.c
index b596e79..447c091 100644
@@ -40,6 +40,8 @@
                        spin_unlock(&__fq478->fqlock); \
        } while (0)
 
+static qman_cb_free_mbuf qman_free_mbuf_cb;
+
 static inline void fq_set(struct qman_fq *fq, u32 mask)
 {
        dpaa_set_bits(mask, &fq->flags);
@@ -790,6 +792,47 @@ static inline void fq_state_change(struct qman_portal *p, struct qman_fq *fq,
        FQUNLOCK(fq);
 }
 
+void
+qman_ern_register_cb(qman_cb_free_mbuf cb)
+{
+       qman_free_mbuf_cb = cb;
+}
+
+
+void
+qman_ern_poll_free(void)
+{
+       struct qman_portal *p = get_affine_portal();
+       u8 verb, num = 0;
+       const struct qm_mr_entry *msg;
+       const struct qm_fd *fd;
+       struct qm_mr_entry swapped_msg;
+
+       qm_mr_pvb_update(&p->p);
+       msg = qm_mr_current(&p->p);
+
+       while (msg != NULL) {
+               swapped_msg = *msg;
+               hw_fd_to_cpu(&swapped_msg.ern.fd);
+               verb = msg->ern.verb & QM_MR_VERB_TYPE_MASK;
+               fd = &swapped_msg.ern.fd;
+
+               if (unlikely(verb & 0x20)) {
+                       printf("HW ERN notification, Nothing to do\n");
+               } else {
+                       if ((fd->bpid & 0xff) != 0xff)
+                               qman_free_mbuf_cb(fd);
+               }
+
+               num++;
+               qm_mr_next(&p->p);
+               qm_mr_pvb_update(&p->p);
+               msg = qm_mr_current(&p->p);
+       }
+
+       qm_mr_cci_consume(&p->p, num);
+}
+
 static u32 __poll_portal_slow(struct qman_portal *p, u32 is)
 {
        const struct qm_mr_entry *msg;
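qman_ern_poll_free() above walks the message ring of the affine portal, skips hardware ERNs (verb bit 0x20 set) and frames that carry no valid buffer pool ID (bpid 0xff), and passes every remaining rejected frame descriptor to the callback installed with qman_ern_register_cb(). A conforming callback only has to translate the frame descriptor back into its mbuf and free it; a minimal single-segment sketch under that assumption is shown below (example_ern_free_cb is a hypothetical name; the dpaa_free_mbuf() added later in this patch additionally handles scatter-gather frames):

#include <rte_mbuf.h>
#include <fsl_qman.h>
#include <dpaa_mempool.h>	/* DPAA_BPID_TO_POOL_INFO, DPAA_MEMPOOL_PTOV */

static uint16_t
example_ern_free_cb(const struct qm_fd *fd)
{
	/* bpid -> pool info, then phys -> virt for the frame buffer */
	struct dpaa_bp_info *bp_info = DPAA_BPID_TO_POOL_INFO(fd->bpid);
	void *va = DPAA_MEMPOOL_PTOV(bp_info, qm_fd_addr(fd));
	/* the mbuf header sits meta_data_size bytes before the buffer */
	struct rte_mbuf *mbuf =
		(struct rte_mbuf *)((char *)va - bp_info->meta_data_size);

	rte_pktmbuf_free(mbuf);
	return 0;
}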
drivers/bus/dpaa/include/fsl_qman.h
index 78b698f..0d9cfc3 100644
@@ -1158,6 +1158,10 @@ typedef void (*qman_cb_mr)(struct qman_portal *qm, struct qman_fq *fq,
 /* This callback type is used when handling DCP ERNs */
 typedef void (*qman_cb_dc_ern)(struct qman_portal *qm,
                                const struct qm_mr_entry *msg);
+
+/* This callback type is used to free mbufs on enqueue rejection (ERN) */
+typedef uint16_t (*qman_cb_free_mbuf)(const struct qm_fd *fd);
+
 /*
  * s/w-visible states. Ie. tentatively scheduled + truly scheduled + active +
  * held-active + held-suspended are just "sched". Things like "retired" will not
@@ -1808,6 +1812,19 @@ __rte_internal
 int qman_enqueue_multi(struct qman_fq *fq, const struct qm_fd *fd, u32 *flags,
                       int frames_to_send);
 
+/**
+ * qman_ern_poll_free - Poll the MR and call the registered callback to free
+ * mbufs when SW ERNs are received.
+ */
+__rte_internal
+void qman_ern_poll_free(void);
+
+/**
+ * qman_ern_register_cb - Register a callback function to free buffers.
+ */
+__rte_internal
+void qman_ern_register_cb(qman_cb_free_mbuf cb);
+
 /**
  * qman_enqueue_multi_fq - Enqueue multiple frames to their respective frame
  * queues.
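Taken together, the two new internal calls are meant to be used as a pair: register the free callback once at probe time, and drain the message ring from the transmit path so that frames rejected by the Tx CGR are returned to their mempool. A sketch of that wiring, assuming a PMD context with an affine portal (placeholder function names; the real hook-up is in dpaa_ethdev.c and dpaa_rxtx.c below):

#include <rte_mbuf.h>
#include <fsl_qman.h>
#include "dpaa_rxtx.h"	/* dpaa_free_mbuf, dpaa_eth_queue_tx */

/* called once from the probe path */
static void
example_register_ern_handler(void)
{
	qman_ern_register_cb(dpaa_free_mbuf);
}

/* slow Tx burst: free rejected frames, then enqueue as usual */
static uint16_t
example_tx_burst_slow(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
	qman_ern_poll_free();

	return dpaa_eth_queue_tx(q, bufs, nb_bufs);
}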
drivers/bus/dpaa/rte_bus_dpaa_version.map
index 491c507..86e59f6 100644
@@ -57,6 +57,8 @@ INTERNAL {
        qman_enqueue;
        qman_enqueue_multi;
        qman_enqueue_multi_fq;
+       qman_ern_poll_free;
+       qman_ern_register_cb;
        qman_fq_fqid;
        qman_fq_portal_irqsource_add;
        qman_fq_portal_irqsource_remove;
drivers/net/dpaa/dpaa_ethdev.c
index f1c9a71..fd2c0c6 100644
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  *   Copyright 2016 Freescale Semiconductor, Inc. All rights reserved.
- *   Copyright 2017-2019 NXP
+ *   Copyright 2017-2020 NXP
  *
  */
 /* System headers */
@@ -86,9 +86,12 @@ static int dpaa_push_mode_max_queue = DPAA_DEFAULT_PUSH_MODE_QUEUE;
 static int dpaa_push_queue_idx; /* Queue index which are in push mode*/
 
 
-/* Per FQ Taildrop in frame count */
+/* Per RX FQ Taildrop in frame count */
 static unsigned int td_threshold = CGR_RX_PERFQ_THRESH;
 
+/* Per TX FQ Taildrop in frame count, disabled by default */
+static unsigned int td_tx_threshold;
+
 struct rte_dpaa_xstats_name_off {
        char name[RTE_ETH_XSTATS_NAME_SIZE];
        uint32_t offset;
@@ -275,7 +278,11 @@ static int dpaa_eth_dev_start(struct rte_eth_dev *dev)
        PMD_INIT_FUNC_TRACE();
 
        /* Change tx callback to the real one */
-       dev->tx_pkt_burst = dpaa_eth_queue_tx;
+       if (dpaa_intf->cgr_tx)
+               dev->tx_pkt_burst = dpaa_eth_queue_tx_slow;
+       else
+               dev->tx_pkt_burst = dpaa_eth_queue_tx;
+
        fman_if_enable_rx(dpaa_intf->fif);
 
        return 0;
@@ -867,6 +874,7 @@ int dpaa_eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
        DPAA_PMD_INFO("Tx queue setup for queue index: %d fq_id (0x%x)",
                        queue_idx, dpaa_intf->tx_queues[queue_idx].fqid);
        dev->data->tx_queues[queue_idx] = &dpaa_intf->tx_queues[queue_idx];
+
        return 0;
 }
 
@@ -1236,9 +1244,19 @@ without_cgr:
 
 /* Initialise a Tx FQ */
 static int dpaa_tx_queue_init(struct qman_fq *fq,
-                             struct fman_if *fman_intf)
+                             struct fman_if *fman_intf,
+                             struct qman_cgr *cgr_tx)
 {
        struct qm_mcc_initfq opts = {0};
+       struct qm_mcc_initcgr cgr_opts = {
+               .we_mask = QM_CGR_WE_CS_THRES |
+                               QM_CGR_WE_CSTD_EN |
+                               QM_CGR_WE_MODE,
+               .cgr = {
+                       .cstd_en = QM_CGR_EN,
+                       .mode = QMAN_CGR_MODE_FRAME
+               }
+       };
        int ret;
 
        ret = qman_create_fq(0, QMAN_FQ_FLAG_DYNAMIC_FQID |
@@ -1257,6 +1275,27 @@ static int dpaa_tx_queue_init(struct qman_fq *fq,
        opts.fqd.context_a.hi = 0x80000000 | fman_dealloc_bufs_mask_hi;
        opts.fqd.context_a.lo = 0 | fman_dealloc_bufs_mask_lo;
        DPAA_PMD_DEBUG("init tx fq %p, fqid 0x%x", fq, fq->fqid);
+
+       if (cgr_tx) {
+               /* Enable tail drop with cgr on this queue */
+               qm_cgr_cs_thres_set64(&cgr_opts.cgr.cs_thres,
+                                     td_tx_threshold, 0);
+               cgr_tx->cb = NULL;
+               ret = qman_create_cgr(cgr_tx, QMAN_CGR_FLAG_USE_INIT,
+                                     &cgr_opts);
+               if (ret) {
+                       DPAA_PMD_WARN(
+                               "tx taildrop init fail on tx fqid 0x%x(ret=%d)",
+                               fq->fqid, ret);
+                       goto without_cgr;
+               }
+               opts.we_mask |= QM_INITFQ_WE_CGID;
+               opts.fqd.cgid = cgr_tx->cgrid;
+               opts.fqd.fq_ctrl |= QM_FQCTRL_CGE;
+               DPAA_PMD_DEBUG("Tx FQ tail drop enabled, threshold = %d\n",
+                               td_tx_threshold);
+       }
+without_cgr:
        ret = qman_init_fq(fq, QMAN_INITFQ_FLAG_SCHED, &opts);
        if (ret)
                DPAA_PMD_ERR("init tx fqid 0x%x failed %d", fq->fqid, ret);
@@ -1309,6 +1348,7 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev)
        struct fman_if *fman_intf;
        struct fman_if_bpool *bp, *tmp_bp;
        uint32_t cgrid[DPAA_MAX_NUM_PCD_QUEUES];
+       uint32_t cgrid_tx[MAX_DPAA_CORES];
        char eth_buf[RTE_ETHER_ADDR_FMT_SIZE];
 
        PMD_INIT_FUNC_TRACE();
@@ -1319,7 +1359,10 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev)
                eth_dev->dev_ops = &dpaa_devops;
                /* Plugging of UCODE burst API not supported in Secondary */
                eth_dev->rx_pkt_burst = dpaa_eth_queue_rx;
-               eth_dev->tx_pkt_burst = dpaa_eth_queue_tx;
+               if (dpaa_intf->cgr_tx)
+                       eth_dev->tx_pkt_burst = dpaa_eth_queue_tx_slow;
+               else
+                       eth_dev->tx_pkt_burst = dpaa_eth_queue_tx;
 #ifdef CONFIG_FSL_QMAN_FQ_LOOKUP
                qman_set_fq_lookup_table(
                                dpaa_intf->rx_queues->qman_fq_lookup_table);
@@ -1366,6 +1409,21 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev)
                return -ENOMEM;
        }
 
+       memset(cgrid, 0, sizeof(cgrid));
+       memset(cgrid_tx, 0, sizeof(cgrid_tx));
+
+       /* if DPAA_TX_TAILDROP_THRESHOLD is set, use that value; if 0, it means
+        * Tx tail drop is disabled.
+        */
+       if (getenv("DPAA_TX_TAILDROP_THRESHOLD")) {
+               td_tx_threshold = atoi(getenv("DPAA_TX_TAILDROP_THRESHOLD"));
+               DPAA_PMD_DEBUG("Tail drop threshold env configured: %u",
+                              td_tx_threshold);
+               /* if an overly large value is configured, fall back to the default */
+               if (td_tx_threshold > UINT16_MAX)
+                       td_tx_threshold = CGR_RX_PERFQ_THRESH;
+       }
+
        /* If congestion control is enabled globally*/
        if (td_threshold) {
                dpaa_intf->cgr_rx = rte_zmalloc(NULL,
@@ -1414,9 +1472,36 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev)
                goto free_rx;
        }
 
+       /* If Tx congestion control is enabled globally */
+       if (td_tx_threshold) {
+               dpaa_intf->cgr_tx = rte_zmalloc(NULL,
+                       sizeof(struct qman_cgr) * MAX_DPAA_CORES,
+                       MAX_CACHELINE);
+               if (!dpaa_intf->cgr_tx) {
+                       DPAA_PMD_ERR("Failed to alloc mem for cgr_tx\n");
+                       ret = -ENOMEM;
+                       goto free_rx;
+               }
+
+               ret = qman_alloc_cgrid_range(&cgrid_tx[0], MAX_DPAA_CORES,
+                                            1, 0);
+               if (ret != MAX_DPAA_CORES) {
+                       DPAA_PMD_WARN("insufficient CGRIDs available");
+                       ret = -EINVAL;
+                       goto free_rx;
+               }
+       } else {
+               dpaa_intf->cgr_tx = NULL;
+       }
+
+
        for (loop = 0; loop < MAX_DPAA_CORES; loop++) {
+               if (dpaa_intf->cgr_tx)
+                       dpaa_intf->cgr_tx[loop].cgrid = cgrid_tx[loop];
+
                ret = dpaa_tx_queue_init(&dpaa_intf->tx_queues[loop],
-                                        fman_intf);
+                       fman_intf,
+                       dpaa_intf->cgr_tx ? &dpaa_intf->cgr_tx[loop] : NULL);
                if (ret)
                        goto free_tx;
                dpaa_intf->tx_queues[loop].dpaa_intf = dpaa_intf;
@@ -1487,6 +1572,7 @@ free_tx:
 
 free_rx:
        rte_free(dpaa_intf->cgr_rx);
+       rte_free(dpaa_intf->cgr_tx);
        rte_free(dpaa_intf->rx_queues);
        dpaa_intf->rx_queues = NULL;
        dpaa_intf->nb_rx_queues = 0;
@@ -1527,6 +1613,17 @@ dpaa_dev_uninit(struct rte_eth_dev *dev)
        rte_free(dpaa_intf->cgr_rx);
        dpaa_intf->cgr_rx = NULL;
 
+       /* Release TX congestion Groups */
+       if (dpaa_intf->cgr_tx) {
+               for (loop = 0; loop < MAX_DPAA_CORES; loop++)
+                       qman_delete_cgr(&dpaa_intf->cgr_tx[loop]);
+
+               qman_release_cgrid_range(dpaa_intf->cgr_tx[0].cgrid,
+                                        MAX_DPAA_CORES);
+               rte_free(dpaa_intf->cgr_tx);
+               dpaa_intf->cgr_tx = NULL;
+       }
+
        rte_free(dpaa_intf->rx_queues);
        dpaa_intf->rx_queues = NULL;
 
@@ -1631,6 +1728,8 @@ rte_dpaa_probe(struct rte_dpaa_driver *dpaa_drv __rte_unused,
        eth_dev->device = &dpaa_dev->device;
        dpaa_dev->eth_dev = eth_dev;
 
+       qman_ern_register_cb(dpaa_free_mbuf);
+
        /* Invoke PMD device initialization function */
        diag = dpaa_dev_init(eth_dev);
        if (diag == 0) {
drivers/net/dpaa/dpaa_ethdev.h
index 6a6477a..d4261f8 100644
@@ -111,6 +111,7 @@ struct dpaa_if {
        struct qman_fq *rx_queues;
        struct qman_cgr *cgr_rx;
        struct qman_fq *tx_queues;
+       struct qman_cgr *cgr_tx;
        struct qman_fq debug_queues[2];
        uint16_t nb_rx_queues;
        uint16_t nb_tx_queues;
drivers/net/dpaa/dpaa_rxtx.c
index 3aeecb7..819cad7 100644
@@ -398,6 +398,69 @@ dpaa_eth_fd_to_mbuf(const struct qm_fd *fd, uint32_t ifid)
        return mbuf;
 }
 
+uint16_t
+dpaa_free_mbuf(const struct qm_fd *fd)
+{
+       struct rte_mbuf *mbuf;
+       struct dpaa_bp_info *bp_info;
+       uint8_t format;
+       void *ptr;
+
+       bp_info = DPAA_BPID_TO_POOL_INFO(fd->bpid);
+       format = (fd->opaque & DPAA_FD_FORMAT_MASK) >> DPAA_FD_FORMAT_SHIFT;
+       if (unlikely(format == qm_fd_sg)) {
+               struct rte_mbuf *first_seg, *prev_seg, *cur_seg, *temp;
+               struct qm_sg_entry *sgt, *sg_temp;
+               void *vaddr, *sg_vaddr;
+               int i = 0;
+               uint16_t fd_offset = fd->offset;
+
+               vaddr = DPAA_MEMPOOL_PTOV(bp_info, qm_fd_addr(fd));
+               if (!vaddr) {
+                       DPAA_PMD_ERR("unable to convert physical address");
+                       return -1;
+               }
+               sgt = vaddr + fd_offset;
+               sg_temp = &sgt[i++];
+               hw_sg_to_cpu(sg_temp);
+               temp = (struct rte_mbuf *)
+                       ((char *)vaddr - bp_info->meta_data_size);
+               sg_vaddr = DPAA_MEMPOOL_PTOV(bp_info,
+                                               qm_sg_entry_get64(sg_temp));
+
+               first_seg = (struct rte_mbuf *)((char *)sg_vaddr -
+                                               bp_info->meta_data_size);
+               first_seg->nb_segs = 1;
+               prev_seg = first_seg;
+               while (i < DPAA_SGT_MAX_ENTRIES) {
+                       sg_temp = &sgt[i++];
+                       hw_sg_to_cpu(sg_temp);
+                       sg_vaddr = DPAA_MEMPOOL_PTOV(bp_info,
+                                               qm_sg_entry_get64(sg_temp));
+                       cur_seg = (struct rte_mbuf *)((char *)sg_vaddr -
+                                                     bp_info->meta_data_size);
+                       first_seg->nb_segs += 1;
+                       prev_seg->next = cur_seg;
+                       if (sg_temp->final) {
+                               cur_seg->next = NULL;
+                               break;
+                       }
+                       prev_seg = cur_seg;
+               }
+
+               rte_pktmbuf_free_seg(temp);
+               rte_pktmbuf_free(first_seg);
+               return 0;
+       }
+
+       ptr = DPAA_MEMPOOL_PTOV(bp_info, qm_fd_addr(fd));
+       mbuf = (struct rte_mbuf *)((char *)ptr - bp_info->meta_data_size);
+
+       rte_pktmbuf_free(mbuf);
+
+       return 0;
+}
+
 /* Specific for LS1043 */
 void
 dpaa_rx_cb_no_prefetch(struct qman_fq **fq, struct qm_dqrr_entry **dqrr,
@@ -1011,6 +1074,14 @@ send_pkts:
        return sent;
 }
 
+uint16_t
+dpaa_eth_queue_tx_slow(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
+{
+       qman_ern_poll_free();
+
+       return dpaa_eth_queue_tx(q, bufs, nb_bufs);
+}
+
 uint16_t dpaa_eth_tx_drop_all(void *q  __rte_unused,
                              struct rte_mbuf **bufs __rte_unused,
                uint16_t nb_bufs __rte_unused)
drivers/net/dpaa/dpaa_rxtx.h
index 4f896fb..fe8eb6d 100644
@@ -254,6 +254,8 @@ struct annotations_t {
 
 uint16_t dpaa_eth_queue_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs);
 
+uint16_t dpaa_eth_queue_tx_slow(void *q, struct rte_mbuf **bufs,
+                               uint16_t nb_bufs);
 uint16_t dpaa_eth_queue_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs);
 
 uint16_t dpaa_eth_tx_drop_all(void *q  __rte_unused,
@@ -266,6 +268,7 @@ int dpaa_eth_mbuf_to_sg_fd(struct rte_mbuf *mbuf,
                           struct qm_fd *fd,
                           uint32_t bpid);
 
+uint16_t dpaa_free_mbuf(const struct qm_fd *fd);
 void dpaa_rx_cb(struct qman_fq **fq,
                struct qm_dqrr_entry **dqrr, void **bufs, int num_bufs);