cxgbe: optimize forwarding performance for 40G
author Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Thu, 8 Oct 2015 13:46:05 +0000 (19:16 +0530)
committer Thomas Monjalon <thomas.monjalon@6wind.com>
Tue, 20 Oct 2015 16:49:18 +0000 (18:49 +0200)
Update sge initialization with respect to free-list manager configuration
and ingress arbiter. Also update refill logic to refill mbufs only after
a certain threshold for rx.  Optimize tx packet prefetch.

Approx. 3 MPPS improvement seen in forwarding performance after the
optimization.

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: Kumar Sanghvi <kumaras@chelsio.com>
doc/guides/rel_notes/release_2_2.rst
drivers/net/cxgbe/base/t4_regs.h
drivers/net/cxgbe/cxgbe_main.c
drivers/net/cxgbe/sge.c

index 89cba4d..3068ef8 100644 (file)
@@ -4,6 +4,10 @@ DPDK Release 2.2
 New Features
 ------------
 
+* **Enhanced support for the Chelsio CXGBE driver.**
+
+  *  Optimize forwarding performance for Chelsio T5 40GbE cards.
+
 
 Resolved Issues
 ---------------
index cd28b59..9057e40 100644 (file)
 #define A_SGE_FL_BUFFER_SIZE2 0x104c
 #define A_SGE_FL_BUFFER_SIZE3 0x1050
 
+#define A_SGE_FLM_CFG 0x1090
+
+#define S_CREDITCNT    4
+#define M_CREDITCNT    0x3U
+#define V_CREDITCNT(x) ((x) << S_CREDITCNT)
+#define G_CREDITCNT(x) (((x) >> S_CREDITCNT) & M_CREDITCNT)
+
+#define S_CREDITCNTPACKING    2
+#define M_CREDITCNTPACKING    0x3U
+#define V_CREDITCNTPACKING(x) ((x) << S_CREDITCNTPACKING)
+#define G_CREDITCNTPACKING(x) (((x) >> S_CREDITCNTPACKING) & M_CREDITCNTPACKING)
+
 #define A_SGE_CONM_CTRL 0x1094
 
 #define S_EGRTHRESHOLD    8
 
 #define A_SGE_CONTROL2 0x1124
 
+#define S_IDMAARBROUNDROBIN    19
+#define V_IDMAARBROUNDROBIN(x) ((x) << S_IDMAARBROUNDROBIN)
+#define F_IDMAARBROUNDROBIN    V_IDMAARBROUNDROBIN(1U)
+
 #define S_INGPACKBOUNDARY    16
 #define M_INGPACKBOUNDARY    0x7U
 #define V_INGPACKBOUNDARY(x) ((x) << S_INGPACKBOUNDARY)
index 3755444..316b87d 100644 (file)
@@ -422,6 +422,20 @@ static int adap_init0_tweaks(struct adapter *adapter)
        t4_set_reg_field(adapter, A_SGE_CONTROL, V_PKTSHIFT(M_PKTSHIFT),
                         V_PKTSHIFT(rx_dma_offset));
 
+       /*
+        * Update both credit-count fields of SGE_FLM_CFG.  The mask for each
+        * field is the field mask shifted into position (V_xxx(M_xxx)) so
+        * that it covers exactly the bits being written.
+        */
+       t4_set_reg_field(adapter, A_SGE_FLM_CFG,
+                        V_CREDITCNT(M_CREDITCNT) |
+                        V_CREDITCNTPACKING(M_CREDITCNTPACKING),
+                        V_CREDITCNT(3) | V_CREDITCNTPACKING(1));
+
+       /* Set the IDMAARBROUNDROBIN bit of SGE_CONTROL2. */
+       t4_set_reg_field(adapter, A_SGE_CONTROL2, F_IDMAARBROUNDROBIN,
+                        F_IDMAARBROUNDROBIN);
+
        /*
         * Don't include the "IP Pseudo Header" in CPL_RX_PKT checksums: Linux
         * adds the pseudo header itself.
index 6eb1244..69ab487 100644 (file)
@@ -286,8 +286,7 @@ static void unmap_rx_buf(struct sge_fl *q)
 
 static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 {
-       /* see if we have exceeded q->size / 4 */
-       if (q->pend_cred >= (q->size / 4)) {
+       if (q->pend_cred >= 64) {
                u32 val = adap->params.arch.sge_fl_db;
 
                if (is_t4(adap->params.chip))
@@ -1054,7 +1053,6 @@ out_free:
                return 0;
        }
 
-       rte_prefetch0(&((&txq->q)->sdesc->mbuf->pool));
        pi = (struct port_info *)txq->eth_dev->data->dev_private;
        adap = pi->adapter;
 
@@ -1070,6 +1068,7 @@ out_free:
                                txq->stats.mapping_err++;
                                goto out_free;
                        }
+                       rte_prefetch0((volatile void *)addr);
                        return tx_do_packet_coalesce(txq, mbuf, cflits, adap,
                                                     pi, addr);
                } else {
@@ -1454,7 +1453,8 @@ static int process_responses(struct sge_rspq *q, int budget,
                        unsigned int params;
                        u32 val;
 
-                       __refill_fl(q->adapter, &rxq->fl);
+                       if (fl_cap(&rxq->fl) - rxq->fl.avail >= 64)
+                               __refill_fl(q->adapter, &rxq->fl);
                        params = V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX);
                        q->next_intr_params = params;
                        val = V_CIDXINC(cidx_inc) | V_SEINTARM(params);