From 880ead4e14a4dcecb04a361bcff3a9ce82c2c5ae Mon Sep 17 00:00:00 2001
From: Kumar Sanghvi <kumaras@chelsio.com>
Date: Sun, 11 Mar 2018 04:18:25 +0530
Subject: [PATCH] net/cxgbe: update Tx and Rx path for VF

On the Tx path, add the fw_eth_tx_pkt_vm_wr and fw_eth_tx_pkts_vm_wr
work requests to transmit packets over a VF. Use is_pf4() to select
the correct work request size and the offsets within the work request.
On the Rx path, use pktshift to adjust the data offset within the mbuf.

Signed-off-by: Kumar Sanghvi <kumaras@chelsio.com>
Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
---
 drivers/net/cxgbe/base/adapter.h        |   5 +
 drivers/net/cxgbe/base/t4fw_interface.h |  25 +++++
 drivers/net/cxgbe/cxgbe_ethdev.c        |   8 +-
 drivers/net/cxgbe/cxgbe_pfvf.h          |   4 +
 drivers/net/cxgbe/cxgbevf_ethdev.c      |   4 +-
 drivers/net/cxgbe/sge.c                 | 139 +++++++++++++++++++-----
 6 files changed, 149 insertions(+), 36 deletions(-)

diff --git a/drivers/net/cxgbe/base/adapter.h b/drivers/net/cxgbe/base/adapter.h
index 95752d1b4a..fcea4055a9 100644
--- a/drivers/net/cxgbe/base/adapter.h
+++ b/drivers/net/cxgbe/base/adapter.h
@@ -198,6 +198,7 @@ struct sge_eth_rxq {                /* a SW Ethernet Rx queue */
  * scenario where a packet needs 32 bytes.
  */
 #define ETH_COALESCE_PKT_NUM 15
+#define ETH_COALESCE_VF_PKT_NUM 7
 #define ETH_COALESCE_PKT_PER_DESC 2
 
 struct tx_eth_coal_desc {
@@ -227,6 +228,10 @@ struct eth_coalesce {
 	unsigned int len;
 	unsigned int flits;
 	unsigned int max;
+	__u8 ethmacdst[ETHER_ADDR_LEN];
+	__u8 ethmacsrc[ETHER_ADDR_LEN];
+	__be16 ethtype;
+	__be16 vlantci;
 };
 
 struct sge_txq {
diff --git a/drivers/net/cxgbe/base/t4fw_interface.h b/drivers/net/cxgbe/base/t4fw_interface.h
index 3a89814ff3..274f00b957 100644
--- a/drivers/net/cxgbe/base/t4fw_interface.h
+++ b/drivers/net/cxgbe/base/t4fw_interface.h
@@ -84,6 +84,8 @@ enum fw_memtype {
 enum fw_wr_opcodes {
 	FW_ETH_TX_PKT_WR	= 0x08,
 	FW_ETH_TX_PKTS_WR	= 0x09,
+	FW_ETH_TX_PKT_VM_WR	= 0x11,
+	FW_ETH_TX_PKTS_VM_WR	= 0x12,
 	FW_ETH_TX_PKTS2_WR      = 0x78,
 };
 
@@ -146,6 +148,29 @@ struct fw_eth_tx_pkts_wr {
 	__u8   type;
 };
 
+struct fw_eth_tx_pkt_vm_wr {
+	__be32 op_immdlen;
+	__be32 equiq_to_len16;
+	__be32 r3[2];
+	__u8   ethmacdst[6];
+	__u8   ethmacsrc[6];
+	__be16 ethtype;
+	__be16 vlantci;
+};
+
+struct fw_eth_tx_pkts_vm_wr {
+	__be32 op_pkd;
+	__be32 equiq_to_len16;
+	__be32 r3;
+	__be16 plen;
+	__u8   npkt;
+	__u8   r4;
+	__u8   ethmacdst[6];
+	__u8   ethmacsrc[6];
+	__be16 ethtype;
+	__be16 vlantci;
+};
+
 /******************************************************************************
  *  C O M M A N D s
  *********************/
diff --git a/drivers/net/cxgbe/cxgbe_ethdev.c b/drivers/net/cxgbe/cxgbe_ethdev.c
index ef0a3f2654..f039364876 100644
--- a/drivers/net/cxgbe/cxgbe_ethdev.c
+++ b/drivers/net/cxgbe/cxgbe_ethdev.c
@@ -86,8 +86,8 @@
  */
 #include "t4_pci_id_tbl.h"
 
-static uint16_t cxgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
-				uint16_t nb_pkts)
+uint16_t cxgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+			 uint16_t nb_pkts)
 {
 	struct sge_eth_txq *txq = (struct sge_eth_txq *)tx_queue;
 	uint16_t pkts_sent, pkts_remain;
@@ -120,8 +120,8 @@ static uint16_t cxgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return total_sent;
 }
 
-static uint16_t cxgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
-				uint16_t nb_pkts)
+uint16_t cxgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+			 uint16_t nb_pkts)
 {
 	struct sge_eth_rxq *rxq = (struct sge_eth_rxq *)rx_queue;
 	unsigned int work_done;
diff --git a/drivers/net/cxgbe/cxgbe_pfvf.h b/drivers/net/cxgbe/cxgbe_pfvf.h
index e3d8533cae..19bfd6d92a 100644
--- a/drivers/net/cxgbe/cxgbe_pfvf.h
+++ b/drivers/net/cxgbe/cxgbe_pfvf.h
@@ -34,5 +34,9 @@ int cxgbe_dev_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu);
 int cxgbe_dev_start(struct rte_eth_dev *eth_dev);
 int cxgbe_dev_link_update(struct rte_eth_dev *eth_dev,
 			  int wait_to_complete);
+uint16_t cxgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+			 uint16_t nb_pkts);
+uint16_t cxgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+			 uint16_t nb_pkts);
 const uint32_t *cxgbe_dev_supported_ptypes_get(struct rte_eth_dev *eth_dev);
 #endif /* _CXGBE_PFVF_H_ */
diff --git a/drivers/net/cxgbe/cxgbevf_ethdev.c b/drivers/net/cxgbe/cxgbevf_ethdev.c
index 3b1deac523..a966303410 100644
--- a/drivers/net/cxgbe/cxgbevf_ethdev.c
+++ b/drivers/net/cxgbe/cxgbevf_ethdev.c
@@ -68,8 +68,8 @@ static int eth_cxgbevf_dev_init(struct rte_eth_dev *eth_dev)
 	CXGBE_FUNC_TRACE();
 
 	eth_dev->dev_ops = &cxgbevf_eth_dev_ops;
-	eth_dev->rx_pkt_burst = NULL;
-	eth_dev->tx_pkt_burst = NULL;
+	eth_dev->rx_pkt_burst = &cxgbe_recv_pkts;
+	eth_dev->tx_pkt_burst = &cxgbe_xmit_pkts;
 	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 
 	/* for secondary processes, we attach to ethdevs allocated by primary
diff --git a/drivers/net/cxgbe/sge.c b/drivers/net/cxgbe/sge.c
index aba1a49f3d..54e13fb9ad 100644
--- a/drivers/net/cxgbe/sge.c
+++ b/drivers/net/cxgbe/sge.c
@@ -337,7 +337,11 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 		 * mechanism.
 		 */
 		if (unlikely(!q->bar2_addr)) {
-			t4_write_reg_relaxed(adap, MYPF_REG(A_SGE_PF_KDOORBELL),
+			u32 reg = is_pf4(adap) ? MYPF_REG(A_SGE_PF_KDOORBELL) :
+						 T4VF_SGE_BASE_ADDR +
+						 A_SGE_VF_KDOORBELL;
+
+			t4_write_reg_relaxed(adap, reg,
 					     val | V_QID(q->cntxt_id));
 		} else {
 			writel_relaxed(val | V_QID(q->bar2_qid),
@@ -570,12 +574,16 @@ static inline int is_eth_imm(const struct rte_mbuf *m)
 /**
  * calc_tx_flits - calculate the number of flits for a packet Tx WR
  * @m: the packet
+ * @adap: adapter structure pointer
  *
  * Returns the number of flits needed for a Tx WR for the given Ethernet
  * packet, including the needed WR and CPL headers.
  */
-static inline unsigned int calc_tx_flits(const struct rte_mbuf *m)
+static inline unsigned int calc_tx_flits(const struct rte_mbuf *m,
+					 struct adapter *adap)
 {
+	size_t wr_size = is_pf4(adap) ? sizeof(struct fw_eth_tx_pkt_wr) :
+					sizeof(struct fw_eth_tx_pkt_vm_wr);
 	unsigned int flits;
 	int hdrlen;
 
@@ -600,11 +608,10 @@ static inline unsigned int calc_tx_flits(const struct rte_mbuf *m)
 	 */
 	flits = sgl_len(m->nb_segs);
 	if (m->tso_segsz)
-		flits += (sizeof(struct fw_eth_tx_pkt_wr) +
-			  sizeof(struct cpl_tx_pkt_lso_core) +
+		flits += (wr_size + sizeof(struct cpl_tx_pkt_lso_core) +
 			  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
 	else
-		flits += (sizeof(struct fw_eth_tx_pkt_wr) +
+		flits += (wr_size +
 			  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
 	return flits;
 }
@@ -848,14 +855,20 @@ static void tx_timer_cb(void *data)
 static inline void ship_tx_pkt_coalesce_wr(struct adapter *adap,
 					   struct sge_eth_txq *txq)
 {
-	u32 wr_mid;
-	struct sge_txq *q = &txq->q;
+	struct fw_eth_tx_pkts_vm_wr *vmwr;
+	const size_t fw_hdr_copy_len = (sizeof(vmwr->ethmacdst) +
+					sizeof(vmwr->ethmacsrc) +
+					sizeof(vmwr->ethtype) +
+					sizeof(vmwr->vlantci));
 	struct fw_eth_tx_pkts_wr *wr;
+	struct sge_txq *q = &txq->q;
 	unsigned int ndesc;
+	u32 wr_mid;
 
 	/* fill the pkts WR header */
 	wr = (void *)&q->desc[q->pidx];
 	wr->op_pkd = htonl(V_FW_WR_OP(FW_ETH_TX_PKTS2_WR));
+	vmwr = (void *)&q->desc[q->pidx];
 
 	wr_mid = V_FW_WR_LEN16(DIV_ROUND_UP(q->coalesce.flits, 2));
 	ndesc = flits_to_desc(q->coalesce.flits);
@@ -863,12 +876,18 @@ static inline void ship_tx_pkt_coalesce_wr(struct adapter *adap,
 	wr->plen = cpu_to_be16(q->coalesce.len);
 	wr->npkt = q->coalesce.idx;
 	wr->r3 = 0;
-	wr->type = q->coalesce.type;
+	if (is_pf4(adap)) {
+		wr->op_pkd = htonl(V_FW_WR_OP(FW_ETH_TX_PKTS2_WR));
+		wr->type = q->coalesce.type;
+	} else {
+		wr->op_pkd = htonl(V_FW_WR_OP(FW_ETH_TX_PKTS_VM_WR));
+		vmwr->r4 = 0;
+		memcpy((void *)vmwr->ethmacdst, (void *)q->coalesce.ethmacdst,
+		       fw_hdr_copy_len);
+	}
 
 	/* zero out coalesce structure members */
-	q->coalesce.idx = 0;
-	q->coalesce.flits = 0;
-	q->coalesce.len = 0;
+	memset((void *)&q->coalesce, 0, sizeof(struct eth_coalesce));
 
 	txq_advance(q, ndesc);
 	txq->stats.coal_wr++;
@@ -896,13 +915,27 @@ static inline int should_tx_packet_coalesce(struct sge_eth_txq *txq,
 					    unsigned int *nflits,
 					    struct adapter *adap)
 {
+	struct fw_eth_tx_pkts_vm_wr *wr;
+	const size_t fw_hdr_copy_len = (sizeof(wr->ethmacdst) +
+					sizeof(wr->ethmacsrc) +
+					sizeof(wr->ethtype) +
+					sizeof(wr->vlantci));
 	struct sge_txq *q = &txq->q;
 	unsigned int flits, ndesc;
 	unsigned char type = 0;
-	int credits;
+	int credits, wr_size;
 
 	/* use coal WR type 1 when no frags are present */
 	type = (mbuf->nb_segs == 1) ? 1 : 0;
+	if (!is_pf4(adap)) {
+		if (!type)
+			return 0;
+
+		if (q->coalesce.idx && memcmp((void *)q->coalesce.ethmacdst,
+					      rte_pktmbuf_mtod(mbuf, void *),
+					      fw_hdr_copy_len))
+			ship_tx_pkt_coalesce_wr(adap, txq);
+	}
 
 	if (unlikely(type != q->coalesce.type && q->coalesce.idx))
 		ship_tx_pkt_coalesce_wr(adap, txq);
@@ -948,16 +981,21 @@ static inline int should_tx_packet_coalesce(struct sge_eth_txq *txq,
 
 new:
 	/* start a new pkts WR, the WR header is not filled below */
-	flits += sizeof(struct fw_eth_tx_pkts_wr) / sizeof(__be64);
+	wr_size = is_pf4(adap) ? sizeof(struct fw_eth_tx_pkts_wr) :
+				 sizeof(struct fw_eth_tx_pkts_vm_wr);
+	flits += wr_size / sizeof(__be64);
 	ndesc = flits_to_desc(q->coalesce.flits + flits);
 	credits = txq_avail(q) - ndesc;
 
 	if (unlikely(credits < 0 || wraps_around(q, ndesc)))
 		return 0;
-	q->coalesce.flits += 2;
+	q->coalesce.flits += wr_size / sizeof(__be64);
 	q->coalesce.type = type;
 	q->coalesce.ptr = (unsigned char *)&q->desc[q->pidx] +
-			   2 * sizeof(__be64);
+			   q->coalesce.flits * sizeof(__be64);
+	if (!is_pf4(adap))
+		memcpy((void *)q->coalesce.ethmacdst,
+		       rte_pktmbuf_mtod(mbuf, void *), fw_hdr_copy_len);
 	return 1;
 }
 
@@ -987,6 +1025,8 @@ static inline int tx_do_packet_coalesce(struct sge_eth_txq *txq,
 	struct cpl_tx_pkt_core *cpl;
 	struct tx_sw_desc *sd;
 	unsigned int idx = q->coalesce.idx, len = mbuf->pkt_len;
+	unsigned int max_coal_pkt_num = is_pf4(adap) ? ETH_COALESCE_PKT_NUM :
+						       ETH_COALESCE_VF_PKT_NUM;
 
 #ifdef RTE_LIBRTE_CXGBE_TPUT
 	RTE_SET_USED(nb_pkts);
@@ -1030,9 +1070,12 @@ static inline int tx_do_packet_coalesce(struct sge_eth_txq *txq,
 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(mbuf->vlan_tci);
 	}
 
-	cpl->ctrl0 = htonl(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
-			   V_TXPKT_INTF(pi->tx_chan) |
-			   V_TXPKT_PF(adap->pf));
+	cpl->ctrl0 = htonl(V_TXPKT_OPCODE(CPL_TX_PKT_XT));
+	if (is_pf4(adap))
+		cpl->ctrl0 |= htonl(V_TXPKT_INTF(pi->tx_chan) |
+				    V_TXPKT_PF(adap->pf));
+	else
+		cpl->ctrl0 |= htonl(V_TXPKT_INTF(pi->port_id));
 	cpl->pack = htons(0);
 	cpl->len = htons(len);
 	cpl->ctrl1 = cpu_to_be64(cntrl);
@@ -1061,7 +1104,7 @@ static inline int tx_do_packet_coalesce(struct sge_eth_txq *txq,
 	sd->coalesce.idx = (idx & 1) + 1;
 
 	/* send the coaelsced work request if max reached */
-	if (++q->coalesce.idx == ETH_COALESCE_PKT_NUM
+	if (++q->coalesce.idx == max_coal_pkt_num
 #ifndef RTE_LIBRTE_CXGBE_TPUT
 	    || q->coalesce.idx >= nb_pkts
 #endif
@@ -1085,6 +1128,7 @@ int t4_eth_xmit(struct sge_eth_txq *txq, struct rte_mbuf *mbuf,
 	struct adapter *adap;
 	struct rte_mbuf *m = mbuf;
 	struct fw_eth_tx_pkt_wr *wr;
+	struct fw_eth_tx_pkt_vm_wr *vmwr;
 	struct cpl_tx_pkt_core *cpl;
 	struct tx_sw_desc *d;
 	dma_addr_t addr[m->nb_segs];
@@ -1141,7 +1185,7 @@ out_free:
 	if (txq->q.coalesce.idx)
 		ship_tx_pkt_coalesce_wr(adap, txq);
 
-	flits = calc_tx_flits(m);
+	flits = calc_tx_flits(m, adap);
 	ndesc = flits_to_desc(flits);
 	credits = txq_avail(&txq->q) - ndesc;
 
@@ -1163,31 +1207,55 @@ out_free:
 	}
 
 	wr = (void *)&txq->q.desc[txq->q.pidx];
+	vmwr = (void *)&txq->q.desc[txq->q.pidx];
 	wr->equiq_to_len16 = htonl(wr_mid);
-	wr->r3 = rte_cpu_to_be_64(0);
-	end = (u64 *)wr + flits;
+	if (is_pf4(adap)) {
+		wr->r3 = rte_cpu_to_be_64(0);
+		end = (u64 *)wr + flits;
+	} else {
+		const size_t fw_hdr_copy_len = (sizeof(vmwr->ethmacdst) +
+						sizeof(vmwr->ethmacsrc) +
+						sizeof(vmwr->ethtype) +
+						sizeof(vmwr->vlantci));
+
+		vmwr->r3[0] = rte_cpu_to_be_32(0);
+		vmwr->r3[1] = rte_cpu_to_be_32(0);
+		memcpy((void *)vmwr->ethmacdst, rte_pktmbuf_mtod(m, void *),
+		       fw_hdr_copy_len);
+		end = (u64 *)vmwr + flits;
+	}
 
 	len = 0;
 	len += sizeof(*cpl);
 
 	/* Coalescing skipped and we send through normal path */
 	if (!(m->ol_flags & PKT_TX_TCP_SEG)) {
-		wr->op_immdlen = htonl(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
+		wr->op_immdlen = htonl(V_FW_WR_OP(is_pf4(adap) ?
+						  FW_ETH_TX_PKT_WR :
+						  FW_ETH_TX_PKT_VM_WR) |
 				       V_FW_WR_IMMDLEN(len));
-		cpl = (void *)(wr + 1);
+		if (is_pf4(adap))
+			cpl = (void *)(wr + 1);
+		else
+			cpl = (void *)(vmwr + 1);
 		if (m->ol_flags & PKT_TX_IP_CKSUM) {
 			cntrl = hwcsum(adap->params.chip, m) |
 				F_TXPKT_IPCSUM_DIS;
 			txq->stats.tx_cso++;
 		}
 	} else {
-		lso = (void *)(wr + 1);
+		if (is_pf4(adap))
+			lso = (void *)(wr + 1);
+		else
+			lso = (void *)(vmwr + 1);
 		v6 = (m->ol_flags & PKT_TX_IPV6) != 0;
 		l3hdr_len = m->l3_len;
 		l4hdr_len = m->l4_len;
 		eth_xtra_len = m->l2_len - ETHER_HDR_LEN;
 		len += sizeof(*lso);
-		wr->op_immdlen = htonl(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
+		wr->op_immdlen = htonl(V_FW_WR_OP(is_pf4(adap) ?
+						  FW_ETH_TX_PKT_WR :
+						  FW_ETH_TX_PKT_VM_WR) |
 				       V_FW_WR_IMMDLEN(len));
 		lso->lso_ctrl = htonl(V_LSO_OPCODE(CPL_TX_PKT_LSO) |
 				      F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE |
@@ -1221,9 +1289,14 @@ out_free:
 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->vlan_tci);
 	}
 
-	cpl->ctrl0 = htonl(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
-			   V_TXPKT_INTF(pi->tx_chan) |
-			   V_TXPKT_PF(adap->pf));
+	cpl->ctrl0 = htonl(V_TXPKT_OPCODE(CPL_TX_PKT_XT));
+	if (is_pf4(adap))
+		cpl->ctrl0 |= htonl(V_TXPKT_INTF(pi->tx_chan) |
+				    V_TXPKT_PF(adap->pf));
+	else
+		cpl->ctrl0 |= htonl(V_TXPKT_INTF(pi->port_id) |
+				    V_TXPKT_PF(0));
+
 	cpl->pack = htons(0);
 	cpl->len = htons(m->pkt_len);
 	cpl->ctrl1 = cpu_to_be64(cntrl);
@@ -1468,6 +1541,7 @@ static int process_responses(struct sge_rspq *q, int budget,
 		rsp_type = G_RSPD_TYPE(rc->u.type_gen);
 
 		if (likely(rsp_type == X_RSPD_TYPE_FLBUF)) {
+			struct sge *s = &q->adapter->sge;
 			unsigned int stat_pidx;
 			int stat_pidx_diff;
 
@@ -1554,6 +1628,7 @@ static int process_responses(struct sge_rspq *q, int budget,
 					pkt->vlan_tci = ntohs(cpl->vlan);
 				}
 
+				rte_pktmbuf_adj(pkt, s->pktshift);
 				rxq->stats.pkts++;
 				rxq->stats.rx_bytes += pkt->pkt_len;
 				rx_pkts[budget - budget_left] = pkt;
@@ -1612,7 +1687,11 @@ int cxgbe_poll(struct sge_rspq *q, struct rte_mbuf **rx_pkts,
 		val = V_CIDXINC(cidx_inc) | V_SEINTARM(params);
 
 		if (unlikely(!q->bar2_addr)) {
-			t4_write_reg(q->adapter, MYPF_REG(A_SGE_PF_GTS),
+			u32 reg = is_pf4(q->adapter) ? MYPF_REG(A_SGE_PF_GTS) :
+						       T4VF_SGE_BASE_ADDR +
+						       A_SGE_VF_GTS;
+
+			t4_write_reg(q->adapter, reg,
 				     val | V_INGRESSQID((u32)q->cntxt_id));
 		} else {
 			writel(val | V_INGRESSQID(q->bar2_qid),
-- 
2.39.5