From 880ead4e14a4dcecb04a361bcff3a9ce82c2c5ae Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Sun, 11 Mar 2018 04:18:25 +0530 Subject: [PATCH] net/cxgbe: update Tx and Rx path for VF On TX path, add fw_eth_tx_pkt_vm_wr to transmit packets over VF. Use is_pf4() to correctly calculate the work request size and offsets within the work request. On RX path, use pktshift to adjust data offset within the mbuf. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy --- drivers/net/cxgbe/base/adapter.h | 5 + drivers/net/cxgbe/base/t4fw_interface.h | 25 +++++ drivers/net/cxgbe/cxgbe_ethdev.c | 8 +- drivers/net/cxgbe/cxgbe_pfvf.h | 4 + drivers/net/cxgbe/cxgbevf_ethdev.c | 4 +- drivers/net/cxgbe/sge.c | 139 +++++++++++++++++++----- 6 files changed, 149 insertions(+), 36 deletions(-) diff --git a/drivers/net/cxgbe/base/adapter.h b/drivers/net/cxgbe/base/adapter.h index 95752d1b4a..fcea4055a9 100644 --- a/drivers/net/cxgbe/base/adapter.h +++ b/drivers/net/cxgbe/base/adapter.h @@ -198,6 +198,7 @@ struct sge_eth_rxq { /* a SW Ethernet Rx queue */ * scenario where a packet needs 32 bytes. */ #define ETH_COALESCE_PKT_NUM 15 +#define ETH_COALESCE_VF_PKT_NUM 7 #define ETH_COALESCE_PKT_PER_DESC 2 struct tx_eth_coal_desc { @@ -227,6 +228,10 @@ struct eth_coalesce { unsigned int len; unsigned int flits; unsigned int max; + __u8 ethmacdst[ETHER_ADDR_LEN]; + __u8 ethmacsrc[ETHER_ADDR_LEN]; + __be16 ethtype; + __be16 vlantci; }; struct sge_txq { diff --git a/drivers/net/cxgbe/base/t4fw_interface.h b/drivers/net/cxgbe/base/t4fw_interface.h index 3a89814ff3..274f00b957 100644 --- a/drivers/net/cxgbe/base/t4fw_interface.h +++ b/drivers/net/cxgbe/base/t4fw_interface.h @@ -84,6 +84,8 @@ enum fw_memtype { enum fw_wr_opcodes { FW_ETH_TX_PKT_WR = 0x08, FW_ETH_TX_PKTS_WR = 0x09, + FW_ETH_TX_PKT_VM_WR = 0x11, + FW_ETH_TX_PKTS_VM_WR = 0x12, FW_ETH_TX_PKTS2_WR = 0x78, }; @@ -146,6 +148,29 @@ struct fw_eth_tx_pkts_wr { __u8 type; }; +struct fw_eth_tx_pkt_vm_wr { + __be32 op_immdlen; + __be32 equiq_to_len16; + __be32 r3[2]; + __u8 ethmacdst[6]; + __u8 ethmacsrc[6]; + __be16 ethtype; + __be16 vlantci; +}; + +struct fw_eth_tx_pkts_vm_wr { + __be32 op_pkd; + __be32 equiq_to_len16; + __be32 r3; + __be16 plen; + __u8 npkt; + __u8 r4; + __u8 ethmacdst[6]; + __u8 ethmacsrc[6]; + __be16 ethtype; + __be16 vlantci; +}; + /****************************************************************************** * C O M M A N D s *********************/ diff --git a/drivers/net/cxgbe/cxgbe_ethdev.c b/drivers/net/cxgbe/cxgbe_ethdev.c index ef0a3f2654..f039364876 100644 --- a/drivers/net/cxgbe/cxgbe_ethdev.c +++ b/drivers/net/cxgbe/cxgbe_ethdev.c @@ -86,8 +86,8 @@ */ #include "t4_pci_id_tbl.h" -static uint16_t cxgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t nb_pkts) +uint16_t cxgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) { struct sge_eth_txq *txq = (struct sge_eth_txq *)tx_queue; uint16_t pkts_sent, pkts_remain; @@ -120,8 +120,8 @@ static uint16_t cxgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, return total_sent; } -static uint16_t cxgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t nb_pkts) +uint16_t cxgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) { struct sge_eth_rxq *rxq = (struct sge_eth_rxq *)rx_queue; unsigned int work_done; diff --git a/drivers/net/cxgbe/cxgbe_pfvf.h b/drivers/net/cxgbe/cxgbe_pfvf.h index e3d8533cae..19bfd6d92a 100644 --- a/drivers/net/cxgbe/cxgbe_pfvf.h +++ b/drivers/net/cxgbe/cxgbe_pfvf.h @@ -34,5 +34,9 @@ int cxgbe_dev_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu); int cxgbe_dev_start(struct rte_eth_dev *eth_dev); int cxgbe_dev_link_update(struct rte_eth_dev *eth_dev, int wait_to_complete); +uint16_t cxgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +uint16_t cxgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); const uint32_t *cxgbe_dev_supported_ptypes_get(struct rte_eth_dev *eth_dev); #endif /* _CXGBE_PFVF_H_ */ diff --git a/drivers/net/cxgbe/cxgbevf_ethdev.c b/drivers/net/cxgbe/cxgbevf_ethdev.c index 3b1deac523..a966303410 100644 --- a/drivers/net/cxgbe/cxgbevf_ethdev.c +++ b/drivers/net/cxgbe/cxgbevf_ethdev.c @@ -68,8 +68,8 @@ static int eth_cxgbevf_dev_init(struct rte_eth_dev *eth_dev) CXGBE_FUNC_TRACE(); eth_dev->dev_ops = &cxgbevf_eth_dev_ops; - eth_dev->rx_pkt_burst = NULL; - eth_dev->tx_pkt_burst = NULL; + eth_dev->rx_pkt_burst = &cxgbe_recv_pkts; + eth_dev->tx_pkt_burst = &cxgbe_xmit_pkts; pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); /* for secondary processes, we attach to ethdevs allocated by primary diff --git a/drivers/net/cxgbe/sge.c b/drivers/net/cxgbe/sge.c index aba1a49f3d..54e13fb9ad 100644 --- a/drivers/net/cxgbe/sge.c +++ b/drivers/net/cxgbe/sge.c @@ -337,7 +337,11 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) * mechanism. */ if (unlikely(!q->bar2_addr)) { - t4_write_reg_relaxed(adap, MYPF_REG(A_SGE_PF_KDOORBELL), + u32 reg = is_pf4(adap) ? MYPF_REG(A_SGE_PF_KDOORBELL) : + T4VF_SGE_BASE_ADDR + + A_SGE_VF_KDOORBELL; + + t4_write_reg_relaxed(adap, reg, val | V_QID(q->cntxt_id)); } else { writel_relaxed(val | V_QID(q->bar2_qid), @@ -570,12 +574,16 @@ static inline int is_eth_imm(const struct rte_mbuf *m) /** * calc_tx_flits - calculate the number of flits for a packet Tx WR * @m: the packet + * @adap: adapter structure pointer * * Returns the number of flits needed for a Tx WR for the given Ethernet * packet, including the needed WR and CPL headers. */ -static inline unsigned int calc_tx_flits(const struct rte_mbuf *m) +static inline unsigned int calc_tx_flits(const struct rte_mbuf *m, + struct adapter *adap) { + size_t wr_size = is_pf4(adap) ? sizeof(struct fw_eth_tx_pkt_wr) : + sizeof(struct fw_eth_tx_pkt_vm_wr); unsigned int flits; int hdrlen; @@ -600,11 +608,10 @@ static inline unsigned int calc_tx_flits(const struct rte_mbuf *m) */ flits = sgl_len(m->nb_segs); if (m->tso_segsz) - flits += (sizeof(struct fw_eth_tx_pkt_wr) + - sizeof(struct cpl_tx_pkt_lso_core) + + flits += (wr_size + sizeof(struct cpl_tx_pkt_lso_core) + sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64); else - flits += (sizeof(struct fw_eth_tx_pkt_wr) + + flits += (wr_size + sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64); return flits; } @@ -848,14 +855,20 @@ static void tx_timer_cb(void *data) static inline void ship_tx_pkt_coalesce_wr(struct adapter *adap, struct sge_eth_txq *txq) { - u32 wr_mid; - struct sge_txq *q = &txq->q; + struct fw_eth_tx_pkts_vm_wr *vmwr; + const size_t fw_hdr_copy_len = (sizeof(vmwr->ethmacdst) + + sizeof(vmwr->ethmacsrc) + + sizeof(vmwr->ethtype) + + sizeof(vmwr->vlantci)); struct fw_eth_tx_pkts_wr *wr; + struct sge_txq *q = &txq->q; unsigned int ndesc; + u32 wr_mid; /* fill the pkts WR header */ wr = (void *)&q->desc[q->pidx]; wr->op_pkd = htonl(V_FW_WR_OP(FW_ETH_TX_PKTS2_WR)); + vmwr = (void *)&q->desc[q->pidx]; wr_mid = V_FW_WR_LEN16(DIV_ROUND_UP(q->coalesce.flits, 2)); ndesc = flits_to_desc(q->coalesce.flits); @@ -863,12 +876,18 @@ static inline void ship_tx_pkt_coalesce_wr(struct adapter *adap, wr->plen = cpu_to_be16(q->coalesce.len); wr->npkt = q->coalesce.idx; wr->r3 = 0; - wr->type = q->coalesce.type; + if (is_pf4(adap)) { + wr->op_pkd = htonl(V_FW_WR_OP(FW_ETH_TX_PKTS2_WR)); + wr->type = q->coalesce.type; + } else { + wr->op_pkd = htonl(V_FW_WR_OP(FW_ETH_TX_PKTS_VM_WR)); + vmwr->r4 = 0; + memcpy((void *)vmwr->ethmacdst, (void *)q->coalesce.ethmacdst, + fw_hdr_copy_len); + } /* zero out coalesce structure members */ - q->coalesce.idx = 0; - q->coalesce.flits = 0; - q->coalesce.len = 0; + memset((void *)&q->coalesce, 0, sizeof(struct eth_coalesce)); txq_advance(q, ndesc); txq->stats.coal_wr++; @@ -896,13 +915,27 @@ static inline int should_tx_packet_coalesce(struct sge_eth_txq *txq, unsigned int *nflits, struct adapter *adap) { + struct fw_eth_tx_pkts_vm_wr *wr; + const size_t fw_hdr_copy_len = (sizeof(wr->ethmacdst) + + sizeof(wr->ethmacsrc) + + sizeof(wr->ethtype) + + sizeof(wr->vlantci)); struct sge_txq *q = &txq->q; unsigned int flits, ndesc; unsigned char type = 0; - int credits; + int credits, wr_size; /* use coal WR type 1 when no frags are present */ type = (mbuf->nb_segs == 1) ? 1 : 0; + if (!is_pf4(adap)) { + if (!type) + return 0; + + if (q->coalesce.idx && memcmp((void *)q->coalesce.ethmacdst, + rte_pktmbuf_mtod(mbuf, void *), + fw_hdr_copy_len)) + ship_tx_pkt_coalesce_wr(adap, txq); + } if (unlikely(type != q->coalesce.type && q->coalesce.idx)) ship_tx_pkt_coalesce_wr(adap, txq); @@ -948,16 +981,21 @@ static inline int should_tx_packet_coalesce(struct sge_eth_txq *txq, new: /* start a new pkts WR, the WR header is not filled below */ - flits += sizeof(struct fw_eth_tx_pkts_wr) / sizeof(__be64); + wr_size = is_pf4(adap) ? sizeof(struct fw_eth_tx_pkts_wr) : + sizeof(struct fw_eth_tx_pkts_vm_wr); + flits += wr_size / sizeof(__be64); ndesc = flits_to_desc(q->coalesce.flits + flits); credits = txq_avail(q) - ndesc; if (unlikely(credits < 0 || wraps_around(q, ndesc))) return 0; - q->coalesce.flits += 2; + q->coalesce.flits += wr_size / sizeof(__be64); q->coalesce.type = type; q->coalesce.ptr = (unsigned char *)&q->desc[q->pidx] + - 2 * sizeof(__be64); + q->coalesce.flits * sizeof(__be64); + if (!is_pf4(adap)) + memcpy((void *)q->coalesce.ethmacdst, + rte_pktmbuf_mtod(mbuf, void *), fw_hdr_copy_len); return 1; } @@ -987,6 +1025,8 @@ static inline int tx_do_packet_coalesce(struct sge_eth_txq *txq, struct cpl_tx_pkt_core *cpl; struct tx_sw_desc *sd; unsigned int idx = q->coalesce.idx, len = mbuf->pkt_len; + unsigned int max_coal_pkt_num = is_pf4(adap) ? ETH_COALESCE_PKT_NUM : + ETH_COALESCE_VF_PKT_NUM; #ifdef RTE_LIBRTE_CXGBE_TPUT RTE_SET_USED(nb_pkts); @@ -1030,9 +1070,12 @@ static inline int tx_do_packet_coalesce(struct sge_eth_txq *txq, cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(mbuf->vlan_tci); } - cpl->ctrl0 = htonl(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | - V_TXPKT_INTF(pi->tx_chan) | - V_TXPKT_PF(adap->pf)); + cpl->ctrl0 = htonl(V_TXPKT_OPCODE(CPL_TX_PKT_XT)); + if (is_pf4(adap)) + cpl->ctrl0 |= htonl(V_TXPKT_INTF(pi->tx_chan) | + V_TXPKT_PF(adap->pf)); + else + cpl->ctrl0 |= htonl(V_TXPKT_INTF(pi->port_id)); cpl->pack = htons(0); cpl->len = htons(len); cpl->ctrl1 = cpu_to_be64(cntrl); @@ -1061,7 +1104,7 @@ static inline int tx_do_packet_coalesce(struct sge_eth_txq *txq, sd->coalesce.idx = (idx & 1) + 1; /* send the coaelsced work request if max reached */ - if (++q->coalesce.idx == ETH_COALESCE_PKT_NUM + if (++q->coalesce.idx == max_coal_pkt_num #ifndef RTE_LIBRTE_CXGBE_TPUT || q->coalesce.idx >= nb_pkts #endif @@ -1085,6 +1128,7 @@ int t4_eth_xmit(struct sge_eth_txq *txq, struct rte_mbuf *mbuf, struct adapter *adap; struct rte_mbuf *m = mbuf; struct fw_eth_tx_pkt_wr *wr; + struct fw_eth_tx_pkt_vm_wr *vmwr; struct cpl_tx_pkt_core *cpl; struct tx_sw_desc *d; dma_addr_t addr[m->nb_segs]; @@ -1141,7 +1185,7 @@ out_free: if (txq->q.coalesce.idx) ship_tx_pkt_coalesce_wr(adap, txq); - flits = calc_tx_flits(m); + flits = calc_tx_flits(m, adap); ndesc = flits_to_desc(flits); credits = txq_avail(&txq->q) - ndesc; @@ -1163,31 +1207,55 @@ out_free: } wr = (void *)&txq->q.desc[txq->q.pidx]; + vmwr = (void *)&txq->q.desc[txq->q.pidx]; wr->equiq_to_len16 = htonl(wr_mid); - wr->r3 = rte_cpu_to_be_64(0); - end = (u64 *)wr + flits; + if (is_pf4(adap)) { + wr->r3 = rte_cpu_to_be_64(0); + end = (u64 *)wr + flits; + } else { + const size_t fw_hdr_copy_len = (sizeof(vmwr->ethmacdst) + + sizeof(vmwr->ethmacsrc) + + sizeof(vmwr->ethtype) + + sizeof(vmwr->vlantci)); + + vmwr->r3[0] = rte_cpu_to_be_32(0); + vmwr->r3[1] = rte_cpu_to_be_32(0); + memcpy((void *)vmwr->ethmacdst, rte_pktmbuf_mtod(m, void *), + fw_hdr_copy_len); + end = (u64 *)vmwr + flits; + } len = 0; len += sizeof(*cpl); /* Coalescing skipped and we send through normal path */ if (!(m->ol_flags & PKT_TX_TCP_SEG)) { - wr->op_immdlen = htonl(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | + wr->op_immdlen = htonl(V_FW_WR_OP(is_pf4(adap) ? + FW_ETH_TX_PKT_WR : + FW_ETH_TX_PKT_VM_WR) | V_FW_WR_IMMDLEN(len)); - cpl = (void *)(wr + 1); + if (is_pf4(adap)) + cpl = (void *)(wr + 1); + else + cpl = (void *)(vmwr + 1); if (m->ol_flags & PKT_TX_IP_CKSUM) { cntrl = hwcsum(adap->params.chip, m) | F_TXPKT_IPCSUM_DIS; txq->stats.tx_cso++; } } else { - lso = (void *)(wr + 1); + if (is_pf4(adap)) + lso = (void *)(wr + 1); + else + lso = (void *)(vmwr + 1); v6 = (m->ol_flags & PKT_TX_IPV6) != 0; l3hdr_len = m->l3_len; l4hdr_len = m->l4_len; eth_xtra_len = m->l2_len - ETHER_HDR_LEN; len += sizeof(*lso); - wr->op_immdlen = htonl(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | + wr->op_immdlen = htonl(V_FW_WR_OP(is_pf4(adap) ? + FW_ETH_TX_PKT_WR : + FW_ETH_TX_PKT_VM_WR) | V_FW_WR_IMMDLEN(len)); lso->lso_ctrl = htonl(V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | @@ -1221,9 +1289,14 @@ out_free: cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->vlan_tci); } - cpl->ctrl0 = htonl(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | - V_TXPKT_INTF(pi->tx_chan) | - V_TXPKT_PF(adap->pf)); + cpl->ctrl0 = htonl(V_TXPKT_OPCODE(CPL_TX_PKT_XT)); + if (is_pf4(adap)) + cpl->ctrl0 |= htonl(V_TXPKT_INTF(pi->tx_chan) | + V_TXPKT_PF(adap->pf)); + else + cpl->ctrl0 |= htonl(V_TXPKT_INTF(pi->port_id) | + V_TXPKT_PF(0)); + cpl->pack = htons(0); cpl->len = htons(m->pkt_len); cpl->ctrl1 = cpu_to_be64(cntrl); @@ -1468,6 +1541,7 @@ static int process_responses(struct sge_rspq *q, int budget, rsp_type = G_RSPD_TYPE(rc->u.type_gen); if (likely(rsp_type == X_RSPD_TYPE_FLBUF)) { + struct sge *s = &q->adapter->sge; unsigned int stat_pidx; int stat_pidx_diff; @@ -1554,6 +1628,7 @@ static int process_responses(struct sge_rspq *q, int budget, pkt->vlan_tci = ntohs(cpl->vlan); } + rte_pktmbuf_adj(pkt, s->pktshift); rxq->stats.pkts++; rxq->stats.rx_bytes += pkt->pkt_len; rx_pkts[budget - budget_left] = pkt; @@ -1612,7 +1687,11 @@ int cxgbe_poll(struct sge_rspq *q, struct rte_mbuf **rx_pkts, val = V_CIDXINC(cidx_inc) | V_SEINTARM(params); if (unlikely(!q->bar2_addr)) { - t4_write_reg(q->adapter, MYPF_REG(A_SGE_PF_GTS), + u32 reg = is_pf4(q->adapter) ? MYPF_REG(A_SGE_PF_GTS) : + T4VF_SGE_BASE_ADDR + + A_SGE_VF_GTS; + + t4_write_reg(q->adapter, reg, val | V_INGRESSQID((u32)q->cntxt_id)); } else { writel(val | V_INGRESSQID(q->bar2_qid), -- 2.20.1