From 54ecae2e190bfc870123ce29c83dfed9a90bf636 Mon Sep 17 00:00:00 2001
From: Harish Patil
Date: Fri, 6 Jan 2017 00:16:45 -0800
Subject: [PATCH] net/qede: fix scatter-gather

- Make qede_process_sg_pkts() inline and add unlikely check
- Fix mbuf segment chaining logic in qede_process_sg_pkts()
- Change qede_encode_sg_bd() to return total segments required
- Fix first TX buffer descriptor's length
- Replace repetitive code using a macro

Fixes: bec0228816c0 ("net/qede: support scatter gather")

Signed-off-by: Harish Patil
---
 drivers/net/qede/qede_rxtx.c | 139 ++++++++++++++++-------------------
 drivers/net/qede/qede_rxtx.h |   4 -
 2 files changed, 65 insertions(+), 78 deletions(-)

diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c
index 814d384f74..ecff5bc81d 100644
--- a/drivers/net/qede/qede_rxtx.c
+++ b/drivers/net/qede/qede_rxtx.c
@@ -810,39 +810,28 @@ static inline uint32_t qede_rx_cqe_to_tunn_pkt_type(uint16_t flags)
 	return RTE_PTYPE_UNKNOWN;
 }

-
-int qede_process_sg_pkts(void *p_rxq, struct rte_mbuf *rx_mb,
-			 int num_segs, uint16_t pkt_len)
+static inline int
+qede_process_sg_pkts(void *p_rxq, struct rte_mbuf *rx_mb,
+		     uint8_t num_segs, uint16_t pkt_len)
 {
 	struct qede_rx_queue *rxq = p_rxq;
 	struct qede_dev *qdev = rxq->qdev;
 	struct ecore_dev *edev = &qdev->edev;
-	uint16_t sw_rx_index, cur_size;
-
 	register struct rte_mbuf *seg1 = NULL;
 	register struct rte_mbuf *seg2 = NULL;
+	uint16_t sw_rx_index;
+	uint16_t cur_size;

 	seg1 = rx_mb;
 	while (num_segs) {
-		cur_size = pkt_len > rxq->rx_buf_size ?
-				rxq->rx_buf_size : pkt_len;
-		if (!cur_size) {
-			PMD_RX_LOG(DEBUG, rxq,
-				   "SG packet, len and num BD mismatch\n");
+		cur_size = pkt_len > rxq->rx_buf_size ? rxq->rx_buf_size :
+							pkt_len;
+		if (unlikely(!cur_size)) {
+			PMD_RX_LOG(ERR, rxq, "Length is 0 while %u BDs"
+				   " left for mapping jumbo\n", num_segs);
 			qede_recycle_rx_bd_ring(rxq, qdev, num_segs);
 			return -EINVAL;
 		}
-
-		if (qede_alloc_rx_buffer(rxq)) {
-			uint8_t index;
-
-			PMD_RX_LOG(DEBUG, rxq, "Buffer allocation failed\n");
-			index = rxq->port_id;
-			rte_eth_devices[index].data->rx_mbuf_alloc_failed++;
-			rxq->rx_alloc_errors++;
-			return -ENOMEM;
-		}
-
 		sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
 		seg2 = rxq->sw_rx_ring[sw_rx_index].mbuf;
 		qede_rx_bd_ring_consume(rxq);
@@ -852,16 +841,9 @@ int qede_process_sg_pkts(void *p_rxq, struct rte_mbuf *rx_mb,
 		seg1 = seg1->next;
 		num_segs--;
 		rxq->rx_segs++;
-		continue;
 	}
-	seg1 = NULL;
-
-	if (pkt_len)
-		PMD_RX_LOG(DEBUG, rxq,
-			   "Mapped all BDs of jumbo, but still have %d bytes\n",
-			   pkt_len);

-	return ECORE_SUCCESS;
+	return 0;
 }

 uint16_t
@@ -878,11 +860,16 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	register struct rte_mbuf *rx_mb = NULL;
 	register struct rte_mbuf *seg1 = NULL;
 	enum eth_rx_cqe_type cqe_type;
-	uint16_t len, pad, preload_idx, pkt_len, parse_flag;
-	uint8_t csum_flag, num_segs;
+	uint16_t pkt_len; /* Sum of all BD segments */
+	uint16_t len; /* Length of first BD */
+	uint8_t num_segs = 1;
+	uint16_t pad;
+	uint16_t preload_idx;
+	uint8_t csum_flag;
+	uint16_t parse_flag;
 	enum rss_hash_type htype;
 	uint8_t tunn_parse_flag;
-	int ret;
+	uint8_t j;

 	hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr);
 	sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
@@ -915,6 +902,7 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)

 		fp_cqe = &cqe->fast_path_regular;
 		len = rte_le_to_cpu_16(fp_cqe->len_on_first_bd);
+		pkt_len = rte_le_to_cpu_16(fp_cqe->pkt_len);
 		pad = fp_cqe->placement_offset;
 		assert((len + pad) <= rx_mb->buf_len);

@@ -979,25 +967,29 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 			rxq->rx_alloc_errors++;
 			break;
 		}
-
 		qede_rx_bd_ring_consume(rxq);
-
 		if (fp_cqe->bd_num > 1) {
-			pkt_len = rte_le_to_cpu_16(fp_cqe->pkt_len);
+			PMD_RX_LOG(DEBUG, rxq, "Jumbo-over-BD packet: %02x BDs"
+				   " len on first: %04x Total Len: %04x\n",
+				   fp_cqe->bd_num, len, pkt_len);
 			num_segs = fp_cqe->bd_num - 1;
-
-			rxq->rx_segs++;
-
-			pkt_len -= len;
 			seg1 = rx_mb;
-			ret = qede_process_sg_pkts(p_rxq, seg1, num_segs,
-						   pkt_len);
-			if (ret != ECORE_SUCCESS) {
-				qede_recycle_rx_bd_ring(rxq, qdev,
-							fp_cqe->bd_num);
+			if (qede_process_sg_pkts(p_rxq, seg1, num_segs,
+						 pkt_len - len))
 				goto next_cqe;
+			for (j = 0; j < num_segs; j++) {
+				if (qede_alloc_rx_buffer(rxq)) {
+					PMD_RX_LOG(ERR, rxq,
+						"Buffer allocation failed\n");
+					rte_eth_devices[rxq->port_id].
+						data->rx_mbuf_alloc_failed++;
+					rxq->rx_alloc_errors++;
+					break;
+				}
+				rxq->rx_segs++;
 			}
 		}
+		rxq->rx_segs++; /* for the first segment */

 		/* Prefetch next mbuf while processing current one. */
 		preload_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
@@ -1007,7 +999,7 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		rx_mb->data_off = pad + RTE_PKTMBUF_HEADROOM;
 		rx_mb->nb_segs = fp_cqe->bd_num;
 		rx_mb->data_len = len;
-		rx_mb->pkt_len = fp_cqe->pkt_len;
+		rx_mb->pkt_len = pkt_len;
 		rx_mb->port = rxq->port_id;

 		htype = (uint8_t)GET_FIELD(fp_cqe->bitfields,
@@ -1114,17 +1106,16 @@ qede_process_tx_compl(struct ecore_dev *edev, struct qede_tx_queue *txq)
 }

 /* Populate scatter gather buffer descriptor fields */
-static inline uint16_t qede_encode_sg_bd(struct qede_tx_queue *p_txq,
-					 struct rte_mbuf *m_seg,
-					 uint16_t count,
-					 struct eth_tx_1st_bd *bd1)
+static inline uint8_t
+qede_encode_sg_bd(struct qede_tx_queue *p_txq, struct rte_mbuf *m_seg,
+		  struct eth_tx_1st_bd *bd1)
 {
 	struct qede_tx_queue *txq = p_txq;
 	struct eth_tx_2nd_bd *bd2 = NULL;
 	struct eth_tx_3rd_bd *bd3 = NULL;
 	struct eth_tx_bd *tx_bd = NULL;
-	uint16_t nb_segs = count;
 	dma_addr_t mapping;
+	uint8_t nb_segs = 1; /* min one segment per packet */

 	/* Check for scattered buffers */
 	while (m_seg) {
@@ -1133,28 +1124,27 @@ static inline uint16_t qede_encode_sg_bd(struct qede_tx_queue *p_txq,
 			    ecore_chain_produce(&txq->tx_pbl);
 			memset(bd2, 0, sizeof(*bd2));
 			mapping = rte_mbuf_data_dma_addr(m_seg);
-			bd2->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
-			bd2->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
-			bd2->nbytes = rte_cpu_to_le_16(m_seg->data_len);
+			QEDE_BD_SET_ADDR_LEN(bd2, mapping, m_seg->data_len);
+			PMD_TX_LOG(DEBUG, txq, "BD2 len %04x\n",
+				   m_seg->data_len);
 		} else if (nb_segs == 2) {
 			bd3 = (struct eth_tx_3rd_bd *)
 			    ecore_chain_produce(&txq->tx_pbl);
 			memset(bd3, 0, sizeof(*bd3));
 			mapping = rte_mbuf_data_dma_addr(m_seg);
-			bd3->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
-			bd3->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
-			bd3->nbytes = rte_cpu_to_le_16(m_seg->data_len);
+			QEDE_BD_SET_ADDR_LEN(bd3, mapping, m_seg->data_len);
+			PMD_TX_LOG(DEBUG, txq, "BD3 len %04x\n",
+				   m_seg->data_len);
 		} else {
 			tx_bd = (struct eth_tx_bd *)
 			    ecore_chain_produce(&txq->tx_pbl);
 			memset(tx_bd, 0, sizeof(*tx_bd));
 			mapping = rte_mbuf_data_dma_addr(m_seg);
-			tx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
-			tx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
-			tx_bd->nbytes = rte_cpu_to_le_16(m_seg->data_len);
+			QEDE_BD_SET_ADDR_LEN(tx_bd, mapping, m_seg->data_len);
+			PMD_TX_LOG(DEBUG, txq, "BD len %04x\n",
+				   m_seg->data_len);
 		}
 		nb_segs++;
-		bd1->data.nbds = nb_segs;
 		m_seg = m_seg->next;
 	}

@@ -1170,13 +1160,14 @@ qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	struct ecore_dev *edev = &qdev->edev;
 	struct qede_fastpath *fp;
 	struct eth_tx_1st_bd *bd1;
+	struct rte_mbuf *mbuf;
 	struct rte_mbuf *m_seg = NULL;
 	uint16_t nb_tx_pkts;
-	uint16_t nb_pkt_sent = 0;
 	uint16_t bd_prod;
 	uint16_t idx;
 	uint16_t tx_count;
-	uint16_t nb_segs = 0;
+	uint16_t nb_frags;
+	uint16_t nb_pkt_sent = 0;

 	fp = &qdev->fp_array[QEDE_RSS_COUNT(qdev) + txq->queue_id];

@@ -1198,19 +1189,19 @@ qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	while (nb_tx_pkts--) {
 		/* Fill the entry in the SW ring and the BDs in the FW ring */
 		idx = TX_PROD(txq);
-		struct rte_mbuf *mbuf = *tx_pkts++;
-
+		mbuf = *tx_pkts++;
 		txq->sw_tx_ring[idx].mbuf = mbuf;
 		bd1 = (struct eth_tx_1st_bd *)ecore_chain_produce(&txq->tx_pbl);
-		/* Zero init struct fields */
-		bd1->data.bd_flags.bitfields = 0;
-		bd1->data.bitfields = 0;
-
 		bd1->data.bd_flags.bitfields =
			1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
+		/* FW 8.10.x specific change */
+		bd1->data.bitfields =
+			(mbuf->pkt_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK)
+				<< ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
 		/* Map MBUF linear data for DMA and set in the first BD */
 		QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_dma_addr(mbuf),
-				     mbuf->pkt_len);
+				     mbuf->data_len);
+		PMD_TX_LOG(INFO, txq, "BD1 len %04x\n", mbuf->data_len);

 		if (RTE_ETH_IS_TUNNEL_PKT(mbuf->packet_type)) {
 			PMD_TX_LOG(INFO, txq, "Tx tunnel packet\n");
@@ -1267,18 +1258,18 @@ qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)

 		/* Handle fragmented MBUF */
 		m_seg = mbuf->next;
-		nb_segs++;
-		bd1->data.nbds = nb_segs;
 		/* Encode scatter gather buffer descriptors if required */
-		nb_segs = qede_encode_sg_bd(txq, m_seg, nb_segs, bd1);
-		txq->nb_tx_avail = txq->nb_tx_avail - nb_segs;
-		nb_segs = 0;
+		nb_frags = qede_encode_sg_bd(txq, m_seg, bd1);
+		bd1->data.nbds = nb_frags;
+		txq->nb_tx_avail -= nb_frags;
 		txq->sw_tx_prod++;
 		rte_prefetch0(txq->sw_tx_ring[TX_PROD(txq)].mbuf);
 		bd_prod =
 		    rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
 		nb_pkt_sent++;
 		txq->xmit_pkts++;
+		PMD_TX_LOG(INFO, txq, "nbds = %d pkt_len = %04x\n",
+			   bd1->data.nbds, mbuf->pkt_len);
 	}

 	/* Write value of prod idx into bd_prod */
diff --git a/drivers/net/qede/qede_rxtx.h b/drivers/net/qede/qede_rxtx.h
index 2a8645a086..a95b4abd17 100644
--- a/drivers/net/qede/qede_rxtx.h
+++ b/drivers/net/qede/qede_rxtx.h
@@ -41,10 +41,6 @@
 	(bd)->addr.hi = rte_cpu_to_le_32(U64_HI(maddr)); \
 	(bd)->addr.lo = rte_cpu_to_le_32(U64_LO(maddr)); \
 	(bd)->nbytes = rte_cpu_to_le_16(len); \
-	/* FW 8.10.x specific change */ \
-	(bd)->data.bitfields = ((len) & \
-		ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) \
-			<< ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT; \
 } while (0)

 #define CQE_HAS_VLAN(flags) \
-- 
2.20.1
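Editor's note on the RX changes: the sketch below is not part of the patch; it is a minimal, self-contained illustration of the segment-chaining shape that the reworked qede_process_sg_pkts() implements, assuming simplified stand-in types (struct mock_seg, chain_sg_segments) in place of the real rte_mbuf and qede RX queue structures. The remaining packet length is split across fixed-size RX buffers, each extra buffer descriptor becomes one chained segment linked onto the tail, and a zero remaining length with BDs still outstanding is treated as an error (the unlikely() case in the patch).

/* Illustrative sketch only -- not part of the patch. Simplified stand-ins
 * are used in place of struct rte_mbuf and the qede RX queue structures.
 */
#include <stdint.h>
#include <stdio.h>

struct mock_seg {			/* stand-in for struct rte_mbuf */
	uint16_t data_len;
	struct mock_seg *next;
};

/*
 * Chain num_segs extra segments onto 'first', consuming pkt_len bytes of
 * payload beyond the first BD. Returns 0 on success or -EINVAL when the
 * reported length and BD count disagree, mirroring the return convention
 * of the patched qede_process_sg_pkts().
 */
static int chain_sg_segments(struct mock_seg *first, struct mock_seg *ring,
			     uint8_t num_segs, uint16_t pkt_len,
			     uint16_t rx_buf_size)
{
	struct mock_seg *seg1 = first;
	struct mock_seg *seg2;
	uint16_t cur_size;
	uint8_t idx = 0;

	while (num_segs) {
		cur_size = pkt_len > rx_buf_size ? rx_buf_size : pkt_len;
		if (!cur_size)
			return -22;	/* -EINVAL: len/BD count mismatch */
		seg2 = &ring[idx++];	/* real driver: next sw_rx_ring mbuf */
		seg2->data_len = cur_size;
		pkt_len -= cur_size;
		seg1->next = seg2;	/* link onto the tail of the chain */
		seg1 = seg1->next;	/* ... and advance the tail */
		num_segs--;
	}
	return 0;
}

int main(void)
{
	struct mock_seg ring[8] = { { 0, NULL } };	/* stand-in RX buffers */
	struct mock_seg first = { 2048, NULL };		/* len_on_first_bd */
	/* A 9000-byte jumbo over 2048-byte buffers: 4 extra BDs carrying
	 * 2048 + 2048 + 2048 + 808 bytes after the first one.
	 */
	int rc = chain_sg_segments(&first, ring, 4, 9000 - 2048, 2048);

	printf("chain rc = %d, last seg len = %d\n", rc, ring[3].data_len);
	return rc ? 1 : 0;
}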
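Editor's note on the TX changes: a similar hedged sketch of the BD accounting introduced above. In the patch, qede_encode_sg_bd() now returns the total number of BDs used (the first BD plus one per chained fragment), and qede_xmit_pkts() writes that count into bd1->data.nbds once and charges it against nb_tx_avail. The mock types and names below (mock_mbuf, mock_first_bd, encode_sg_bd) are illustrative stand-ins, not the driver's rte_mbuf or eth_tx_1st_bd definitions.

/* Illustrative sketch only -- not part of the patch. Simplified stand-ins
 * are used in place of struct rte_mbuf and struct eth_tx_1st_bd.
 */
#include <stdint.h>
#include <stdio.h>

struct mock_mbuf {			/* stand-in for struct rte_mbuf */
	uint16_t data_len;
	struct mock_mbuf *next;
};

struct mock_first_bd {			/* stand-in for struct eth_tx_1st_bd */
	uint8_t nbds;
};

/*
 * Mirrors the reworked qede_encode_sg_bd() return convention: start at 1
 * because the first BD always carries the linear part, then add one BD per
 * chained fragment. The real function also produces and fills the
 * BD2/BD3/tx_bd entries while walking the chain.
 */
static uint8_t encode_sg_bd(struct mock_mbuf *m_seg)
{
	uint8_t nb_segs = 1;		/* min one segment per packet */

	while (m_seg) {
		nb_segs++;
		m_seg = m_seg->next;
	}
	return nb_segs;
}

int main(void)
{
	struct mock_mbuf seg2 = { 256, NULL };
	struct mock_mbuf seg1 = { 1024, &seg2 };
	struct mock_mbuf pkt  = { 1500, &seg1 };	/* 3-segment packet */
	struct mock_first_bd bd1 = { 0 };
	uint16_t nb_tx_avail = 512;			/* free TX ring slots */
	uint8_t nb_frags;

	/* As in the patched qede_xmit_pkts(): encode the trailing segments,
	 * set nbds once from the returned total, and charge the TX ring for
	 * every BD consumed by this packet.
	 */
	nb_frags = encode_sg_bd(pkt.next);
	bd1.nbds = nb_frags;
	nb_tx_avail -= nb_frags;

	printf("nbds = %d, tx slots left = %d\n", bd1.nbds, nb_tx_avail);
	return 0;
}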