rte_mempool_in_use_count(rxq->mb_pool));
return -ENOMEM;
}
- rxq->sw_rx_ring[idx].mbuf = new_mb;
- rxq->sw_rx_ring[idx].page_offset = 0;
+ rxq->sw_rx_ring[idx] = new_mb;
mapping = rte_mbuf_data_iova_default(new_mb);
/* Advance PROD and get BD pointer */
rx_bd = (struct eth_rx_bd *)ecore_chain_produce(&rxq->rx_bd_ring);
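
The hunks above replace the per-slot entry struct in sw_rx_ring with a bare mbuf pointer. A minimal sketch of the change, reconstructed only from the fields removed here (the page_offset type is assumed, not copied from the qede headers):

    /* Illustrative reconstruction, not driver code. */
    struct qede_rx_entry {                 /* before: one small struct per slot */
            struct rte_mbuf *mbuf;
            uint32_t page_offset;          /* always left at 0 by this driver */
    };

    struct rte_mbuf **sw_rx_ring;          /* after: one pointer per slot */

Dropping the unused page_offset shrinks the ring and turns every slot update into a single pointer store.
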
static inline int qede_alloc_rx_bulk_mbufs(struct qede_rx_queue *rxq, int count)
{
- void *obj_p[QEDE_MAX_BULK_ALLOC_COUNT] __rte_cache_aligned;
struct rte_mbuf *mbuf = NULL;
struct eth_rx_bd *rx_bd;
dma_addr_t mapping;
int i, ret = 0;
uint16_t idx;
+ uint16_t mask = NUM_RX_BDS(rxq);
if (count > QEDE_MAX_BULK_ALLOC_COUNT)
count = QEDE_MAX_BULK_ALLOC_COUNT;
- ret = rte_mempool_get_bulk(rxq->mb_pool, obj_p, count);
+ idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);
+
+ if (count > mask - idx + 1)
+ count = mask - idx + 1;
+
+ ret = rte_mempool_get_bulk(rxq->mb_pool, (void **)&rxq->sw_rx_ring[idx],
+ count);
+
if (unlikely(ret)) {
PMD_RX_LOG(ERR, rxq,
"Failed to allocate %d rx buffers "
}
for (i = 0; i < count; i++) {
- mbuf = obj_p[i];
- if (likely(i < count - 1))
- rte_prefetch0(obj_p[i + 1]);
+ rte_prefetch0(rxq->sw_rx_ring[(idx + 1) & NUM_RX_BDS(rxq)]);
+ mbuf = rxq->sw_rx_ring[idx & NUM_RX_BDS(rxq)];
- idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);
- rxq->sw_rx_ring[idx].mbuf = mbuf;
- rxq->sw_rx_ring[idx].page_offset = 0;
mapping = rte_mbuf_data_iova_default(mbuf);
rx_bd = (struct eth_rx_bd *)
ecore_chain_produce(&rxq->rx_bd_ring);
rx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
rx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
- rxq->sw_rx_prod++;
+ idx++;
}
+ rxq->sw_rx_prod = idx;
return 0;
}
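
With the stack array gone, rte_mempool_get_bulk() now writes directly into sw_rx_ring[], so the clamp against `mask - idx + 1` is what prevents a single bulk get from running past the end of the array when the producer index is close to the wrap point. A self-contained model of that clamp (plain C, no DPDK types; get_bulk() stands in for rte_mempool_get_bulk(), and the ring size is assumed to be a power of two, as NUM_RX_BDS implies):

    /* Minimal model of the wrap-safe bulk fill into a power-of-two ring. */
    static int fill_ring(void **slots, unsigned int ring_size,
                         unsigned int prod, unsigned int count,
                         int (*get_bulk)(void **objs, unsigned int n))
    {
            unsigned int mask = ring_size - 1;
            unsigned int idx = prod & mask;

            if (count > mask - idx + 1)      /* slots left before the wrap */
                    count = mask - idx + 1;

            /* One contiguous write into the ring, never past slots[mask]. */
            return get_bulk(&slots[idx], count);
    }
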
if (rxq->sw_rx_ring) {
for (i = 0; i < rxq->nb_rx_desc; i++) {
- if (rxq->sw_rx_ring[i].mbuf) {
- rte_pktmbuf_free(rxq->sw_rx_ring[i].mbuf);
- rxq->sw_rx_ring[i].mbuf = NULL;
+ if (rxq->sw_rx_ring[i]) {
+ rte_pktmbuf_free(rxq->sw_rx_ring[i]);
+ rxq->sw_rx_ring[i] = NULL;
}
}
}
struct ecore_dev *edev = &qdev->edev;
struct qede_tx_queue *txq;
int rc;
+ size_t sw_tx_ring_size;
txq = rte_zmalloc_socket("qede_tx_queue", sizeof(struct qede_tx_queue),
RTE_CACHE_LINE_SIZE, socket_id);
}
/* Allocate software ring */
+ sw_tx_ring_size = sizeof(txq->sw_tx_ring) * txq->nb_tx_desc;
txq->sw_tx_ring = rte_zmalloc_socket("txq->sw_tx_ring",
- (sizeof(struct qede_tx_entry) *
- txq->nb_tx_desc),
+ sw_tx_ring_size,
RTE_CACHE_LINE_SIZE, socket_id);
if (!txq->sw_tx_ring) {
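
Since sw_tx_ring is now a `struct rte_mbuf **`, `sizeof(txq->sw_tx_ring)` evaluates to the size of a pointer, which equals `sizeof(struct rte_mbuf *)` on the platforms DPDK supports, so the allocation above is sized correctly. The equivalent spelling below merely states the per-slot size more directly (a sketch, not part of the patch):

    sw_tx_ring_size = sizeof(*txq->sw_tx_ring) * txq->nb_tx_desc;
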
if (txq->sw_tx_ring) {
for (i = 0; i < txq->nb_tx_desc; i++) {
- if (txq->sw_tx_ring[i].mbuf) {
- rte_pktmbuf_free(txq->sw_tx_ring[i].mbuf);
- txq->sw_tx_ring[i].mbuf = NULL;
+ if (txq->sw_tx_ring[i]) {
+ rte_pktmbuf_free(txq->sw_tx_ring[i]);
+ txq->sw_tx_ring[i] = NULL;
}
}
}
int qede_alloc_fp_resc(struct qede_dev *qdev)
{
- struct ecore_dev *edev = &qdev->edev;
+ struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
struct qede_fastpath *fp;
uint32_t num_sbs;
uint16_t sb_idx;
int i;
+ PMD_INIT_FUNC_TRACE(edev);
+
if (IS_VF(edev))
ecore_vf_get_num_sbs(ECORE_LEADING_HWFN(edev), &num_sbs);
else
for (sb_idx = 0; sb_idx < QEDE_RXTX_MAX(qdev); sb_idx++) {
fp = &qdev->fp_array[sb_idx];
- if (!fp)
- continue;
fp->sb_info = rte_calloc("sb", 1, sizeof(struct ecore_sb_info),
RTE_CACHE_LINE_SIZE);
if (!fp->sb_info) {
for (sb_idx = 0; sb_idx < QEDE_RXTX_MAX(qdev); sb_idx++) {
fp = &qdev->fp_array[sb_idx];
- if (!fp)
- continue;
- DP_INFO(edev, "Free sb_info index 0x%x\n",
- fp->sb_info->igu_sb_id);
if (fp->sb_info) {
+ DP_INFO(edev, "Free sb_info index 0x%x\n",
+ fp->sb_info->igu_sb_id);
OSAL_DMA_FREE_COHERENT(edev, fp->sb_info->sb_virt,
fp->sb_info->sb_phys,
sizeof(struct status_block));
{
uint16_t bd_prod = ecore_chain_get_prod_idx(&rxq->rx_bd_ring);
uint16_t cqe_prod = ecore_chain_get_prod_idx(&rxq->rx_comp_ring);
- struct eth_rx_prod_data rx_prods = { 0 };
+ struct eth_rx_prod_data rx_prods;
/* Update producers */
+ memset(&rx_prods, 0, sizeof(rx_prods));
rx_prods.bd_prod = rte_cpu_to_le_16(bd_prod);
rx_prods.cqe_prod = rte_cpu_to_le_16(cqe_prod);
}
static inline void
-qede_free_tx_pkt(struct qede_tx_queue *txq)
+qede_process_tx_compl(__rte_unused struct ecore_dev *edev,
+ struct qede_tx_queue *txq)
{
+ uint16_t hw_bd_cons;
+ uint16_t sw_tx_cons;
+ uint16_t remaining;
+ uint16_t mask;
struct rte_mbuf *mbuf;
uint16_t nb_segs;
uint16_t idx;
+ uint16_t first_idx;
+
+ rte_compiler_barrier();
+ rte_prefetch0(txq->hw_cons_ptr);
+ sw_tx_cons = ecore_chain_get_cons_idx(&txq->tx_pbl);
+ hw_bd_cons = rte_le_to_cpu_16(*txq->hw_cons_ptr);
+#ifdef RTE_LIBRTE_QEDE_DEBUG_TX
+ PMD_TX_LOG(DEBUG, txq, "Tx Completions = %u\n",
+ abs(hw_bd_cons - sw_tx_cons));
+#endif
- idx = TX_CONS(txq);
- mbuf = txq->sw_tx_ring[idx].mbuf;
- if (mbuf) {
+ mask = NUM_TX_BDS(txq);
+ idx = txq->sw_tx_cons & mask;
+
+ remaining = hw_bd_cons - sw_tx_cons;
+ txq->nb_tx_avail += remaining;
+ first_idx = idx;
+
+ while (remaining) {
+ mbuf = txq->sw_tx_ring[idx];
+ RTE_ASSERT(mbuf);
nb_segs = mbuf->nb_segs;
+ remaining -= nb_segs;
+
+ /* Prefetch the next mbuf. Note that at least the last 4 mbufs
+ * that are prefetched will not be used in the current call.
+ */
+ rte_mbuf_prefetch_part1(txq->sw_tx_ring[(idx + 4) & mask]);
+ rte_mbuf_prefetch_part2(txq->sw_tx_ring[(idx + 4) & mask]);
+
PMD_TX_LOG(DEBUG, txq, "nb_segs to free %u\n", nb_segs);
+
while (nb_segs) {
- /* It's like consuming rxbuf in recv() */
ecore_chain_consume(&txq->tx_pbl);
- txq->nb_tx_avail++;
nb_segs--;
}
- rte_pktmbuf_free(mbuf);
- txq->sw_tx_ring[idx].mbuf = NULL;
- txq->sw_tx_cons++;
+
+ idx = (idx + 1) & mask;
PMD_TX_LOG(DEBUG, txq, "Freed tx packet\n");
- } else {
- ecore_chain_consume(&txq->tx_pbl);
- txq->nb_tx_avail++;
}
-}
+ txq->sw_tx_cons = idx;
-static inline void
-qede_process_tx_compl(__rte_unused struct ecore_dev *edev,
- struct qede_tx_queue *txq)
-{
- uint16_t hw_bd_cons;
-#ifdef RTE_LIBRTE_QEDE_DEBUG_TX
- uint16_t sw_tx_cons;
-#endif
-
- rte_compiler_barrier();
- hw_bd_cons = rte_le_to_cpu_16(*txq->hw_cons_ptr);
-#ifdef RTE_LIBRTE_QEDE_DEBUG_TX
- sw_tx_cons = ecore_chain_get_cons_idx(&txq->tx_pbl);
- PMD_TX_LOG(DEBUG, txq, "Tx Completions = %u\n",
- abs(hw_bd_cons - sw_tx_cons));
-#endif
- while (hw_bd_cons != ecore_chain_get_cons_idx(&txq->tx_pbl))
- qede_free_tx_pkt(txq);
+ if (first_idx > idx) {
+ rte_pktmbuf_free_bulk(&txq->sw_tx_ring[first_idx],
+ mask - first_idx + 1);
+ rte_pktmbuf_free_bulk(&txq->sw_tx_ring[0], idx);
+ } else {
+ rte_pktmbuf_free_bulk(&txq->sw_tx_ring[first_idx],
+ idx - first_idx);
+ }
}
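
The reworked qede_process_tx_compl() reads the hardware consumer once, credits nb_tx_avail for the whole batch up front, walks the completed packets only to advance the ecore chain by their BD counts, and then returns the head mbufs in bulk with rte_pktmbuf_free_bulk(). Because the bulk-free API takes one contiguous array, the completed range is split in two when it wraps past the end of the ring. A self-contained model of that split (plain C; free_bulk() stands in for rte_pktmbuf_free_bulk(), power-of-two ring assumed):

    /* first .. last (exclusive) is the range of slots completed this call. */
    static void free_completed(void **ring, unsigned int ring_size,
                               unsigned int first, unsigned int last,
                               void (*free_bulk)(void **objs, unsigned int n))
    {
            unsigned int mask = ring_size - 1;

            if (first > last) {
                    /* Range wraps: free the tail slots, then the head slots. */
                    free_bulk(&ring[first], mask - first + 1);
                    free_bulk(&ring[0], last);
            } else {
                    free_bulk(&ring[first], last - first);
            }
    }

When nothing completed, first equals last and the else branch frees zero entries, matching the driver's behaviour.
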
static int qede_drain_txq(struct qede_dev *qdev,
static inline void
qede_reuse_page(__rte_unused struct qede_dev *qdev,
- struct qede_rx_queue *rxq, struct qede_rx_entry *curr_cons)
+ struct qede_rx_queue *rxq, struct rte_mbuf *curr_cons)
{
struct eth_rx_bd *rx_bd_prod = ecore_chain_produce(&rxq->rx_bd_ring);
uint16_t idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);
- struct qede_rx_entry *curr_prod;
dma_addr_t new_mapping;
- curr_prod = &rxq->sw_rx_ring[idx];
- *curr_prod = *curr_cons;
+ rxq->sw_rx_ring[idx] = curr_cons;
- new_mapping = rte_mbuf_data_iova_default(curr_prod->mbuf) +
- curr_prod->page_offset;
+ new_mapping = rte_mbuf_data_iova_default(curr_cons);
rx_bd_prod->addr.hi = rte_cpu_to_le_32(U64_HI(new_mapping));
rx_bd_prod->addr.lo = rte_cpu_to_le_32(U64_LO(new_mapping));
qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq,
struct qede_dev *qdev, uint8_t count)
{
- struct qede_rx_entry *curr_cons;
+ struct rte_mbuf *curr_cons;
for (; count > 0; count--) {
- curr_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS(rxq)];
+ curr_cons = rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS(rxq)];
qede_reuse_page(qdev, rxq, curr_cons);
qede_rx_bd_ring_consume(rxq);
}
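
With the entry struct gone, qede_reuse_page() reduces to moving the just-consumed mbuf pointer from the consumer slot to the producer slot and re-posting its IOVA, so the buffer is recycled without any mempool get/put. A minimal sketch of that recycle step (illustrative only; the hardware BD update is omitted):

    static void reuse_buffer(void **ring, unsigned int mask,
                             unsigned int *prod, unsigned int cons)
    {
            /* Re-post the consumed buffer at the producer index. */
            ring[*prod & mask] = ring[cons & mask];
            (*prod)++;
    }
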
if (rte_le_to_cpu_16(len)) {
tpa_info = &rxq->tpa_info[agg_index];
cons_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
- curr_frag = rxq->sw_rx_ring[cons_idx].mbuf;
+ curr_frag = rxq->sw_rx_ring[cons_idx];
assert(curr_frag);
curr_frag->nb_segs = 1;
curr_frag->pkt_len = rte_le_to_cpu_16(len);
return -EINVAL;
}
sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
- seg2 = rxq->sw_rx_ring[sw_rx_index].mbuf;
+ seg2 = rxq->sw_rx_ring[sw_rx_index];
qede_rx_bd_ring_consume(rxq);
pkt_len -= cur_size;
seg2->data_len = cur_size;
/* Get the data from the SW ring */
sw_rx_index = rxq->sw_rx_cons & num_rx_bds;
- rx_mb = rxq->sw_rx_ring[sw_rx_index].mbuf;
+ rx_mb = rxq->sw_rx_ring[sw_rx_index];
assert(rx_mb != NULL);
parse_flag = rte_le_to_cpu_16(fp_cqe->pars_flags.flags);
"Outer L3 csum failed, flags = 0x%x\n",
parse_flag);
rxq->rx_hw_errors++;
- ol_flags |= PKT_RX_EIP_CKSUM_BAD;
+ ol_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
} else {
ol_flags |= PKT_RX_IP_CKSUM_GOOD;
}
/* Prefetch next mbuf while processing current one. */
preload_idx = rxq->sw_rx_cons & num_rx_bds;
- rte_prefetch0(rxq->sw_rx_ring[preload_idx].mbuf);
+ rte_prefetch0(rxq->sw_rx_ring[preload_idx]);
/* Update rest of the MBUF fields */
rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM;
/* Get the data from the SW ring */
sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
- rx_mb = rxq->sw_rx_ring[sw_rx_index].mbuf;
+ rx_mb = rxq->sw_rx_ring[sw_rx_index];
assert(rx_mb != NULL);
/* Handle regular CQE or TPA start CQE */
"Outer L3 csum failed, flags = 0x%x\n",
parse_flag);
rxq->rx_hw_errors++;
- ol_flags |= PKT_RX_EIP_CKSUM_BAD;
+ ol_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
} else {
ol_flags |= PKT_RX_IP_CKSUM_GOOD;
}
/* Prefetch next mbuf while processing current one. */
preload_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
- rte_prefetch0(rxq->sw_rx_ring[preload_idx].mbuf);
+ rte_prefetch0(rxq->sw_rx_ring[preload_idx]);
/* Update rest of the MBUF fields */
rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM;
}
#endif
+uint16_t
+qede_xmit_pkts_regular(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct qede_tx_queue *txq = p_txq;
+ struct qede_dev *qdev = txq->qdev;
+ struct ecore_dev *edev = &qdev->edev;
+ struct eth_tx_1st_bd *bd1;
+ struct eth_tx_2nd_bd *bd2;
+ struct eth_tx_3rd_bd *bd3;
+ struct rte_mbuf *m_seg = NULL;
+ struct rte_mbuf *mbuf;
+ struct rte_mbuf **sw_tx_ring;
+ uint16_t nb_tx_pkts;
+ uint16_t bd_prod;
+ uint16_t idx;
+ uint16_t nb_frags = 0;
+ uint16_t nb_pkt_sent = 0;
+ uint8_t nbds;
+ uint64_t tx_ol_flags;
+ /* BD1 */
+ uint16_t bd1_bf;
+ uint8_t bd1_bd_flags_bf;
+
+ if (unlikely(txq->nb_tx_avail < txq->tx_free_thresh)) {
+ PMD_TX_LOG(DEBUG, txq, "send=%u avail=%u free_thresh=%u",
+ nb_pkts, txq->nb_tx_avail, txq->tx_free_thresh);
+ qede_process_tx_compl(edev, txq);
+ }
+
+ nb_tx_pkts = nb_pkts;
+ bd_prod = rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
+ sw_tx_ring = txq->sw_tx_ring;
+
+ while (nb_tx_pkts--) {
+ /* Init flags/values */
+ nbds = 0;
+ bd1 = NULL;
+ bd2 = NULL;
+ bd3 = NULL;
+ bd1_bf = 0;
+ bd1_bd_flags_bf = 0;
+ nb_frags = 0;
+
+ mbuf = *tx_pkts++;
+ assert(mbuf);
+
+ /* Check minimum TX BDS availability against available BDs */
+ if (unlikely(txq->nb_tx_avail < mbuf->nb_segs))
+ break;
+
+ tx_ol_flags = mbuf->ol_flags;
+ bd1_bd_flags_bf |= 1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
+
+ if (unlikely(txq->nb_tx_avail <
+ ETH_TX_MIN_BDS_PER_NON_LSO_PKT))
+ break;
+ bd1_bf |=
+ (mbuf->pkt_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK)
+ << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
+
+ /* Offload the IP checksum in the hardware */
+ if (tx_ol_flags & PKT_TX_IP_CKSUM)
+ bd1_bd_flags_bf |=
+ 1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT;
+
+ /* L4 checksum offload (tcp or udp) */
+ if ((tx_ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) &&
+ (tx_ol_flags & (PKT_TX_UDP_CKSUM | PKT_TX_TCP_CKSUM)))
+ bd1_bd_flags_bf |=
+ 1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT;
+
+ /* Fill the entry in the SW ring and the BDs in the FW ring */
+ idx = TX_PROD(txq);
+ sw_tx_ring[idx] = mbuf;
+
+ /* BD1 */
+ bd1 = (struct eth_tx_1st_bd *)ecore_chain_produce(&txq->tx_pbl);
+ memset(bd1, 0, sizeof(struct eth_tx_1st_bd));
+ nbds++;
+
+ /* Map MBUF linear data for DMA and set in the BD1 */
+ QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_iova(mbuf),
+ mbuf->data_len);
+ bd1->data.bitfields = rte_cpu_to_le_16(bd1_bf);
+ bd1->data.bd_flags.bitfields = bd1_bd_flags_bf;
+
+ /* Handle fragmented MBUF */
+ if (unlikely(mbuf->nb_segs > 1)) {
+ m_seg = mbuf->next;
+
+ /* Encode scatter gather buffer descriptors */
+ nb_frags = qede_encode_sg_bd(txq, m_seg, &bd2, &bd3,
+ nbds - 1);
+ }
+
+ bd1->data.nbds = nbds + nb_frags;
+
+ txq->nb_tx_avail -= bd1->data.nbds;
+ txq->sw_tx_prod++;
+ bd_prod =
+ rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
+#ifdef RTE_LIBRTE_QEDE_DEBUG_TX
+ print_tx_bd_info(txq, bd1, bd2, bd3, tx_ol_flags);
+#endif
+ nb_pkt_sent++;
+ txq->xmit_pkts++;
+ }
+
+ /* Write value of prod idx into bd_prod */
+ txq->tx_db.data.bd_prod = bd_prod;
+ rte_wmb();
+ rte_compiler_barrier();
+ DIRECT_REG_WR_RELAXED(edev, txq->doorbell_addr, txq->tx_db.raw);
+ rte_wmb();
+
+ /* Check again for Tx completions */
+ qede_process_tx_compl(edev, txq);
+
+ PMD_TX_LOG(DEBUG, txq, "to_send=%u sent=%u bd_prod=%u core=%d",
+ nb_pkts, nb_pkt_sent, TX_PROD(txq), rte_lcore_id());
+
+ return nb_pkt_sent;
+}
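
qede_xmit_pkts_regular() is a trimmed Tx fast path: it handles plain L3/L4 checksum offloads and scattered mbufs, building at most BD1 plus fragment BDs per packet, with none of the tunnelling/LSO handling of qede_xmit_pkts(). Presumably the driver selects this handler when the configured Tx offloads do not need that handling; the selection logic is outside this excerpt. The key bookkeeping is that the software ring advances one slot per packet while the hardware chain consumes nb_segs BDs per packet, which is why nb_tx_avail is decremented by bd1->data.nbds but sw_tx_prod only by one. A condensed, self-contained model of that per-packet accounting (illustrative, not driver code):

    static unsigned int tx_bookkeeping_model(void **sw_ring, unsigned int mask,
                                             unsigned int *sw_prod,
                                             unsigned int *bds_avail,
                                             void *pkt, unsigned int nb_segs)
    {
            if (*bds_avail < nb_segs)
                    return 0;                     /* caller stops the burst */
            sw_ring[*sw_prod & mask] = pkt;       /* store the head mbuf only */
            *bds_avail -= nb_segs;                /* BD1 + one BD per extra seg */
            (*sw_prod)++;                         /* one sw-ring slot per packet */
            return 1;
    }
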
+
uint16_t
qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
/* Fill the entry in the SW ring and the BDs in the FW ring */
idx = TX_PROD(txq);
- txq->sw_tx_ring[idx].mbuf = mbuf;
+ txq->sw_tx_ring[idx] = mbuf;
/* BD1 */
bd1 = (struct eth_tx_1st_bd *)ecore_chain_produce(&txq->tx_pbl);