Having those modulo operations implies costly instructions execution,
what can be avoided with conditionals and unlikely clauses.
This change makes the software ring read and write indexes to be now
always within the ring size which has to be handled properly. The main
problem is when write pointer wraps and being less than the read pointer.
This happened before, but just with indexes type size (uint32_t) wrapping,
and in that case the processor does the right thing no requiring special
handling by software.
This work has also led to discovering redundant pointers in the driver,
which have been removed.
Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
nfp_net_reset_rx_queue(struct nfp_net_rxq *rxq)
{
nfp_net_rx_queue_release_mbufs(rxq);
nfp_net_reset_rx_queue(struct nfp_net_rxq *rxq)
{
nfp_net_rx_queue_release_mbufs(rxq);
rxq->rd_p = 0;
rxq->nb_rx_hold = 0;
}
rxq->rd_p = 0;
rxq->nb_rx_hold = 0;
}
nfp_net_tx_queue_release_mbufs(txq);
txq->wr_p = 0;
txq->rd_p = 0;
nfp_net_tx_queue_release_mbufs(txq);
txq->wr_p = 0;
txq->rd_p = 0;
- txq->tail = 0;
- txq->qcp_rd_p = 0;
- idx = rxq->rd_p % rxq->rx_count;
- rxds = &rxq->rxds[idx];
rxd->fld.dma_addr_lo = dma_addr & 0xffffffff;
rxe[i].mbuf = mbuf;
PMD_RX_LOG(DEBUG, "[%d]: %" PRIx64 "\n", i, dma_addr);
rxd->fld.dma_addr_lo = dma_addr & 0xffffffff;
rxe[i].mbuf = mbuf;
PMD_RX_LOG(DEBUG, "[%d]: %" PRIx64 "\n", i, dma_addr);
}
/* Make sure all writes are flushed before telling the hardware */
}
/* Make sure all writes are flushed before telling the hardware */
}
txq->tx_count = nb_desc;
}
txq->tx_count = nb_desc;
txq->tx_free_thresh = tx_free_thresh;
txq->tx_pthresh = tx_conf->tx_thresh.pthresh;
txq->tx_hthresh = tx_conf->tx_thresh.hthresh;
txq->tx_free_thresh = tx_free_thresh;
txq->tx_pthresh = tx_conf->tx_thresh.pthresh;
txq->tx_hthresh = tx_conf->tx_thresh.hthresh;
struct nfp_net_hw *hw;
struct rte_mbuf *mb;
struct rte_mbuf *new_mb;
struct nfp_net_hw *hw;
struct rte_mbuf *mb;
struct rte_mbuf *new_mb;
uint16_t nb_hold;
uint64_t dma_addr;
int avail;
uint16_t nb_hold;
uint64_t dma_addr;
int avail;
nb_hold = 0;
while (avail < nb_pkts) {
nb_hold = 0;
while (avail < nb_pkts) {
- idx = rxq->rd_p % rxq->rx_count;
-
- rxb = &rxq->rxbufs[idx];
+ rxb = &rxq->rxbufs[rxq->rd_p];
if (unlikely(rxb == NULL)) {
RTE_LOG_DP(ERR, PMD, "rxb does not exist!\n");
break;
if (unlikely(rxb == NULL)) {
RTE_LOG_DP(ERR, PMD, "rxb does not exist!\n");
break;
- rxds = &rxq->rxds[idx];
+ rxds = &rxq->rxds[rxq->rd_p];
if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
break;
if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
break;
rxds->fld.dma_addr_lo = dma_addr & 0xffffffff;
rxq->rd_p++;
rxds->fld.dma_addr_lo = dma_addr & 0xffffffff;
rxq->rd_p++;
+ if (unlikely(rxq->rd_p == rxq->rx_count)) /* wrapping?*/
+ rxq->rd_p = 0;
/* Work out how many packets have been sent */
qcp_rd_p = nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);
/* Work out how many packets have been sent */
qcp_rd_p = nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);
- if (qcp_rd_p == txq->qcp_rd_p) {
+ if (qcp_rd_p == txq->rd_p) {
PMD_TX_LOG(DEBUG, "queue %u: It seems harrier is not sending "
"packets (%u, %u)\n", txq->qidx,
PMD_TX_LOG(DEBUG, "queue %u: It seems harrier is not sending "
"packets (%u, %u)\n", txq->qidx,
- qcp_rd_p, txq->qcp_rd_p);
- if (qcp_rd_p > txq->qcp_rd_p)
- todo = qcp_rd_p - txq->qcp_rd_p;
+ if (qcp_rd_p > txq->rd_p)
+ todo = qcp_rd_p - txq->rd_p;
- todo = qcp_rd_p + txq->tx_count - txq->qcp_rd_p;
+ todo = qcp_rd_p + txq->tx_count - txq->rd_p;
- PMD_TX_LOG(DEBUG, "qcp_rd_p %u, txq->qcp_rd_p: %u, qcp->rd_p: %u\n",
- qcp_rd_p, txq->qcp_rd_p, txq->rd_p);
+ PMD_TX_LOG(DEBUG, "qcp_rd_p %u, txq->rd_p: %u, qcp->rd_p: %u\n",
+ qcp_rd_p, txq->rd_p, txq->rd_p);
if (todo == 0)
return todo;
if (todo == 0)
return todo;
- txq->qcp_rd_p += todo;
- txq->qcp_rd_p %= txq->tx_count;
+ if (unlikely(txq->rd_p >= txq->tx_count))
+ txq->rd_p -= txq->tx_count;
return todo;
}
/* Leaving always free descriptors for avoiding wrapping confusion */
return todo;
}
/* Leaving always free descriptors for avoiding wrapping confusion */
-#define NFP_FREE_TX_DESC(t) (t->tx_count - (t->wr_p - t->rd_p) - 8)
+static inline
+uint32_t nfp_free_tx_desc(struct nfp_net_txq *txq)
+{
+ if (txq->wr_p >= txq->rd_p)
+ return txq->tx_count - (txq->wr_p - txq->rd_p) - 8;
+ else
+ return txq->rd_p - txq->wr_p - 8;
+}
/*
* nfp_net_txq_full - Check if the TX queue free descriptors
/*
* nfp_net_txq_full - Check if the TX queue free descriptors
* This function uses the host copy* of read/write pointers
*/
static inline
* This function uses the host copy* of read/write pointers
*/
static inline
-int nfp_net_txq_full(struct nfp_net_txq *txq)
+uint32_t nfp_net_txq_full(struct nfp_net_txq *txq)
- return NFP_FREE_TX_DESC(txq) < txq->tx_free_thresh;
+ return (nfp_free_tx_desc(txq) < txq->tx_free_thresh);
txq = tx_queue;
hw = txq->hw;
txq = tx_queue;
hw = txq->hw;
- txds = &txq->txds[txq->tail];
+ txds = &txq->txds[txq->wr_p];
PMD_TX_LOG(DEBUG, "working for queue %u at pos %d and %u packets\n",
PMD_TX_LOG(DEBUG, "working for queue %u at pos %d and %u packets\n",
- txq->qidx, txq->tail, nb_pkts);
+ txq->qidx, txq->wr_p, nb_pkts);
- if ((NFP_FREE_TX_DESC(txq) < nb_pkts) || (nfp_net_txq_full(txq)))
+ if ((nfp_free_tx_desc(txq) < nb_pkts) || (nfp_net_txq_full(txq)))
nfp_net_tx_free_bufs(txq);
nfp_net_tx_free_bufs(txq);
- free_descs = (uint16_t)NFP_FREE_TX_DESC(txq);
+ free_descs = (uint16_t)nfp_free_tx_desc(txq);
if (unlikely(free_descs == 0))
return 0;
if (unlikely(free_descs == 0))
return 0;
/* Sending packets */
while ((i < nb_pkts) && free_descs) {
/* Grabbing the mbuf linked to the current descriptor */
/* Sending packets */
while ((i < nb_pkts) && free_descs) {
/* Grabbing the mbuf linked to the current descriptor */
- lmbuf = &txq->txbufs[txq->tail].mbuf;
+ lmbuf = &txq->txbufs[txq->wr_p].mbuf;
/* Warming the cache for releasing the mbuf later on */
RTE_MBUF_PREFETCH_TO_FREE(*lmbuf);
/* Warming the cache for releasing the mbuf later on */
RTE_MBUF_PREFETCH_TO_FREE(*lmbuf);
free_descs--;
txq->wr_p++;
free_descs--;
txq->wr_p++;
- txq->tail++;
- if (unlikely(txq->tail == txq->tx_count)) /* wrapping?*/
- txq->tail = 0;
+ if (unlikely(txq->wr_p == txq->tx_count)) /* wrapping?*/
+ txq->wr_p = 0;
pkt_size -= dma_size;
if (!pkt_size) {
pkt_size -= dma_size;
if (!pkt_size) {
pkt = pkt->next;
}
/* Referencing next free TX descriptor */
pkt = pkt->next;
}
/* Referencing next free TX descriptor */
- txds = &txq->txds[txq->tail];
+ txds = &txq->txds[txq->wr_p];
uint32_t wr_p;
uint32_t rd_p;
uint32_t wr_p;
uint32_t rd_p;
uint32_t tx_count;
uint32_t tx_free_thresh;
uint32_t tx_count;
uint32_t tx_free_thresh;
/*
* For each descriptor keep a reference to the mbuff and
/*
* For each descriptor keep a reference to the mbuff and
struct nfp_net_tx_desc *txds;
/*
struct nfp_net_tx_desc *txds;
/*
- * At this point 56 bytes have been used for all the fields in the
+ * At this point 48 bytes have been used for all the fields in the
* TX critical path. We have room for 8 bytes and still all placed
* in a cache line. We are not using the threshold values below nor
* the txq_flags but if we need to, we can add the most used in the
* TX critical path. We have room for 8 bytes and still all placed
* in a cache line. We are not using the threshold values below nor
* the txq_flags but if we need to, we can add the most used in the
* freelist descriptors and @rd_p is where the driver start
* reading descriptors for newly arrive packets from.
*/
* freelist descriptors and @rd_p is where the driver start
* reading descriptors for newly arrive packets from.
*/