For the Tx SW ring (txq->elts[]), txq->elts_head/tail store ring indexes.
Because of this, one entry must always be left unused to tell a full ring
from an empty one, and the wraparound handling makes the code complex.
Store free-running counters instead of indexes to simplify the code and
save a few calculations.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
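To see the scheme at a glance, here is a minimal self-contained sketch in plain C of the counter-based ring this patch moves to. head and tail run freely as uint16_t counters and are masked only when indexing the array, so occupancy is simply head - tail and all RING_N entries are usable; with stored indexes, head == tail was ambiguous and one entry had to stay empty. Every name below (struct ring, ring_put(), ring_get(), RING_N) is illustrative and not part of the mlx5 driver.

#include <stddef.h>
#include <stdint.h>

#define RING_N 256              /* ring size, must be a power of two */
#define RING_M (RING_N - 1)     /* index mask */

struct ring {
	void *elts[RING_N];
	uint16_t head;          /* free-running producer counter */
	uint16_t tail;          /* free-running consumer counter */
};

/* Occupancy: correct across uint16_t wraparound because unsigned
 * subtraction is modulo 2^16 and the ring never holds more than RING_N.
 */
static inline uint16_t
ring_used(const struct ring *r)
{
	return (uint16_t)(r->head - r->tail);
}

static inline int
ring_put(struct ring *r, void *elt)
{
	if (ring_used(r) == RING_N)
		return -1;                       /* full: all slots usable */
	r->elts[r->head++ & RING_M] = elt;       /* mask only on access */
	return 0;
}

static inline void *
ring_get(struct ring *r)
{
	if (ring_used(r) == 0)
		return NULL;                     /* empty */
	return r->elts[r->tail++ & RING_M];
}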
static inline void
txq_complete(struct txq *txq)
{
- const unsigned int elts_n = 1 << txq->elts_n;
+ const uint16_t elts_n = 1 << txq->elts_n;
+ const uint16_t elts_m = elts_n - 1;
const unsigned int cqe_n = 1 << txq->cqe_n;
const unsigned int cqe_cnt = cqe_n - 1;
uint16_t elts_free = txq->elts_tail;
ctrl = (volatile struct mlx5_wqe_ctrl *)
tx_mlx5_wqe(txq, txq->wqe_pi);
elts_tail = ctrl->ctrl3;
- assert(elts_tail < (1 << txq->wqe_n));
+ assert((elts_tail & elts_m) < (1 << txq->wqe_n));
/* Free buffers. */
while (elts_free != elts_tail) {
- struct rte_mbuf *elt = (*txq->elts)[elts_free];
- unsigned int elts_free_next =
- (elts_free + 1) & (elts_n - 1);
- struct rte_mbuf *elt_next = (*txq->elts)[elts_free_next];
+ struct rte_mbuf *elt = (*txq->elts)[elts_free & elts_m];
+ struct rte_mbuf *elt_next =
+ (*txq->elts)[(elts_free + 1) & elts_m];
#ifndef NDEBUG
/* Poisoning. */
- memset(&(*txq->elts)[elts_free],
+ memset(&(*txq->elts)[elts_free & elts_m],
- sizeof((*txq->elts)[elts_free]));
+ sizeof((*txq->elts)[elts_free & elts_m]));
#endif
RTE_MBUF_PREFETCH_TO_FREE(elt_next);
/* Only one segment needs to be freed. */
rte_pktmbuf_free_seg(elt);
- elts_free = elts_free_next;
+ ++elts_free;
}
txq->cq_ci = cq_ci;
txq->elts_tail = elts_tail;
}
mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
{
struct txq *txq = tx_queue;
- const unsigned int elts_n = 1 << txq->elts_n;
- const unsigned int elts_cnt = elts_n - 1;
- unsigned int used;
+ uint16_t used;
- used = (txq->elts_head - txq->elts_tail) & elts_cnt;
+ used = txq->elts_head - txq->elts_tail;
if (offset < used)
return RTE_ETH_TX_DESC_FULL;
return RTE_ETH_TX_DESC_DONE;
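The simplified used = txq->elts_head - txq->elts_tail above relies on modulo-2^16 arithmetic; no mask is needed because unsigned wraparound already yields the occupancy. A standalone demonstration with made-up counter values near the wrap point:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint16_t tail = 65533;          /* consumer counter */
	uint16_t head = 3;              /* producer counter, wrapped */
	uint16_t used = head - tail;    /* modulo-2^16 difference */

	assert(used == 6);              /* true occupancy despite the wrap */
	printf("used = %u\n", used);
	return 0;
}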
{
struct txq *txq = (struct txq *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
- const unsigned int elts_n = 1 << txq->elts_n;
+ const uint16_t elts_n = 1 << txq->elts_n;
+ const uint16_t elts_m = elts_n - 1;
unsigned int i = 0;
unsigned int j = 0;
unsigned int k = 0;
unsigned int max_inline = txq->max_inline;
const unsigned int inline_en = !!max_inline && txq->inline_en;
uint16_t max_wqe;
rte_prefetch0(*pkts);
/* Start processing. */
txq_complete(txq);
- max = (elts_n - (elts_head - txq->elts_tail));
- if (max > elts_n)
- max -= elts_n;
+ max_elts = (elts_n - (elts_head - txq->elts_tail));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
if (unlikely(!max_wqe))
return 0;
* that one ring entry remains unused.
*/
assert(segs_n);
--segs_n;
if (unlikely(--max_wqe == 0))
break;
if (length < (MLX5_WQE_DWORD_SIZE + 2))
break;
/* Update element. */
- (*txq->elts)[elts_head] = buf;
+ (*txq->elts)[elts_head & elts_m] = buf;
/* Prefetch next buffer data. */
if (pkts_n - i > 1)
rte_prefetch0(
- elts_head = (elts_head + 1) & (elts_n - 1);
- (*txq->elts)[elts_head] = buf;
+ (*txq->elts)[++elts_head & elts_m] = buf;
++sg;
/* Advance counter only if all segs are successfully posted. */
if (sg < segs_n)
- elts_head = (elts_head + 1) & (elts_n - 1);
+ ++elts_head;
++pkts;
++i;
/* Initialize known and common part of the WQE structure. */
/* Take a shortcut if nothing must be sent. */
if (unlikely((i + k) == 0))
return 0;
- txq->elts_head = (txq->elts_head + i + j) & (elts_n - 1);
+ txq->elts_head += (i + j);
/* Check whether completion threshold has been reached. */
comp = txq->elts_comp + i + j + k;
if (comp >= MLX5_TX_COMP_THRESH) {
{
struct txq *txq = (struct txq *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
- const unsigned int elts_n = 1 << txq->elts_n;
+ const uint16_t elts_n = 1 << txq->elts_n;
+ const uint16_t elts_m = elts_n - 1;
unsigned int i = 0;
unsigned int j = 0;
uint16_t max_wqe;
unsigned int comp;
struct mlx5_mpw mpw = {
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
/* Start processing. */
txq_complete(txq);
- max = (elts_n - (elts_head - txq->elts_tail));
- if (max > elts_n)
- max -= elts_n;
+ max_elts = (elts_n - (elts_head - txq->elts_tail));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
if (unlikely(!max_wqe))
return 0;
do {
struct rte_mbuf *buf = *(pkts++);
- unsigned int elts_head_next;
uint32_t length;
unsigned int segs_n = buf->nb_segs;
uint32_t cs_flags = 0;
* that one ring entry remains unused.
*/
assert(segs_n);
break;
/* Do not bother with large packets MPW cannot handle. */
if (segs_n > MLX5_MPW_DSEG_MAX)
break;
--pkts_n;
/* Should we enable HW CKSUM offload */
if (buf->ol_flags &
volatile struct mlx5_wqe_data_seg *dseg;
uintptr_t addr;
- elts_head_next = (elts_head + 1) & (elts_n - 1);
- (*txq->elts)[elts_head] = buf;
+ (*txq->elts)[elts_head++ & elts_m] = buf;
dseg = mpw.data.dseg[mpw.pkts_n];
addr = rte_pktmbuf_mtod(buf, uintptr_t);
*dseg = (struct mlx5_wqe_data_seg){
.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
.addr = htonll(addr),
};
- elts_head = elts_head_next;
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
length += DATA_LEN(buf);
#endif
assert(length == mpw.len);
if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
mlx5_mpw_close(txq, &mpw);
- elts_head = elts_head_next;
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment sent bytes counter. */
txq->stats.obytes += length;
{
struct txq *txq = (struct txq *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
- const unsigned int elts_n = 1 << txq->elts_n;
+ const uint16_t elts_n = 1 << txq->elts_n;
+ const uint16_t elts_m = elts_n - 1;
unsigned int i = 0;
unsigned int j = 0;
uint16_t max_wqe;
unsigned int comp;
unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE;
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
/* Start processing. */
txq_complete(txq);
- max = (elts_n - (elts_head - txq->elts_tail));
- if (max > elts_n)
- max -= elts_n;
+ max_elts = (elts_n - (elts_head - txq->elts_tail));
do {
struct rte_mbuf *buf = *(pkts++);
- unsigned int elts_head_next;
uintptr_t addr;
uint32_t length;
unsigned int segs_n = buf->nb_segs;
* that one ring entry remains unused.
*/
assert(segs_n);
break;
/* Do not bother with large packets MPW cannot handle. */
if (segs_n > MLX5_MPW_DSEG_MAX)
break;
--pkts_n;
/*
* Compute max_wqe in case less WQE were consumed in previous
do {
volatile struct mlx5_wqe_data_seg *dseg;
- elts_head_next =
- (elts_head + 1) & (elts_n - 1);
- (*txq->elts)[elts_head] = buf;
+ (*txq->elts)[elts_head++ & elts_m] = buf;
dseg = mpw.data.dseg[mpw.pkts_n];
addr = rte_pktmbuf_mtod(buf, uintptr_t);
*dseg = (struct mlx5_wqe_data_seg){
.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
.addr = htonll(addr),
};
- elts_head = elts_head_next;
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
length += DATA_LEN(buf);
#endif
assert(mpw.state == MLX5_MPW_INL_STATE_OPENED);
assert(length <= inline_room);
assert(length == DATA_LEN(buf));
- elts_head_next = (elts_head + 1) & (elts_n - 1);
addr = rte_pktmbuf_mtod(buf, uintptr_t);
- (*txq->elts)[elts_head] = buf;
+ (*txq->elts)[elts_head++ & elts_m] = buf;
/* Maximum number of bytes before wrapping. */
max = ((((uintptr_t)(txq->wqes)) +
(1 << txq->wqe_n) *
inline_room -= length;
}
}
- elts_head = elts_head_next;
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment sent bytes counter. */
txq->stats.obytes += length;
{
struct txq *txq = (struct txq *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
- const unsigned int elts_n = 1 << txq->elts_n;
+ const uint16_t elts_n = 1 << txq->elts_n;
+ const uint16_t elts_m = elts_n - 1;
unsigned int i = 0;
unsigned int j = 0;
uint16_t max_wqe;
unsigned int max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
unsigned int mpw_room = 0;
/* Start processing. */
txq_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
- if (max_elts > elts_n)
- max_elts -= elts_n;
/* A CQE slot must always be available. */
assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
return 0;
do {
struct rte_mbuf *buf = *(pkts++);
- unsigned int elts_head_next;
uintptr_t addr;
uint64_t naddr;
unsigned int n;
* that one ring entry remains unused.
*/
assert(segs_n);
- if (max_elts - j < segs_n + 1)
+ if (max_elts - j < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
if (segs_n > MLX5_MPW_DSEG_MAX)
break;
do {
volatile struct mlx5_wqe_data_seg *dseg;
- elts_head_next =
- (elts_head + 1) & (elts_n - 1);
- (*txq->elts)[elts_head] = buf;
+ (*txq->elts)[elts_head++ & elts_m] = buf;
dseg = mpw.data.dseg[mpw.pkts_n];
addr = rte_pktmbuf_mtod(buf, uintptr_t);
*dseg = (struct mlx5_wqe_data_seg){
.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
.addr = htonll(addr),
};
- elts_head = elts_head_next;
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
length += DATA_LEN(buf);
#endif
/* No need to get completion as the entire packet is
* copied to WQ. Free the buf right away.
*/
- elts_head_next = elts_head;
rte_pktmbuf_free_seg(buf);
mpw_room -= (inl_pad + sizeof(inl_hdr) + length);
/* Add pad in the next packet if any. */
dseg = (volatile void *)
((uintptr_t)mpw.data.raw +
inl_pad);
- elts_head_next = (elts_head + 1) & (elts_n - 1);
- (*txq->elts)[elts_head] = buf;
+ (*txq->elts)[elts_head++ & elts_m] = buf;
addr = rte_pktmbuf_mtod(buf, uintptr_t);
for (n = 0; n * RTE_CACHE_LINE_SIZE < length; n++)
rte_prefetch2((void *)(addr +
mpw_room -= (inl_pad + sizeof(*dseg));
inl_pad = 0;
}
- elts_head = elts_head_next;
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment sent bytes counter. */
txq->stats.obytes += length;
/* TX queue descriptor. */
__extension__
struct txq {
- uint16_t elts_head; /* Current index in (*elts)[]. */
- uint16_t elts_tail; /* First element awaiting completion. */
+ uint16_t elts_head; /* Current counter in (*elts)[]. */
+ uint16_t elts_tail; /* Counter of first element awaiting completion. */
uint16_t elts_comp; /* Counter since last completion request. */
uint16_t mpw_comp; /* WQ index since last completion request. */
uint16_t cq_ci; /* Consumer index for completion queue. */
static void
txq_free_elts(struct txq_ctrl *txq_ctrl)
{
- unsigned int elts_n = 1 << txq_ctrl->txq.elts_n;
- unsigned int elts_head = txq_ctrl->txq.elts_head;
- unsigned int elts_tail = txq_ctrl->txq.elts_tail;
+ const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
+ const uint16_t elts_m = elts_n - 1;
+ uint16_t elts_head = txq_ctrl->txq.elts_head;
+ uint16_t elts_tail = txq_ctrl->txq.elts_tail;
struct rte_mbuf *(*elts)[elts_n] = txq_ctrl->txq.elts;
DEBUG("%p: freeing WRs", (void *)txq_ctrl);
txq_ctrl->txq.elts_comp = 0;
while (elts_tail != elts_head) {
- struct rte_mbuf *elt = (*elts)[elts_tail];
+ struct rte_mbuf *elt = (*elts)[elts_tail & elts_m];
assert(elt != NULL);
rte_pktmbuf_free_seg(elt);
#ifndef NDEBUG
/* Poisoning. */
- memset(&(*elts)[elts_tail],
+ memset(&(*elts)[elts_tail & elts_m],
- sizeof((*elts)[elts_tail]));
+ sizeof((*elts)[elts_tail & elts_m]));
- if (++elts_tail == elts_n)
- elts_tail = 0;
+ ++elts_tail;
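Following the same pattern, draining a counter-tracked ring no longer needs the explicit wrap check removed above. A sketch reusing the illustrative struct ring from the first example (ring_drain() and free_elt are hypothetical, not driver API):

/* Free every in-flight entry between tail and head, mirroring the
 * txq_free_elts() change above.
 */
static void
ring_drain(struct ring *r, void (*free_elt)(void *))
{
	while (r->tail != r->head) {
		free_elt(r->elts[r->tail & RING_M]);
		r->elts[r->tail & RING_M] = NULL; /* poison the freed slot */
		++r->tail;      /* plain increment; the mask handles wrap */
	}
}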