/* Start processing. */
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
+ /* A CQE slot must always be available. */
+ assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
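+ /* cq_pi - cq_ci = completions requested but not yet delivered; it must stay below the CQ size. */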
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
if (unlikely(!max_wqe))
return 0;
uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2;
uint16_t tso_header_sz = 0;
uint16_t ehdr;
- uint8_t cs_flags = 0;
+ uint8_t cs_flags;
uint64_t tso = 0;
uint16_t tso_segsz = 0;
#ifdef MLX5_PMD_SOFT_COUNTERS
if (max_elts < segs_n)
break;
max_elts -= segs_n;
- --segs_n;
+ sg = --segs_n;
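+ /* 'sg' saves the extra segment count for completion accounting at next_pkt. */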
if (unlikely(--max_wqe == 0))
break;
wqe = (volatile struct mlx5_wqe_v *)
if (pkts_n - i > 1)
rte_prefetch0(
rte_pktmbuf_mtod(*(pkts + 1), volatile void *));
- /* Should we enable HW CKSUM offload */
- if (buf->ol_flags &
- (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
- const uint64_t is_tunneled = buf->ol_flags &
- (PKT_TX_TUNNEL_GRE |
- PKT_TX_TUNNEL_VXLAN);
-
- if (is_tunneled && txq->tunnel_en) {
- cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
- MLX5_ETH_WQE_L4_INNER_CSUM;
- if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)
- cs_flags |= MLX5_ETH_WQE_L3_CSUM;
- } else {
- cs_flags = MLX5_ETH_WQE_L3_CSUM |
- MLX5_ETH_WQE_L4_CSUM;
- }
- }
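+ /* Translate checksum ol_flags into WQE cs_flags; shared by all Tx burst routines. */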
+ cs_flags = txq_ol_cksum_to_cs(txq, buf);
raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
/* Replace the Ethernet type by the VLAN if necessary. */
if (buf->ol_flags & PKT_TX_VLAN_PKT) {
addr += pkt_inline_sz;
}
raw += MLX5_WQE_DWORD_SIZE;
- if (txq->tso_en) {
- tso = buf->ol_flags & PKT_TX_TCP_SEG;
- if (tso) {
- uintptr_t end = (uintptr_t)
- (((uintptr_t)txq->wqes) +
- (1 << txq->wqe_n) *
- MLX5_WQE_SIZE);
- unsigned int copy_b;
- uint8_t vlan_sz = (buf->ol_flags &
- PKT_TX_VLAN_PKT) ? 4 : 0;
- const uint64_t is_tunneled =
- buf->ol_flags &
- (PKT_TX_TUNNEL_GRE |
- PKT_TX_TUNNEL_VXLAN);
-
- tso_header_sz = buf->l2_len + vlan_sz +
- buf->l3_len + buf->l4_len;
- tso_segsz = buf->tso_segsz;
- if (unlikely(tso_segsz == 0)) {
- txq->stats.oerrors++;
- break;
- }
- if (is_tunneled && txq->tunnel_en) {
- tso_header_sz += buf->outer_l2_len +
- buf->outer_l3_len;
- cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
- } else {
- cs_flags |= MLX5_ETH_WQE_L4_CSUM;
- }
- if (unlikely(tso_header_sz >
- MLX5_MAX_TSO_HEADER)) {
- txq->stats.oerrors++;
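+ /* TSO only if enabled on the queue and requested by the packet. */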
+ tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
+ if (tso) {
+ uintptr_t end =
+ (uintptr_t)(((uintptr_t)txq->wqes) +
+ (1 << txq->wqe_n) * MLX5_WQE_SIZE);
+ unsigned int copy_b;
+ uint8_t vlan_sz =
+ (buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0;
+ const uint64_t is_tunneled =
+ buf->ol_flags & (PKT_TX_TUNNEL_GRE |
+ PKT_TX_TUNNEL_VXLAN);
+
+ tso_header_sz = buf->l2_len + vlan_sz +
+ buf->l3_len + buf->l4_len;
+ tso_segsz = buf->tso_segsz;
+ if (unlikely(tso_segsz == 0)) {
+ txq->stats.oerrors++;
+ break;
+ }
+ if (is_tunneled && txq->tunnel_en) {
+ tso_header_sz += buf->outer_l2_len +
+ buf->outer_l3_len;
+ cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
+ } else {
+ cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+ }
+ if (unlikely(tso_header_sz > MLX5_MAX_TSO_HEADER)) {
+ txq->stats.oerrors++;
+ break;
+ }
+ copy_b = tso_header_sz - pkt_inline_sz;
+ /* First seg must contain all headers. */
+ assert(copy_b <= length);
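+ /* Inline the header only if it fits contiguously before the end of the WQ ring. */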
+ if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
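+ /* Number of extra 64B WQE slots the inlined header will occupy. */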
+ uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
+
+ if (unlikely(max_wqe < n))
break;
- }
- copy_b = tso_header_sz - pkt_inline_sz;
- /* First seg must contain all headers. */
- assert(copy_b <= length);
- if (copy_b &&
- ((end - (uintptr_t)raw) > copy_b)) {
- uint16_t n = (MLX5_WQE_DS(copy_b) -
- 1 + 3) / 4;
-
- if (unlikely(max_wqe < n))
- break;
- max_wqe -= n;
- rte_memcpy((void *)raw,
- (void *)addr, copy_b);
- addr += copy_b;
- length -= copy_b;
- /* Include padding for TSO header. */
- copy_b = MLX5_WQE_DS(copy_b) *
- MLX5_WQE_DWORD_SIZE;
- pkt_inline_sz += copy_b;
- raw += copy_b;
- } else {
- /* NOP WQE. */
- wqe->ctrl = (rte_v128u32_t){
- rte_cpu_to_be_32(
- txq->wqe_ci << 8),
- rte_cpu_to_be_32(
- txq->qp_num_8s | 1),
- 0,
- 0,
- };
- ds = 1;
+ max_wqe -= n;
+ rte_memcpy((void *)raw, (void *)addr, copy_b);
+ addr += copy_b;
+ length -= copy_b;
+ /* Include padding for TSO header. */
+ copy_b = MLX5_WQE_DS(copy_b) *
+ MLX5_WQE_DWORD_SIZE;
+ pkt_inline_sz += copy_b;
+ raw += copy_b;
+ } else {
+ /*
+ * NOP WQE: the header cannot fit before the end of
+ * the WQ ring; consume this WQE with a NOP and retry
+ * the packet from the next one.
+ */
+ wqe->ctrl = (rte_v128u32_t){
+ rte_cpu_to_be_32(txq->wqe_ci << 8),
+ rte_cpu_to_be_32(txq->qp_num_8s | 1),
+ 0,
+ 0,
+ };
+ ds = 1;
#ifdef MLX5_PMD_SOFT_COUNTERS
- total_length = 0;
+ total_length = 0;
#endif
- k++;
- goto next_wqe;
- }
+ k++;
+ goto next_wqe;
}
}
/* Inline if enough room. */
if (max_inline || tso) {
- uint32_t inl;
+ uint32_t inl = 0;
uintptr_t end = (uintptr_t)
(((uintptr_t)txq->wqes) +
(1 << txq->wqe_n) * MLX5_WQE_SIZE);
RTE_CACHE_LINE_SIZE -
(pkt_inline_sz - 2) -
!!tso * sizeof(inl);
- uintptr_t addr_end = (addr + inline_room) &
- ~(RTE_CACHE_LINE_SIZE - 1);
- unsigned int copy_b = (addr_end > addr) ?
- RTE_MIN((addr_end - addr), length) :
- 0;
-
+ uintptr_t addr_end;
+ unsigned int copy_b;
+
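+ /* Loop head: re-entered for each additional mbuf segment while inlining. */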
+pkt_inline:
+ addr_end = RTE_ALIGN_FLOOR(addr + inline_room,
+ RTE_CACHE_LINE_SIZE);
+ copy_b = (addr_end > addr) ?
+ RTE_MIN((addr_end - addr), length) : 0;
if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
/*
* One Dseg remains in the current WQE. To
if (unlikely(max_wqe < n))
break;
max_wqe -= n;
- if (tso) {
+ if (tso && !inl) {
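+ /* Write the 4B inline header once, before the first inlined chunk. */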
inl = rte_cpu_to_be_32(copy_b |
MLX5_INLINE_SEG);
rte_memcpy((void *)raw,
} else if (!segs_n) {
goto next_pkt;
} else {
- /* dseg will be advance as part of next_seg */
- dseg = (volatile rte_v128u32_t *)
- ((uintptr_t)wqe +
- ((ds - 1) * MLX5_WQE_DWORD_SIZE));
- goto next_seg;
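+ /* Room remains and more segments follow: inline from the next mbuf. */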
+ raw += copy_b;
+ inline_room -= copy_b;
+ --segs_n;
+ buf = buf->next;
+ assert(buf);
+ addr = rte_pktmbuf_mtod(buf, uintptr_t);
+ length = DATA_LEN(buf);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ total_length += length;
+#endif
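+ /* Store the consumed mbuf in elts; it is freed on Tx completion. */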
+ (*txq->elts)[++elts_head & elts_m] = buf;
+ goto pkt_inline;
}
} else {
/*
addr >> 32,
};
(*txq->elts)[++elts_head & elts_m] = buf;
- ++sg;
- /* Advance counter only if all segs are successfully posted. */
- if (sg < segs_n)
+ if (--segs_n)
goto next_seg;
- else
- j += sg;
next_pkt:
if (ds > MLX5_DSEG_MAX) {
txq->stats.oerrors++;
++elts_head;
++pkts;
++i;
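+ /* Whole packet posted: count its extra segments for completion. */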
+ j += sg;
/* Initialize known and common part of the WQE structure. */
if (tso) {
wqe->ctrl = (rte_v128u32_t){
/* Save elts_head in unused "immediate" field of WQE. */
last_wqe->ctrl3 = txq->elts_head;
txq->elts_comp = 0;
+#ifndef NDEBUG
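+ /* cq_pi is maintained only for the CQE availability assert. */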
+ ++txq->cq_pi;
+#endif
} else {
txq->elts_comp = comp;
}
/* Start processing. */
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
+ /* A CQE slot must always be available. */
+ assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
if (unlikely(!max_wqe))
return 0;
struct rte_mbuf *buf = *(pkts++);
uint32_t length;
unsigned int segs_n = buf->nb_segs;
- uint32_t cs_flags = 0;
+ uint8_t cs_flags;
/*
* Make sure there is enough room to store this packet and
}
max_elts -= segs_n;
--pkts_n;
- /* Should we enable HW CKSUM offload */
- if (buf->ol_flags &
- (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
- cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
+ cs_flags = txq_ol_cksum_to_cs(txq, buf);
/* Retrieve packet information. */
length = PKT_LEN(buf);
assert(length);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
+#ifndef NDEBUG
+ ++txq->cq_pi;
+#endif
} else {
txq->elts_comp = comp;
}
/* Start processing. */
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
+ /* A CQE slot must always be available. */
+ assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
do {
struct rte_mbuf *buf = *(pkts++);
uintptr_t addr;
uint32_t length;
unsigned int segs_n = buf->nb_segs;
- uint32_t cs_flags = 0;
+ uint8_t cs_flags;
/*
* Make sure there is enough room to store this packet and
* iteration.
*/
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
- /* Should we enable HW CKSUM offload */
- if (buf->ol_flags &
- (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
- cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
+ cs_flags = txq_ol_cksum_to_cs(txq, buf);
/* Retrieve packet information. */
length = PKT_LEN(buf);
/* Start new session if packet differs. */
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
+#ifndef NDEBUG
+ ++txq->cq_pi;
+#endif
} else {
txq->elts_comp = comp;
}
unsigned int do_inline = 0; /* Whether inline is possible. */
uint32_t length;
unsigned int segs_n = buf->nb_segs;
- uint32_t cs_flags = 0;
+ uint8_t cs_flags;
/*
* Make sure there is enough room to store this packet and
txq->stats.oerrors++;
break;
}
- /* Should we enable HW CKSUM offload. */
- if (buf->ol_flags &
- (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
- cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
+ cs_flags = txq_ol_cksum_to_cs(txq, buf);
/* Retrieve packet information. */
length = PKT_LEN(buf);
/* Start new session if:
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
txq->mpw_comp = txq->wqe_ci;
- txq->cq_pi++;
+#ifndef NDEBUG
+ ++txq->cq_pi;
+#endif
} else {
txq->elts_comp += j;
}