mlx5_tx_burst() doesn't inline data from the 2nd segment. If there's still
enough room in the descriptor after inlining the 1st segment, further
inlining from the 2nd segment would be beneficial to save PCIe bandwidth.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
if (max_elts < segs_n)
break;
max_elts -= segs_n;
if (max_elts < segs_n)
break;
max_elts -= segs_n;
if (unlikely(--max_wqe == 0))
break;
wqe = (volatile struct mlx5_wqe_v *)
if (unlikely(--max_wqe == 0))
break;
wqe = (volatile struct mlx5_wqe_v *)
}
/* Inline if enough room. */
if (max_inline || tso) {
}
/* Inline if enough room. */
if (max_inline || tso) {
uintptr_t end = (uintptr_t)
(((uintptr_t)txq->wqes) +
(1 << txq->wqe_n) * MLX5_WQE_SIZE);
uintptr_t end = (uintptr_t)
(((uintptr_t)txq->wqes) +
(1 << txq->wqe_n) * MLX5_WQE_SIZE);
RTE_CACHE_LINE_SIZE -
(pkt_inline_sz - 2) -
!!tso * sizeof(inl);
RTE_CACHE_LINE_SIZE -
(pkt_inline_sz - 2) -
!!tso * sizeof(inl);
- uintptr_t addr_end = (addr + inline_room) &
- ~(RTE_CACHE_LINE_SIZE - 1);
- unsigned int copy_b = (addr_end > addr) ?
- RTE_MIN((addr_end - addr), length) :
- 0;
-
+ uintptr_t addr_end;
+ unsigned int copy_b;
+
+pkt_inline:
+ addr_end = RTE_ALIGN_FLOOR(addr + inline_room,
+ RTE_CACHE_LINE_SIZE);
+ copy_b = (addr_end > addr) ?
+ RTE_MIN((addr_end - addr), length) : 0;
if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
/*
* One Dseg remains in the current WQE. To
if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
/*
* One Dseg remains in the current WQE. To
if (unlikely(max_wqe < n))
break;
max_wqe -= n;
if (unlikely(max_wqe < n))
break;
max_wqe -= n;
inl = rte_cpu_to_be_32(copy_b |
MLX5_INLINE_SEG);
rte_memcpy((void *)raw,
inl = rte_cpu_to_be_32(copy_b |
MLX5_INLINE_SEG);
rte_memcpy((void *)raw,
} else if (!segs_n) {
goto next_pkt;
} else {
} else if (!segs_n) {
goto next_pkt;
} else {
- /* dseg will be advance as part of next_seg */
- dseg = (volatile rte_v128u32_t *)
- ((uintptr_t)wqe +
- ((ds - 1) * MLX5_WQE_DWORD_SIZE));
- goto next_seg;
+ raw += copy_b;
+ inline_room -= copy_b;
+ --segs_n;
+ buf = buf->next;
+ assert(buf);
+ addr = rte_pktmbuf_mtod(buf, uintptr_t);
+ length = DATA_LEN(buf);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ total_length += length;
+#endif
+ (*txq->elts)[++elts_head & elts_m] = buf;
+ goto pkt_inline;
addr >> 32,
};
(*txq->elts)[++elts_head & elts_m] = buf;
addr >> 32,
};
(*txq->elts)[++elts_head & elts_m] = buf;
- ++sg;
- /* Advance counter only if all segs are successfully posted. */
- if (sg < segs_n)
next_pkt:
if (ds > MLX5_DSEG_MAX) {
txq->stats.oerrors++;
next_pkt:
if (ds > MLX5_DSEG_MAX) {
txq->stats.oerrors++;
++elts_head;
++pkts;
++i;
++elts_head;
++pkts;
++i;
/* Initialize known and common part of the WQE structure. */
if (tso) {
wqe->ctrl = (rte_v128u32_t){
/* Initialize known and common part of the WQE structure. */
if (tso) {
wqe->ctrl = (rte_v128u32_t){