1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2015 6WIND S.A.
3 * Copyright 2015 Mellanox Technologies, Ltd
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
14 #pragma GCC diagnostic ignored "-Wpedantic"
16 #include <infiniband/verbs.h>
17 #include <infiniband/mlx5dv.h>
19 #pragma GCC diagnostic error "-Wpedantic"
23 #include <rte_mempool.h>
24 #include <rte_prefetch.h>
25 #include <rte_common.h>
26 #include <rte_branch_prediction.h>
27 #include <rte_ether.h>
28 #include <rte_cycles.h>
31 #include "mlx5_utils.h"
32 #include "mlx5_rxtx.h"
33 #include "mlx5_autoconf.h"
34 #include "mlx5_defs.h"
37 static __rte_always_inline uint32_t
38 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);
40 static __rte_always_inline int
41 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
42 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);
44 static __rte_always_inline uint32_t
45 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);
47 static __rte_always_inline void
48 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
49 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res);
51 static __rte_always_inline void
52 mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx);
55 mlx5_queue_state_modify(struct rte_eth_dev *dev,
56 struct mlx5_mp_arg_queue_state_modify *sm);
58 uint32_t mlx5_ptype_table[] __rte_cache_aligned = {
59 [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */
62 uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned;
63 uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned;
66 * Build a table to translate Rx completion flags to packet type.
68 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
71 mlx5_set_ptype_table(void)
74 uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table;
76 /* Last entry must not be overwritten, reserved for errored packet. */
77 for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i)
78 (*p)[i] = RTE_PTYPE_UNKNOWN;
80 * The index to the array should have:
81 * bit[1:0] = l3_hdr_type
82 * bit[4:2] = l4_hdr_type
85 * bit[7] = outer_l3_type
88 (*p)[0x00] = RTE_PTYPE_L2_ETHER;
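/*
 * Example: index 0x01 (l3_hdr_type = 1) maps to IPv6 and 0x02
 * (l3_hdr_type = 2) maps to IPv4 in the entries below; 0x81/0x82 are
 * the same packets with outer_l3_type (bit 7) also set.
 */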
90 (*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
92 (*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
95 (*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
97 (*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
100 (*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
102 (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
104 (*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
106 (*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
108 (*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
110 (*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
113 (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
115 (*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
117 /* Repeat with outer_l3_type being set. Just in case. */
118 (*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
119 RTE_PTYPE_L4_NONFRAG;
120 (*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
121 RTE_PTYPE_L4_NONFRAG;
122 (*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
124 (*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
126 (*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
128 (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
130 (*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
132 (*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
134 (*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
136 (*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
138 (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
140 (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
143 (*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
144 (*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
145 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
146 RTE_PTYPE_INNER_L4_NONFRAG;
147 (*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
148 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
149 RTE_PTYPE_INNER_L4_NONFRAG;
150 (*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
151 (*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
152 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
153 RTE_PTYPE_INNER_L4_NONFRAG;
154 (*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
155 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
156 RTE_PTYPE_INNER_L4_NONFRAG;
157 /* Tunneled - Fragmented */
158 (*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
159 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
160 RTE_PTYPE_INNER_L4_FRAG;
161 (*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
162 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
163 RTE_PTYPE_INNER_L4_FRAG;
164 (*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
165 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
166 RTE_PTYPE_INNER_L4_FRAG;
167 (*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
168 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
169 RTE_PTYPE_INNER_L4_FRAG;
171 (*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
172 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
173 RTE_PTYPE_INNER_L4_TCP;
174 (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
175 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
176 RTE_PTYPE_INNER_L4_TCP;
177 (*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
178 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
179 RTE_PTYPE_INNER_L4_TCP;
180 (*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
181 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
182 RTE_PTYPE_INNER_L4_TCP;
183 (*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
184 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
185 RTE_PTYPE_INNER_L4_TCP;
186 (*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
187 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
188 RTE_PTYPE_INNER_L4_TCP;
189 (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
190 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
191 RTE_PTYPE_INNER_L4_TCP;
192 (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
193 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
194 RTE_PTYPE_INNER_L4_TCP;
195 (*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
196 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
197 RTE_PTYPE_INNER_L4_TCP;
198 (*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
199 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
200 RTE_PTYPE_INNER_L4_TCP;
201 (*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
202 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
203 RTE_PTYPE_INNER_L4_TCP;
204 (*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
205 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
206 RTE_PTYPE_INNER_L4_TCP;
208 (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
209 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
210 RTE_PTYPE_INNER_L4_UDP;
211 (*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
212 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
213 RTE_PTYPE_INNER_L4_UDP;
214 (*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
215 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
216 RTE_PTYPE_INNER_L4_UDP;
217 (*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
218 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
219 RTE_PTYPE_INNER_L4_UDP;
223 * Build a table to translate packet checksum requests to the checksum type of Verbs.
226 mlx5_set_cksum_table(void)
232 * The index should have:
233 * bit[0] = PKT_TX_TCP_SEG
234 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM
235 * bit[4] = PKT_TX_IP_CKSUM
236 * bit[8] = PKT_TX_OUTER_IP_CKSUM
239 for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) {
242 /* Tunneled packet. */
243 if (i & (1 << 8)) /* Outer IP. */
244 v |= MLX5_ETH_WQE_L3_CSUM;
245 if (i & (1 << 4)) /* Inner IP. */
246 v |= MLX5_ETH_WQE_L3_INNER_CSUM;
247 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */
248 v |= MLX5_ETH_WQE_L4_INNER_CSUM;
251 if (i & (1 << 4)) /* IP. */
252 v |= MLX5_ETH_WQE_L3_CSUM;
253 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */
254 v |= MLX5_ETH_WQE_L4_CSUM;
256 mlx5_cksum_table[i] = v;
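/*
 * Example: for a non-tunneled packet requesting IP checksum (bit 4)
 * plus any L4 or TSO bit, the else branch above yields
 * MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM.
 */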
261 * Build a table to translate packet type of mbuf to SWP type of Verbs.
264 mlx5_set_swp_types_table(void)
270 * The index should have:
271 * bit[0:1] = PKT_TX_L4_MASK
272 * bit[4] = PKT_TX_IPV6
273 * bit[8] = PKT_TX_OUTER_IPV6
274 * bit[9] = PKT_TX_OUTER_UDP
276 for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) {
279 v |= MLX5_ETH_WQE_L3_OUTER_IPV6;
281 v |= MLX5_ETH_WQE_L4_OUTER_UDP;
283 v |= MLX5_ETH_WQE_L3_INNER_IPV6;
284 if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52))
285 v |= MLX5_ETH_WQE_L4_INNER_UDP;
286 mlx5_swp_types_table[i] = v;
291 * Return the size of tailroom of WQ.
294 * Pointer to TX queue structure.
296 * Pointer to tail of WQ.
302 tx_mlx5_wq_tailroom(struct mlx5_txq_data *txq, void *addr)
305 tailroom = (uintptr_t)(txq->wqes) +
306 (1 << txq->wqe_n) * MLX5_WQE_SIZE -
312 * Copy data to tailroom of circular queue.
315 * Pointer to destination.
319 * Number of bytes to copy.
321 * Pointer to head of queue.
323 * Size of tailroom from dst.
326 * Pointer after copied data.
329 mlx5_copy_to_wq(void *dst, const void *src, size_t n,
330 void *base, size_t tailroom)
335 rte_memcpy(dst, src, tailroom);
336 rte_memcpy(base, (void *)((uintptr_t)src + tailroom),
338 ret = (uint8_t *)base + n - tailroom;
340 rte_memcpy(dst, src, n);
341 ret = (n == tailroom) ? base : (uint8_t *)dst + n;
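/*
 * Note: the first branch above handles the wrapping case: copy what fits
 * in the tailroom, then continue from the queue base. The second branch
 * is a plain copy when the data fits before the end of the queue.
 */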
347 * Inline TSO headers into WQE.
350 * 0 on success, negative errno value on failure.
353 inline_tso(struct mlx5_txq_data *txq, struct rte_mbuf *buf,
356 uint16_t *pkt_inline_sz,
360 uint16_t *tso_header_sz)
362 uintptr_t end = (uintptr_t)(((uintptr_t)txq->wqes) +
363 (1 << txq->wqe_n) * MLX5_WQE_SIZE);
365 uint8_t vlan_sz = (buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0;
366 const uint8_t tunneled = txq->tunnel_en && (buf->ol_flags &
370 *tso_segsz = buf->tso_segsz;
371 *tso_header_sz = buf->l2_len + vlan_sz + buf->l3_len + buf->l4_len;
372 if (unlikely(*tso_segsz == 0 || *tso_header_sz == 0)) {
373 txq->stats.oerrors++;
377 *tso_header_sz += buf->outer_l2_len + buf->outer_l3_len;
378 /* First seg must contain all TSO headers. */
379 if (unlikely(*tso_header_sz > MLX5_MAX_TSO_HEADER) ||
380 *tso_header_sz > DATA_LEN(buf)) {
381 txq->stats.oerrors++;
384 copy_b = *tso_header_sz - *pkt_inline_sz;
385 if (!copy_b || ((end - (uintptr_t)*raw) < copy_b))
387 n_wqe = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
388 if (unlikely(*max_wqe < n_wqe))
391 rte_memcpy((void *)*raw, (void *)*addr, copy_b);
394 copy_b = MLX5_WQE_DS(copy_b) * MLX5_WQE_DWORD_SIZE;
395 *pkt_inline_sz += copy_b;
401 * DPDK callback to check the status of a tx descriptor.
406 * The index of the descriptor in the ring.
409 * The status of the tx descriptor.
412 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
414 struct mlx5_txq_data *txq = tx_queue;
417 mlx5_tx_complete(txq);
418 used = txq->elts_head - txq->elts_tail;
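/* Unsigned wrap-around arithmetic keeps the count valid across index wrap. */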
420 return RTE_ETH_TX_DESC_FULL;
421 return RTE_ETH_TX_DESC_DONE;
425 * Internal function to compute the number of used descriptors in an RX queue
431 * The number of used rx descriptors.
434 rx_queue_count(struct mlx5_rxq_data *rxq)
436 struct rxq_zip *zip = &rxq->zip;
437 volatile struct mlx5_cqe *cqe;
438 const unsigned int cqe_n = (1 << rxq->cqe_n);
439 const unsigned int cqe_cnt = cqe_n - 1;
443 /* If we are processing a compressed CQE. */
445 used = zip->cqe_cnt - zip->ca;
451 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
452 while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
456 op_own = cqe->op_own;
457 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
458 n = rte_be_to_cpu_32(cqe->byte_cnt);
463 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
465 used = RTE_MIN(used, (1U << rxq->elts_n) - 1);
470 * DPDK callback to check the status of a rx descriptor.
475 * The index of the descriptor in the ring.
478 * The status of the rx descriptor.
481 mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
483 struct mlx5_rxq_data *rxq = rx_queue;
484 struct mlx5_rxq_ctrl *rxq_ctrl =
485 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
486 struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);
488 if (dev->rx_pkt_burst != mlx5_rx_burst) {
492 if (offset >= (1 << rxq->elts_n)) {
496 if (offset < rx_queue_count(rxq))
497 return RTE_ETH_RX_DESC_DONE;
498 return RTE_ETH_RX_DESC_AVAIL;
502 * DPDK callback to get the number of used descriptors in an RX queue.
505 * Pointer to the device structure.
511 * The number of used rx descriptors.
512 * -EINVAL if the queue is invalid.
515 mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
517 struct mlx5_priv *priv = dev->data->dev_private;
518 struct mlx5_rxq_data *rxq;
520 if (dev->rx_pkt_burst != mlx5_rx_burst) {
524 rxq = (*priv->rxqs)[rx_queue_id];
529 return rx_queue_count(rxq);
532 #define MLX5_SYSTEM_LOG_DIR "/var/log"
534 * Dump debug information to log file.
539 * If not NULL this string is printed as a header to the output
540 * and the output will be in hexadecimal view.
542 * This is the buffer address to print out.
544 * The number of bytes to dump out.
547 mlx5_dump_debug_information(const char *fname, const char *hex_title,
548 const void *buf, unsigned int hex_len)
552 MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname);
553 fd = fopen(path, "a+");
555 DRV_LOG(WARNING, "cannot open %s for debug dump\n",
557 MKSTR(path2, "./%s", fname);
558 fd = fopen(path2, "a+");
560 DRV_LOG(ERR, "cannot open %s for debug dump\n",
564 DRV_LOG(INFO, "New debug dump in file %s\n", path2);
566 DRV_LOG(INFO, "New debug dump in file %s\n", path);
569 rte_hexdump(fd, hex_title, buf, hex_len);
571 fprintf(fd, "%s", (const char *)buf);
572 fprintf(fd, "\n\n\n");
577 * Move QP from error state to running state and initialize indexes.
580 * Pointer to TX queue control structure.
583 * 0 on success, else -1.
586 tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl)
588 struct mlx5_mp_arg_queue_state_modify sm = {
590 .queue_id = txq_ctrl->txq.idx,
593 if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm))
595 txq_ctrl->txq.wqe_ci = 0;
596 txq_ctrl->txq.wqe_pi = 0;
597 txq_ctrl->txq.elts_comp = 0;
601 /* Return 1 if the error CQE has already been signed (seen); otherwise, sign it and return 0. */
603 check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
605 static const uint8_t magic[] = "seen";
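/*
 * The "seen" marker is written into the reserved bytes of the error
 * CQE so that the same CQE is only reported and dumped once.
 */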
609 for (i = 0; i < sizeof(magic); ++i)
610 if (!ret || err_cqe->rsvd1[i] != magic[i]) {
612 err_cqe->rsvd1[i] = magic[i];
621 * Pointer to TX queue structure.
623 * Pointer to the error CQE.
626 * The last Tx buffer element to free.
629 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *txq,
630 volatile struct mlx5_err_cqe *err_cqe)
632 if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
633 const uint16_t wqe_m = ((1 << txq->wqe_n) - 1);
634 struct mlx5_txq_ctrl *txq_ctrl =
635 container_of(txq, struct mlx5_txq_ctrl, txq);
636 uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter);
637 int seen = check_err_cqe_seen(err_cqe);
639 if (!seen && txq_ctrl->dump_file_n <
640 txq_ctrl->priv->config.max_dump_files_num) {
641 MKSTR(err_str, "Unexpected CQE error syndrome "
642 "0x%02x CQN = %u SQN = %u wqe_counter = %u "
643 "wq_ci = %u cq_ci = %u", err_cqe->syndrome,
644 txq_ctrl->cqn, txq->qp_num_8s >> 8,
645 rte_be_to_cpu_16(err_cqe->wqe_counter),
646 txq->wqe_ci, txq->cq_ci);
647 MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u",
648 PORT_ID(txq_ctrl->priv), txq->idx,
649 txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc());
650 mlx5_dump_debug_information(name, NULL, err_str, 0);
651 mlx5_dump_debug_information(name, "MLX5 Error CQ:",
652 (const void *)((uintptr_t)
656 mlx5_dump_debug_information(name, "MLX5 Error SQ:",
657 (const void *)((uintptr_t)
658 tx_mlx5_wqe(txq, 0)),
661 txq_ctrl->dump_file_n++;
665 * Count errors in WQE units.
666 * Later it can be improved to count error packets,
667 * for example, by SQ parsing to find how many packets
668 * should be counted for each WQE.
670 txq->stats.oerrors += ((txq->wqe_ci & wqe_m) -
672 if (tx_recover_qp(txq_ctrl) == 0) {
674 /* Release all the remaining buffers. */
675 return txq->elts_head;
677 /* Recovering failed - try again later on the same WQE. */
681 /* Do not release buffers. */
682 return txq->elts_tail;
686 * DPDK callback for TX.
689 * Generic pointer to TX queue structure.
691 * Packets to transmit.
693 * Number of packets in array.
696 * Number of packets successfully transmitted (<= pkts_n).
699 mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
701 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
702 uint16_t elts_head = txq->elts_head;
703 const uint16_t elts_n = 1 << txq->elts_n;
704 const uint16_t elts_m = elts_n - 1;
711 volatile struct mlx5_wqe_ctrl *last_wqe = NULL;
712 unsigned int segs_n = 0;
713 const unsigned int max_inline = txq->max_inline;
716 if (unlikely(!pkts_n))
718 /* Prefetch first packet cacheline. */
719 rte_prefetch0(*pkts);
720 /* Start processing. */
721 mlx5_tx_complete(txq);
722 max_elts = (elts_n - (elts_head - txq->elts_tail));
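/* Number of free entries in the elts ring; the unsigned arithmetic is wrap-safe. */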
723 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
724 if (unlikely(!max_wqe))
727 struct rte_mbuf *buf = *pkts; /* First_seg. */
729 volatile struct mlx5_wqe_v *wqe = NULL;
730 volatile rte_v128u32_t *dseg = NULL;
733 unsigned int sg = 0; /* counter of additional segs attached. */
735 uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2;
736 uint16_t tso_header_sz = 0;
739 uint8_t tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
740 uint32_t swp_offsets = 0;
741 uint8_t swp_types = 0;
743 uint16_t tso_segsz = 0;
744 #ifdef MLX5_PMD_SOFT_COUNTERS
745 uint32_t total_length = 0;
749 segs_n = buf->nb_segs;
751 * Make sure there is enough room to store this packet and
752 * that one ring entry remains unused.
755 if (max_elts < segs_n)
759 if (unlikely(--max_wqe == 0))
761 wqe = (volatile struct mlx5_wqe_v *)
762 tx_mlx5_wqe(txq, txq->wqe_ci);
763 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
765 rte_prefetch0(*(pkts + 1));
766 addr = rte_pktmbuf_mtod(buf, uintptr_t);
767 length = DATA_LEN(buf);
768 ehdr = (((uint8_t *)addr)[1] << 8) |
769 ((uint8_t *)addr)[0];
770 #ifdef MLX5_PMD_SOFT_COUNTERS
771 total_length = length;
773 if (length < (MLX5_WQE_DWORD_SIZE + 2)) {
774 txq->stats.oerrors++;
777 /* Update element. */
778 (*txq->elts)[elts_head & elts_m] = buf;
779 /* Prefetch next buffer data. */
782 rte_pktmbuf_mtod(*(pkts + 1), volatile void *));
783 cs_flags = txq_ol_cksum_to_cs(buf);
784 txq_mbuf_to_swp(txq, buf, (uint8_t *)&swp_offsets, &swp_types);
785 raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
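/*
 * Note: raw points just past the control segment and the fixed part of
 * the Ethernet segment; packet header bytes (after the 2-byte ehdr) are
 * inlined from here.
 */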
786 /* Copy metadata from mbuf if valid */
787 metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
789 /* Replace the Ethernet type by the VLAN if necessary. */
790 if (buf->ol_flags & PKT_TX_VLAN_PKT) {
791 uint32_t vlan = rte_cpu_to_be_32(0x81000000 |
793 unsigned int len = 2 * RTE_ETHER_ADDR_LEN - 2;
797 /* Copy the destination and source MAC addresses. */
798 memcpy((uint8_t *)raw, ((uint8_t *)addr), len);
800 memcpy((uint8_t *)raw + len, &vlan, sizeof(vlan));
801 /* Copy the two missing bytes to complete the DSeg. */
802 memcpy((uint8_t *)raw + len + sizeof(vlan),
803 ((uint8_t *)addr) + len, 2);
807 memcpy((uint8_t *)raw, ((uint8_t *)addr) + 2,
808 MLX5_WQE_DWORD_SIZE);
809 length -= pkt_inline_sz;
810 addr += pkt_inline_sz;
812 raw += MLX5_WQE_DWORD_SIZE;
814 ret = inline_tso(txq, buf, &length,
815 &addr, &pkt_inline_sz,
817 &tso_segsz, &tso_header_sz);
818 if (ret == -EINVAL) {
820 } else if (ret == -EAGAIN) {
822 wqe->ctrl = (rte_v128u32_t){
823 rte_cpu_to_be_32(txq->wqe_ci << 8),
824 rte_cpu_to_be_32(txq->qp_num_8s | 1),
826 (MLX5_COMP_ONLY_FIRST_ERR <<
827 MLX5_COMP_MODE_OFFSET),
831 #ifdef MLX5_PMD_SOFT_COUNTERS
838 /* Inline if enough room. */
839 if (max_inline || tso) {
841 uintptr_t end = (uintptr_t)
842 (((uintptr_t)txq->wqes) +
843 (1 << txq->wqe_n) * MLX5_WQE_SIZE);
844 unsigned int inline_room = max_inline *
845 RTE_CACHE_LINE_SIZE -
846 (pkt_inline_sz - 2) -
852 addr_end = RTE_ALIGN_FLOOR(addr + inline_room,
853 RTE_CACHE_LINE_SIZE);
854 copy_b = (addr_end > addr) ?
855 RTE_MIN((addr_end - addr), length) : 0;
856 if (copy_b && ((end - (uintptr_t)raw) >
857 (copy_b + sizeof(inl)))) {
859 * One Dseg remains in the current WQE. To
860 * keep the computation positive, it is
861 * removed after the bytes to Dseg conversion.
863 uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
865 if (unlikely(max_wqe < n))
870 inl = rte_cpu_to_be_32(copy_b |
872 rte_memcpy((void *)raw,
873 (void *)&inl, sizeof(inl));
875 pkt_inline_sz += sizeof(inl);
877 rte_memcpy((void *)raw, (void *)addr, copy_b);
880 pkt_inline_sz += copy_b;
883 * 2 DWORDs consumed by the WQE header + ETH segment +
884 * the size of the inline part of the packet.
886 ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
888 if (ds % (MLX5_WQE_SIZE /
889 MLX5_WQE_DWORD_SIZE) == 0) {
890 if (unlikely(--max_wqe == 0))
892 dseg = (volatile rte_v128u32_t *)
893 tx_mlx5_wqe(txq, txq->wqe_ci +
896 dseg = (volatile rte_v128u32_t *)
898 (ds * MLX5_WQE_DWORD_SIZE));
901 } else if (!segs_n) {
905 * Further inline the next segment only for
910 inline_room -= copy_b;
914 /* Move to the next segment. */
918 addr = rte_pktmbuf_mtod(buf, uintptr_t);
919 length = DATA_LEN(buf);
920 #ifdef MLX5_PMD_SOFT_COUNTERS
921 total_length += length;
923 (*txq->elts)[++elts_head & elts_m] = buf;
928 * No inline has been done in the packet, only the
929 * Ethernet header has been stored.
931 dseg = (volatile rte_v128u32_t *)
932 ((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE));
935 /* Add the remaining packet as a simple ds. */
936 addr_64 = rte_cpu_to_be_64(addr);
937 *dseg = (rte_v128u32_t){
938 rte_cpu_to_be_32(length),
939 mlx5_tx_mb2mr(txq, buf),
952 * Spill over to the next WQE when the current one does not have
953 * enough room left. The WQE size must be a multiple
954 * of the data segment size.
956 assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
957 if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) {
958 if (unlikely(--max_wqe == 0))
960 dseg = (volatile rte_v128u32_t *)
961 tx_mlx5_wqe(txq, txq->wqe_ci + ds / 4);
962 rte_prefetch0(tx_mlx5_wqe(txq,
963 txq->wqe_ci + ds / 4 + 1));
970 length = DATA_LEN(buf);
971 #ifdef MLX5_PMD_SOFT_COUNTERS
972 total_length += length;
974 /* Store segment information. */
975 addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
976 *dseg = (rte_v128u32_t){
977 rte_cpu_to_be_32(length),
978 mlx5_tx_mb2mr(txq, buf),
982 (*txq->elts)[++elts_head & elts_m] = buf;
986 if (ds > MLX5_DSEG_MAX) {
987 txq->stats.oerrors++;
994 /* Initialize known and common part of the WQE structure. */
996 wqe->ctrl = (rte_v128u32_t){
997 rte_cpu_to_be_32((txq->wqe_ci << 8) |
999 rte_cpu_to_be_32(txq->qp_num_8s | ds),
1000 rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
1001 MLX5_COMP_MODE_OFFSET),
1004 wqe->eseg = (rte_v128u32_t){
1006 cs_flags | (swp_types << 8) |
1007 (rte_cpu_to_be_16(tso_segsz) << 16),
1009 (ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
1012 wqe->ctrl = (rte_v128u32_t){
1013 rte_cpu_to_be_32((txq->wqe_ci << 8) |
1015 rte_cpu_to_be_32(txq->qp_num_8s | ds),
1016 rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
1017 MLX5_COMP_MODE_OFFSET),
1020 wqe->eseg = (rte_v128u32_t){
1022 cs_flags | (swp_types << 8),
1024 (ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
1028 txq->wqe_ci += (ds + 3) / 4;
1029 /* Save the last successful WQE for completion request */
1030 last_wqe = (volatile struct mlx5_wqe_ctrl *)wqe;
1031 #ifdef MLX5_PMD_SOFT_COUNTERS
1032 /* Increment sent bytes counter. */
1033 txq->stats.obytes += total_length;
1035 } while (i < pkts_n);
1036 /* Take a shortcut if nothing must be sent. */
1037 if (unlikely((i + k) == 0))
1039 txq->elts_head += (i + j);
1040 /* Check whether completion threshold has been reached. */
1041 comp = txq->elts_comp + i + j + k;
1042 if (comp >= MLX5_TX_COMP_THRESH) {
1043 /* A CQE slot must always be available. */
1044 assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
1045 /* Request completion on last WQE. */
1046 last_wqe->ctrl2 = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
1047 MLX5_COMP_MODE_OFFSET);
1048 /* Save elts_head in unused "immediate" field of WQE. */
1049 last_wqe->ctrl3 = txq->elts_head;
1052 txq->elts_comp = comp;
1054 #ifdef MLX5_PMD_SOFT_COUNTERS
1055 /* Increment sent packets counter. */
1056 txq->stats.opackets += i;
1058 /* Ring QP doorbell. */
1059 mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)last_wqe);
1064 * Open an MPW session.
1067 * Pointer to TX queue structure.
1069 * Pointer to MPW session structure.
1074 mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length)
1076 uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
1077 volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
1078 (volatile struct mlx5_wqe_data_seg (*)[])
1079 tx_mlx5_wqe(txq, idx + 1);
1081 mpw->state = MLX5_MPW_STATE_OPENED;
1085 mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
1086 mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
1087 mpw->wqe->eseg.inline_hdr_sz = 0;
1088 mpw->wqe->eseg.rsvd0 = 0;
1089 mpw->wqe->eseg.rsvd1 = 0;
1090 mpw->wqe->eseg.flow_table_metadata = 0;
1091 mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
1092 (txq->wqe_ci << 8) |
1094 mpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
1095 MLX5_COMP_MODE_OFFSET);
1096 mpw->wqe->ctrl[3] = 0;
1097 mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)
1098 (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
1099 mpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *)
1100 (((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE));
1101 mpw->data.dseg[2] = &(*dseg)[0];
1102 mpw->data.dseg[3] = &(*dseg)[1];
1103 mpw->data.dseg[4] = &(*dseg)[2];
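/*
 * Data segment layout: dseg[0] and dseg[1] share the first WQEBB with
 * the control and Ethernet segments, dseg[2..4] occupy the next WQEBB.
 */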
1107 * Close an MPW session.
1110 * Pointer to TX queue structure.
1112 * Pointer to MPW session structure.
1115 mlx5_mpw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
1117 unsigned int num = mpw->pkts_n;
1120 * Store size in multiple of 16 bytes. Control and Ethernet segments
1123 mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s | (2 + num));
1124 mpw->state = MLX5_MPW_STATE_CLOSED;
1129 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
1130 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
1134 * DPDK callback for TX with MPW support.
1137 * Generic pointer to TX queue structure.
1139 * Packets to transmit.
1141 * Number of packets in array.
1144 * Number of packets successfully transmitted (<= pkts_n).
1147 mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
1149 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
1150 uint16_t elts_head = txq->elts_head;
1151 const uint16_t elts_n = 1 << txq->elts_n;
1152 const uint16_t elts_m = elts_n - 1;
1158 struct mlx5_mpw mpw = {
1159 .state = MLX5_MPW_STATE_CLOSED,
1162 if (unlikely(!pkts_n))
1164 /* Prefetch first packet cacheline. */
1165 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
1166 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
1167 /* Start processing. */
1168 mlx5_tx_complete(txq);
1169 max_elts = (elts_n - (elts_head - txq->elts_tail));
1170 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
1171 if (unlikely(!max_wqe))
1174 struct rte_mbuf *buf = *(pkts++);
1176 unsigned int segs_n = buf->nb_segs;
1178 rte_be32_t metadata;
1181 * Make sure there is enough room to store this packet and
1182 * that one ring entry remains unused.
1185 if (max_elts < segs_n)
1187 /* Do not bother with large packets that MPW cannot handle. */
1188 if (segs_n > MLX5_MPW_DSEG_MAX) {
1189 txq->stats.oerrors++;
1194 cs_flags = txq_ol_cksum_to_cs(buf);
1195 /* Copy metadata from mbuf if valid */
1196 metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
1198 /* Retrieve packet information. */
1199 length = PKT_LEN(buf);
1201 /* Start new session if packet differs. */
1202 if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
1203 ((mpw.len != length) ||
1205 (mpw.wqe->eseg.flow_table_metadata != metadata) ||
1206 (mpw.wqe->eseg.cs_flags != cs_flags)))
1207 mlx5_mpw_close(txq, &mpw);
1208 if (mpw.state == MLX5_MPW_STATE_CLOSED) {
1210 * A Multi-Packet WQE consumes at most two WQEs.
1211 * mlx5_mpw_new() expects to be able to use such
1214 if (unlikely(max_wqe < 2))
1217 mlx5_mpw_new(txq, &mpw, length);
1218 mpw.wqe->eseg.cs_flags = cs_flags;
1219 mpw.wqe->eseg.flow_table_metadata = metadata;
1221 /* Multi-segment packets must be alone in their MPW. */
1222 assert((segs_n == 1) || (mpw.pkts_n == 0));
1223 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
1227 volatile struct mlx5_wqe_data_seg *dseg;
1231 (*txq->elts)[elts_head++ & elts_m] = buf;
1232 dseg = mpw.data.dseg[mpw.pkts_n];
1233 addr = rte_pktmbuf_mtod(buf, uintptr_t);
1234 *dseg = (struct mlx5_wqe_data_seg){
1235 .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
1236 .lkey = mlx5_tx_mb2mr(txq, buf),
1237 .addr = rte_cpu_to_be_64(addr),
1239 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
1240 length += DATA_LEN(buf);
1246 assert(length == mpw.len);
1247 if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
1248 mlx5_mpw_close(txq, &mpw);
1249 #ifdef MLX5_PMD_SOFT_COUNTERS
1250 /* Increment sent bytes counter. */
1251 txq->stats.obytes += length;
1255 /* Take a shortcut if nothing must be sent. */
1256 if (unlikely(i == 0))
1258 /* Check whether completion threshold has been reached. */
1259 /* "j" includes both packets and segments. */
1260 comp = txq->elts_comp + j;
1261 if (comp >= MLX5_TX_COMP_THRESH) {
1262 volatile struct mlx5_wqe *wqe = mpw.wqe;
1264 /* A CQE slot must always be available. */
1265 assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
1266 /* Request completion on last WQE. */
1267 wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
1268 MLX5_COMP_MODE_OFFSET);
1269 /* Save elts_head in unused "immediate" field of WQE. */
1270 wqe->ctrl[3] = elts_head;
1273 txq->elts_comp = comp;
1275 #ifdef MLX5_PMD_SOFT_COUNTERS
1276 /* Increment sent packets counter. */
1277 txq->stats.opackets += i;
1279 /* Ring QP doorbell. */
1280 if (mpw.state == MLX5_MPW_STATE_OPENED)
1281 mlx5_mpw_close(txq, &mpw);
1282 mlx5_tx_dbrec(txq, mpw.wqe);
1283 txq->elts_head = elts_head;
1288 * Open an MPW inline session.
1291 * Pointer to TX queue structure.
1293 * Pointer to MPW session structure.
1298 mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw,
1301 uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
1302 struct mlx5_wqe_inl_small *inl;
1304 mpw->state = MLX5_MPW_INL_STATE_OPENED;
1308 mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
1309 mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
1310 (txq->wqe_ci << 8) |
1312 mpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
1313 MLX5_COMP_MODE_OFFSET);
1314 mpw->wqe->ctrl[3] = 0;
1315 mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
1316 mpw->wqe->eseg.inline_hdr_sz = 0;
1317 mpw->wqe->eseg.cs_flags = 0;
1318 mpw->wqe->eseg.rsvd0 = 0;
1319 mpw->wqe->eseg.rsvd1 = 0;
1320 mpw->wqe->eseg.flow_table_metadata = 0;
1321 inl = (struct mlx5_wqe_inl_small *)
1322 (((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
1323 mpw->data.raw = (uint8_t *)&inl->raw;
1327 * Close an MPW inline session.
1330 * Pointer to TX queue structure.
1332 * Pointer to MPW session structure.
1335 mlx5_mpw_inline_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
1338 struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)
1339 (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
1341 size = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len;
1343 * Store size in multiple of 16 bytes. Control and Ethernet segments
1346 mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |
1348 mpw->state = MLX5_MPW_STATE_CLOSED;
1349 inl->byte_cnt = rte_cpu_to_be_32(mpw->total_len | MLX5_INLINE_SEG);
1350 txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
1354 * DPDK callback for TX with MPW inline support.
1357 * Generic pointer to TX queue structure.
1359 * Packets to transmit.
1361 * Number of packets in array.
1364 * Number of packets successfully transmitted (<= pkts_n).
1367 mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
1370 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
1371 uint16_t elts_head = txq->elts_head;
1372 const uint16_t elts_n = 1 << txq->elts_n;
1373 const uint16_t elts_m = elts_n - 1;
1379 unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE;
1380 struct mlx5_mpw mpw = {
1381 .state = MLX5_MPW_STATE_CLOSED,
1384 * Compute the maximum number of WQEs which can be consumed by inline
1387 * - 1 control segment,
1388 * - 1 Ethernet segment,
1389 * - N Dseg from the inline request.
1391 const unsigned int wqe_inl_n =
1392 ((2 * MLX5_WQE_DWORD_SIZE +
1393 txq->max_inline * RTE_CACHE_LINE_SIZE) +
1394 RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
1396 if (unlikely(!pkts_n))
1398 /* Prefetch first packet cacheline. */
1399 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
1400 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
1401 /* Start processing. */
1402 mlx5_tx_complete(txq);
1403 max_elts = (elts_n - (elts_head - txq->elts_tail));
1405 struct rte_mbuf *buf = *(pkts++);
1408 unsigned int segs_n = buf->nb_segs;
1410 rte_be32_t metadata;
1413 * Make sure there is enough room to store this packet and
1414 * that one ring entry remains unused.
1417 if (max_elts < segs_n)
1419 /* Do not bother with large packets that MPW cannot handle. */
1420 if (segs_n > MLX5_MPW_DSEG_MAX) {
1421 txq->stats.oerrors++;
1427 * Compute max_wqe in case fewer WQEs were consumed in the previous
1430 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
1431 cs_flags = txq_ol_cksum_to_cs(buf);
1432 /* Copy metadata from mbuf if valid */
1433 metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
1435 /* Retrieve packet information. */
1436 length = PKT_LEN(buf);
1437 /* Start new session if packet differs. */
1438 if (mpw.state == MLX5_MPW_STATE_OPENED) {
1439 if ((mpw.len != length) ||
1441 (mpw.wqe->eseg.flow_table_metadata != metadata) ||
1442 (mpw.wqe->eseg.cs_flags != cs_flags))
1443 mlx5_mpw_close(txq, &mpw);
1444 } else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
1445 if ((mpw.len != length) ||
1447 (length > inline_room) ||
1448 (mpw.wqe->eseg.flow_table_metadata != metadata) ||
1449 (mpw.wqe->eseg.cs_flags != cs_flags)) {
1450 mlx5_mpw_inline_close(txq, &mpw);
1452 txq->max_inline * RTE_CACHE_LINE_SIZE;
1455 if (mpw.state == MLX5_MPW_STATE_CLOSED) {
1456 if ((segs_n != 1) ||
1457 (length > inline_room)) {
1459 * A Multi-Packet WQE consumes at most two WQEs.
1460 * mlx5_mpw_new() expects to be able to use
1463 if (unlikely(max_wqe < 2))
1466 mlx5_mpw_new(txq, &mpw, length);
1467 mpw.wqe->eseg.cs_flags = cs_flags;
1468 mpw.wqe->eseg.flow_table_metadata = metadata;
1470 if (unlikely(max_wqe < wqe_inl_n))
1472 max_wqe -= wqe_inl_n;
1473 mlx5_mpw_inline_new(txq, &mpw, length);
1474 mpw.wqe->eseg.cs_flags = cs_flags;
1475 mpw.wqe->eseg.flow_table_metadata = metadata;
1478 /* Multi-segment packets must be alone in their MPW. */
1479 assert((segs_n == 1) || (mpw.pkts_n == 0));
1480 if (mpw.state == MLX5_MPW_STATE_OPENED) {
1481 assert(inline_room ==
1482 txq->max_inline * RTE_CACHE_LINE_SIZE);
1483 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
1487 volatile struct mlx5_wqe_data_seg *dseg;
1490 (*txq->elts)[elts_head++ & elts_m] = buf;
1491 dseg = mpw.data.dseg[mpw.pkts_n];
1492 addr = rte_pktmbuf_mtod(buf, uintptr_t);
1493 *dseg = (struct mlx5_wqe_data_seg){
1495 rte_cpu_to_be_32(DATA_LEN(buf)),
1496 .lkey = mlx5_tx_mb2mr(txq, buf),
1497 .addr = rte_cpu_to_be_64(addr),
1499 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
1500 length += DATA_LEN(buf);
1506 assert(length == mpw.len);
1507 if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
1508 mlx5_mpw_close(txq, &mpw);
1512 assert(mpw.state == MLX5_MPW_INL_STATE_OPENED);
1513 assert(length <= inline_room);
1514 assert(length == DATA_LEN(buf));
1515 addr = rte_pktmbuf_mtod(buf, uintptr_t);
1516 (*txq->elts)[elts_head++ & elts_m] = buf;
1517 /* Maximum number of bytes before wrapping. */
1518 max = ((((uintptr_t)(txq->wqes)) +
1521 (uintptr_t)mpw.data.raw);
1523 rte_memcpy((void *)(uintptr_t)mpw.data.raw,
1526 mpw.data.raw = (volatile void *)txq->wqes;
1527 rte_memcpy((void *)(uintptr_t)mpw.data.raw,
1528 (void *)(addr + max),
1530 mpw.data.raw += length - max;
1532 rte_memcpy((void *)(uintptr_t)mpw.data.raw,
1538 (volatile void *)txq->wqes;
1540 mpw.data.raw += length;
1543 mpw.total_len += length;
1545 if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) {
1546 mlx5_mpw_inline_close(txq, &mpw);
1548 txq->max_inline * RTE_CACHE_LINE_SIZE;
1550 inline_room -= length;
1553 #ifdef MLX5_PMD_SOFT_COUNTERS
1554 /* Increment sent bytes counter. */
1555 txq->stats.obytes += length;
1559 /* Take a shortcut if nothing must be sent. */
1560 if (unlikely(i == 0))
1562 /* Check whether completion threshold has been reached. */
1563 /* "j" includes both packets and segments. */
1564 comp = txq->elts_comp + j;
1565 if (comp >= MLX5_TX_COMP_THRESH) {
1566 volatile struct mlx5_wqe *wqe = mpw.wqe;
1568 /* A CQE slot must always be available. */
1569 assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
1570 /* Request completion on last WQE. */
1571 wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
1572 MLX5_COMP_MODE_OFFSET);
1573 /* Save elts_head in unused "immediate" field of WQE. */
1574 wqe->ctrl[3] = elts_head;
1577 txq->elts_comp = comp;
1579 #ifdef MLX5_PMD_SOFT_COUNTERS
1580 /* Increment sent packets counter. */
1581 txq->stats.opackets += i;
1583 /* Ring QP doorbell. */
1584 if (mpw.state == MLX5_MPW_INL_STATE_OPENED)
1585 mlx5_mpw_inline_close(txq, &mpw);
1586 else if (mpw.state == MLX5_MPW_STATE_OPENED)
1587 mlx5_mpw_close(txq, &mpw);
1588 mlx5_tx_dbrec(txq, mpw.wqe);
1589 txq->elts_head = elts_head;
1594 * Open an Enhanced MPW session.
1597 * Pointer to TX queue structure.
1599 * Pointer to MPW session structure.
1604 mlx5_empw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, int padding)
1606 uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
1608 mpw->state = MLX5_MPW_ENHANCED_STATE_OPENED;
1610 mpw->total_len = sizeof(struct mlx5_wqe);
1611 mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
1613 rte_cpu_to_be_32((MLX5_OPC_MOD_ENHANCED_MPSW << 24) |
1614 (txq->wqe_ci << 8) |
1615 MLX5_OPCODE_ENHANCED_MPSW);
1616 mpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
1617 MLX5_COMP_MODE_OFFSET);
1618 mpw->wqe->ctrl[3] = 0;
1619 memset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE);
1620 if (unlikely(padding)) {
1621 uintptr_t addr = (uintptr_t)(mpw->wqe + 1);
1623 /* Pad the first 2 DWORDs with zero-length inline header. */
1624 *(volatile uint32_t *)addr = rte_cpu_to_be_32(MLX5_INLINE_SEG);
1625 *(volatile uint32_t *)(addr + MLX5_WQE_DWORD_SIZE) =
1626 rte_cpu_to_be_32(MLX5_INLINE_SEG);
1627 mpw->total_len += 2 * MLX5_WQE_DWORD_SIZE;
1628 /* Start from the next WQEBB. */
1629 mpw->data.raw = (volatile void *)(tx_mlx5_wqe(txq, idx + 1));
1631 mpw->data.raw = (volatile void *)(mpw->wqe + 1);
1636 * Close an Enhanced MPW session.
1639 * Pointer to TX queue structure.
1641 * Pointer to MPW session structure.
1644 * Number of consumed WQEs.
1646 static inline uint16_t
1647 mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
1651 /* Store size in multiple of 16 bytes. Control and Ethernet segments
1654 mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |
1655 MLX5_WQE_DS(mpw->total_len));
1656 mpw->state = MLX5_MPW_STATE_CLOSED;
1657 ret = (mpw->total_len + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
1663 * TX with Enhanced MPW support.
1666 * Pointer to TX queue structure.
1668 * Packets to transmit.
1670 * Number of packets in array.
1673 * Number of packets successfully transmitted (<= pkts_n).
1675 static inline uint16_t
1676 txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
1679 uint16_t elts_head = txq->elts_head;
1680 const uint16_t elts_n = 1 << txq->elts_n;
1681 const uint16_t elts_m = elts_n - 1;
1686 unsigned int max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
1687 unsigned int mpw_room = 0;
1688 unsigned int inl_pad = 0;
1691 struct mlx5_mpw mpw = {
1692 .state = MLX5_MPW_STATE_CLOSED,
1695 if (unlikely(!pkts_n))
1697 /* Start processing. */
1698 mlx5_tx_complete(txq);
1699 max_elts = (elts_n - (elts_head - txq->elts_tail));
1700 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
1701 if (unlikely(!max_wqe))
1704 struct rte_mbuf *buf = *(pkts++);
1706 unsigned int do_inline = 0; /* Whether inline is possible. */
1709 rte_be32_t metadata;
1711 /* Multi-segment packets are handled in the slow path outside. */
1712 assert(NB_SEGS(buf) == 1);
1713 /* Make sure there is enough room to store this packet. */
1714 if (max_elts - j == 0)
1716 cs_flags = txq_ol_cksum_to_cs(buf);
1717 /* Copy metadata from mbuf if valid */
1718 metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
1720 /* Retrieve packet information. */
1721 length = PKT_LEN(buf);
1722 /* Start new session if:
1723 * - multi-segment packet
1724 * - no space left even for a dseg
1725 * - next packet can be inlined with a new WQE
1728 if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) {
1729 if ((inl_pad + sizeof(struct mlx5_wqe_data_seg) >
1731 (length <= txq->inline_max_packet_sz &&
1732 inl_pad + sizeof(inl_hdr) + length >
1734 (mpw.wqe->eseg.flow_table_metadata != metadata) ||
1735 (mpw.wqe->eseg.cs_flags != cs_flags))
1736 max_wqe -= mlx5_empw_close(txq, &mpw);
1738 if (unlikely(mpw.state == MLX5_MPW_STATE_CLOSED)) {
1739 /* In Enhanced MPW, inline as much as the budget
1740 * allows. The remaining space is to be filled with
1741 * dsegs. If the title WQEBB isn't padded, it will have
1744 mpw_room = RTE_MIN(MLX5_WQE_SIZE_MAX,
1745 (max_inline ? max_inline :
1746 pkts_n * MLX5_WQE_DWORD_SIZE) +
1748 if (unlikely(max_wqe * MLX5_WQE_SIZE < mpw_room))
1750 /* Don't pad the title WQEBB so as not to waste WQ space. */
1751 mlx5_empw_new(txq, &mpw, 0);
1752 mpw_room -= mpw.total_len;
1754 do_inline = length <= txq->inline_max_packet_sz &&
1755 sizeof(inl_hdr) + length <= mpw_room &&
1757 mpw.wqe->eseg.cs_flags = cs_flags;
1758 mpw.wqe->eseg.flow_table_metadata = metadata;
1760 /* Evaluate whether the next packet can be inlined.
1761 * Inlining is possible when:
1762 * - length is less than the configured value
1763 * - length fits in the remaining space
1764 * - not required to fill the title WQEBB with dsegs
1767 length <= txq->inline_max_packet_sz &&
1768 inl_pad + sizeof(inl_hdr) + length <=
1770 (!txq->mpw_hdr_dseg ||
1771 mpw.total_len >= MLX5_WQE_SIZE);
1773 if (max_inline && do_inline) {
1774 /* Inline packet into WQE. */
1777 assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);
1778 assert(length == DATA_LEN(buf));
1779 inl_hdr = rte_cpu_to_be_32(length | MLX5_INLINE_SEG);
1780 addr = rte_pktmbuf_mtod(buf, uintptr_t);
1781 mpw.data.raw = (volatile void *)
1782 ((uintptr_t)mpw.data.raw + inl_pad);
1783 max = tx_mlx5_wq_tailroom(txq,
1784 (void *)(uintptr_t)mpw.data.raw);
1785 /* Copy inline header. */
1786 mpw.data.raw = (volatile void *)
1788 (void *)(uintptr_t)mpw.data.raw,
1791 (void *)(uintptr_t)txq->wqes,
1793 max = tx_mlx5_wq_tailroom(txq,
1794 (void *)(uintptr_t)mpw.data.raw);
1795 /* Copy packet data. */
1796 mpw.data.raw = (volatile void *)
1798 (void *)(uintptr_t)mpw.data.raw,
1801 (void *)(uintptr_t)txq->wqes,
1804 mpw.total_len += (inl_pad + sizeof(inl_hdr) + length);
1805 /* No need to get completion as the entire packet is
1806 * copied to WQ. Free the buf right away.
1808 rte_pktmbuf_free_seg(buf);
1809 mpw_room -= (inl_pad + sizeof(inl_hdr) + length);
1810 /* Compute the padding needed before the next packet, if any. */
1811 inl_pad = (((uintptr_t)mpw.data.raw +
1812 (MLX5_WQE_DWORD_SIZE - 1)) &
1813 ~(MLX5_WQE_DWORD_SIZE - 1)) -
1814 (uintptr_t)mpw.data.raw;
1816 /* No inline. Add a dseg pointing to the packet buffer. */
1817 volatile rte_v128u32_t *dseg;
1819 assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);
1820 assert((inl_pad + sizeof(*dseg)) <= mpw_room);
1821 assert(length == DATA_LEN(buf));
1822 if (!tx_mlx5_wq_tailroom(txq,
1823 (void *)((uintptr_t)mpw.data.raw
1825 dseg = (volatile void *)txq->wqes;
1827 dseg = (volatile void *)
1828 ((uintptr_t)mpw.data.raw +
1830 (*txq->elts)[elts_head++ & elts_m] = buf;
1831 addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
1833 *dseg = (rte_v128u32_t) {
1834 rte_cpu_to_be_32(length),
1835 mlx5_tx_mb2mr(txq, buf),
1839 mpw.data.raw = (volatile void *)(dseg + 1);
1840 mpw.total_len += (inl_pad + sizeof(*dseg));
1843 mpw_room -= (inl_pad + sizeof(*dseg));
1846 #ifdef MLX5_PMD_SOFT_COUNTERS
1847 /* Increment sent bytes counter. */
1848 txq->stats.obytes += length;
1851 } while (i < pkts_n);
1852 /* Take a shortcut if nothing must be sent. */
1853 if (unlikely(i == 0))
1855 /* Check whether completion threshold has been reached. */
1856 if (txq->elts_comp + j >= MLX5_TX_COMP_THRESH ||
1857 (uint16_t)(txq->wqe_ci - txq->mpw_comp) >=
1858 (1 << txq->wqe_n) / MLX5_TX_COMP_THRESH_INLINE_DIV) {
1859 volatile struct mlx5_wqe *wqe = mpw.wqe;
1861 /* A CQE slot must always be available. */
1862 assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
1863 /* Request completion on last WQE. */
1864 wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
1865 MLX5_COMP_MODE_OFFSET);
1866 /* Save elts_head in unused "immediate" field of WQE. */
1867 wqe->ctrl[3] = elts_head;
1869 txq->mpw_comp = txq->wqe_ci;
1871 txq->elts_comp += j;
1873 #ifdef MLX5_PMD_SOFT_COUNTERS
1874 /* Increment sent packets counter. */
1875 txq->stats.opackets += i;
1877 if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED)
1878 mlx5_empw_close(txq, &mpw);
1879 /* Ring QP doorbell. */
1880 mlx5_tx_dbrec(txq, mpw.wqe);
1881 txq->elts_head = elts_head;
1886 * DPDK callback for TX with Enhanced MPW support.
1889 * Generic pointer to TX queue structure.
1891 * Packets to transmit.
1893 * Number of packets in array.
1896 * Number of packets successfully transmitted (<= pkts_n).
1899 mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
1901 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
1904 while (pkts_n > nb_tx) {
1908 n = txq_count_contig_multi_seg(&pkts[nb_tx], pkts_n - nb_tx);
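/*
 * Multi-segment runs are sent through the generic mlx5_tx_burst() path;
 * contiguous single-segment runs go through txq_burst_empw().
 */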
1910 ret = mlx5_tx_burst(dpdk_txq, &pkts[nb_tx], n);
1915 n = txq_count_contig_single_seg(&pkts[nb_tx], pkts_n - nb_tx);
1917 ret = txq_burst_empw(txq, &pkts[nb_tx], n);
1927 * Translate RX completion flags to packet type.
1930 * Pointer to RX queue structure.
1934 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
1937 * Packet type for struct rte_mbuf.
1939 static inline uint32_t
1940 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
1943 uint8_t pinfo = cqe->pkt_info;
1944 uint16_t ptype = cqe->hdr_type_etc;
1947 * The index to the array should have:
1948 * bit[1:0] = l3_hdr_type
1949 * bit[4:2] = l4_hdr_type
1952 * bit[7] = outer_l3_type
1954 idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10);
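/*
 * The two low bits of pkt_info form idx bits 7:6, while bits 15:10 of
 * hdr_type_etc form idx bits 5:0 of the table index described above.
 */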
1955 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
1959 * Initialize Rx WQ and indexes.
1962 * Pointer to RX queue structure.
1965 mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
1967 const unsigned int wqe_n = 1 << rxq->elts_n;
1970 for (i = 0; (i != wqe_n); ++i) {
1971 volatile struct mlx5_wqe_data_seg *scat;
1973 uint32_t byte_count;
1975 if (mlx5_rxq_mprq_enabled(rxq)) {
1976 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i];
1978 scat = &((volatile struct mlx5_wqe_mprq *)
1980 addr = (uintptr_t)mlx5_mprq_buf_addr(buf);
1981 byte_count = (1 << rxq->strd_sz_n) *
1982 (1 << rxq->strd_num_n);
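/* For MPRQ the WQE covers the whole buffer: stride size times the number of strides. */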
1984 struct rte_mbuf *buf = (*rxq->elts)[i];
1986 scat = &((volatile struct mlx5_wqe_data_seg *)
1988 addr = rte_pktmbuf_mtod(buf, uintptr_t);
1989 byte_count = DATA_LEN(buf);
1991 /* scat->addr must be able to store a pointer. */
1992 assert(sizeof(scat->addr) >= sizeof(uintptr_t));
1993 *scat = (struct mlx5_wqe_data_seg){
1994 .addr = rte_cpu_to_be_64(addr),
1995 .byte_count = rte_cpu_to_be_32(byte_count),
1996 .lkey = mlx5_rx_addr2mr(rxq, addr),
1999 rxq->consumed_strd = 0;
2000 rxq->decompressed = 0;
2002 rxq->zip = (struct rxq_zip){
2005 /* Update doorbell counter. */
2006 rxq->rq_ci = wqe_n >> rxq->sges_n;
2008 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
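/* Publish the RQ consumer index to the doorbell record so the posted WQEs become visible to the HW. */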
2012 * Modify a Verbs queue state.
2013 * This must be called from the primary process.
2016 * Pointer to Ethernet device.
2018 * State modify request parameters.
2021 * 0 in case of success else non-zero value and rte_errno is set.
2024 mlx5_queue_state_modify_primary(struct rte_eth_dev *dev,
2025 const struct mlx5_mp_arg_queue_state_modify *sm)
2028 struct mlx5_priv *priv = dev->data->dev_private;
2031 struct ibv_wq_attr mod = {
2032 .attr_mask = IBV_WQ_ATTR_STATE,
2033 .wq_state = sm->state,
2035 struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id];
2036 struct mlx5_rxq_ctrl *rxq_ctrl =
2037 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
2039 ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod);
2041 DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s\n",
2042 sm->state, strerror(errno));
2047 struct mlx5_txq_data *txq = (*priv->txqs)[sm->queue_id];
2048 struct mlx5_txq_ctrl *txq_ctrl =
2049 container_of(txq, struct mlx5_txq_ctrl, txq);
2050 struct ibv_qp_attr mod = {
2051 .qp_state = IBV_QPS_RESET,
2052 .port_num = (uint8_t)priv->ibv_port,
2054 struct ibv_qp *qp = txq_ctrl->ibv->qp;
2056 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
2058 DRV_LOG(ERR, "Cannot change the Tx QP state to RESET "
2059 "%s\n", strerror(errno));
2063 mod.qp_state = IBV_QPS_INIT;
2064 ret = mlx5_glue->modify_qp(qp, &mod,
2065 (IBV_QP_STATE | IBV_QP_PORT));
2067 DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s\n",
2072 mod.qp_state = IBV_QPS_RTR;
2073 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
2075 DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s\n",
2080 mod.qp_state = IBV_QPS_RTS;
2081 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
2083 DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s\n",
2093 * Modify a Verbs queue state.
2096 * Pointer to Ethernet device.
2098 * State modify request parameters.
2101 * 0 in case of success else non-zero value.
2104 mlx5_queue_state_modify(struct rte_eth_dev *dev,
2105 struct mlx5_mp_arg_queue_state_modify *sm)
2109 switch (rte_eal_process_type()) {
2110 case RTE_PROC_PRIMARY:
2111 ret = mlx5_queue_state_modify_primary(dev, sm);
2113 case RTE_PROC_SECONDARY:
2114 ret = mlx5_mp_req_queue_state_modify(dev, sm);
2123 * Handle a Rx error.
2124 * The function moves the RQ state to RESET when the first error CQE is
2125 * seen, then the CQ is drained by the caller's function loop. When the CQ is empty,
2126 * it moves the RQ state to READY and reinitializes the RQ.
2127 * Identifying the next CQE and counting errors are the caller's responsibility.
2130 * Pointer to RX queue structure.
2131 * @param[in] mbuf_prepare
2132 * Whether to prepare mbufs for the RQ.
2135 * -1 in case of recovery error, otherwise the CQE status.
2138 mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare)
2140 const uint16_t cqe_n = 1 << rxq->cqe_n;
2141 const uint16_t cqe_mask = cqe_n - 1;
2142 const unsigned int wqe_n = 1 << rxq->elts_n;
2143 struct mlx5_rxq_ctrl *rxq_ctrl =
2144 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
2146 volatile struct mlx5_cqe *cqe;
2147 volatile struct mlx5_err_cqe *err_cqe;
2149 .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask],
2151 struct mlx5_mp_arg_queue_state_modify sm;
2154 switch (rxq->err_state) {
2155 case MLX5_RXQ_ERR_STATE_NO_ERROR:
2156 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
2158 case MLX5_RXQ_ERR_STATE_NEED_RESET:
2160 sm.queue_id = rxq->idx;
2161 sm.state = IBV_WQS_RESET;
2162 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm))
2164 if (rxq_ctrl->dump_file_n <
2165 rxq_ctrl->priv->config.max_dump_files_num) {
2166 MKSTR(err_str, "Unexpected CQE error syndrome "
2167 "0x%02x CQN = %u RQN = %u wqe_counter = %u"
2168 " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome,
2169 rxq->cqn, rxq_ctrl->wqn,
2170 rte_be_to_cpu_16(u.err_cqe->wqe_counter),
2171 rxq->rq_ci << rxq->sges_n, rxq->cq_ci);
2172 MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u",
2173 rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc());
2174 mlx5_dump_debug_information(name, NULL, err_str, 0);
2175 mlx5_dump_debug_information(name, "MLX5 Error CQ:",
2176 (const void *)((uintptr_t)
2178 sizeof(*u.cqe) * cqe_n);
2179 mlx5_dump_debug_information(name, "MLX5 Error RQ:",
2180 (const void *)((uintptr_t)
2183 rxq_ctrl->dump_file_n++;
2185 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY;
2187 case MLX5_RXQ_ERR_STATE_NEED_READY:
2188 ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci);
2189 if (ret == MLX5_CQE_STATUS_HW_OWN) {
2191 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
2194 * The RQ consumer index must be zeroed while moving
2195 * from RESET state to RDY state.
2197 *rxq->rq_db = rte_cpu_to_be_32(0);
2200 sm.queue_id = rxq->idx;
2201 sm.state = IBV_WQS_RDY;
2202 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv),
2206 const uint16_t q_mask = wqe_n - 1;
2208 struct rte_mbuf **elt;
2210 unsigned int n = wqe_n - (rxq->rq_ci -
2213 for (i = 0; i < (int)n; ++i) {
2214 elt_idx = (rxq->rq_ci + i) & q_mask;
2215 elt = &(*rxq->elts)[elt_idx];
2216 *elt = rte_mbuf_raw_alloc(rxq->mp);
2218 for (i--; i >= 0; --i) {
2219 elt_idx = (rxq->rq_ci +
2223 rte_pktmbuf_free_seg
2230 mlx5_rxq_initialize(rxq);
2231 rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
2240 * Get size of the next packet for a given CQE. For compressed CQEs, the
2241 * consumer index is updated only once all packets of the current one have
2245 * Pointer to RX queue.
2249 * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not
2253 * 0 in case of empty CQE, otherwise the packet size in bytes.
2256 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
2257 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe)
2259 struct rxq_zip *zip = &rxq->zip;
2260 uint16_t cqe_n = cqe_cnt + 1;
2266 /* Process compressed data in the CQE and mini arrays. */
2268 volatile struct mlx5_mini_cqe8 (*mc)[8] =
2269 (volatile struct mlx5_mini_cqe8 (*)[8])
2270 (uintptr_t)(&(*rxq->cqes)[zip->ca &
2273 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt);
2274 *mcqe = &(*mc)[zip->ai & 7];
2275 if ((++zip->ai & 7) == 0) {
2276 /* Invalidate consumed CQEs */
2279 while (idx != end) {
2280 (*rxq->cqes)[idx & cqe_cnt].op_own =
2281 MLX5_CQE_INVALIDATE;
2285 * Increment consumer index to skip the number
2286 * of CQEs consumed. Hardware leaves holes in
2287 * the CQ ring for software use.
2292 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
2293 /* Invalidate the rest */
2297 while (idx != end) {
2298 (*rxq->cqes)[idx & cqe_cnt].op_own =
2299 MLX5_CQE_INVALIDATE;
2302 rxq->cq_ci = zip->cq_ci;
2306 * No compressed data, get next CQE and verify if it is
2313 ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
2314 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
2315 if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
2317 ret = mlx5_rx_err_handle(rxq, 0);
2318 if (ret == MLX5_CQE_STATUS_HW_OWN ||
2326 op_own = cqe->op_own;
2327 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
2328 volatile struct mlx5_mini_cqe8 (*mc)[8] =
2329 (volatile struct mlx5_mini_cqe8 (*)[8])
2330 (uintptr_t)(&(*rxq->cqes)
2334 /* Fix endianness. */
2335 zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt);
2337 * Current mini array position is the one
2338 * returned by check_cqe().
2340 * If completion comprises several mini arrays,
2341 * as a special case the second one is located
2342 * 7 CQEs after the initial CQE instead of 8
2343 * for subsequent ones.
2345 zip->ca = rxq->cq_ci;
2346 zip->na = zip->ca + 7;
2347 /* Compute the next non compressed CQE. */
2349 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
2350 /* Get packet size to return. */
2351 len = rte_be_to_cpu_32((*mc)[0].byte_cnt);
2354 /* Prefetch all CQEs to be invalidated. */
2357 while (idx != end) {
2358 rte_prefetch0(&(*rxq->cqes)[(idx) &
2363 len = rte_be_to_cpu_32(cqe->byte_cnt);
2366 if (unlikely(rxq->err_state)) {
2367 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
2368 ++rxq->stats.idropped;
2376 * Translate RX completion flags to offload flags.
2382 * Offload flags (ol_flags) for struct rte_mbuf.
2384 static inline uint32_t
2385 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe)
2387 uint32_t ol_flags = 0;
2388 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc);
2392 MLX5_CQE_RX_L3_HDR_VALID,
2393 PKT_RX_IP_CKSUM_GOOD) |
2395 MLX5_CQE_RX_L4_HDR_VALID,
2396 PKT_RX_L4_CKSUM_GOOD);
2401 * Fill in mbuf fields from RX completion flags.
2402 * Note that pkt->ol_flags should be initialized outside of this function.
2405 * Pointer to RX queue.
2410 * @param rss_hash_res
2411 * Packet RSS Hash result.
2414 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
2415 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res)
2417 /* Update packet information. */
2418 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe);
2419 if (rss_hash_res && rxq->rss_hash) {
2420 pkt->hash.rss = rss_hash_res;
		pkt->ol_flags |= PKT_RX_RSS_HASH;
	}
2423 if (rxq->mark && MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) {
2424 pkt->ol_flags |= PKT_RX_FDIR;
2425 if (cqe->sop_drop_qpn !=
2426 rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) {
2427 uint32_t mark = cqe->sop_drop_qpn;
2429 pkt->ol_flags |= PKT_RX_FDIR_ID;
			pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
		}
	}
2434 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe);
2435 if (rxq->vlan_strip &&
2436 (cqe->hdr_type_etc & rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) {
2437 pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
		pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info);
	}
	if (rxq->hw_timestamp) {
		pkt->timestamp = rte_be_to_cpu_64(cqe->timestamp);
		pkt->ol_flags |= PKT_RX_TIMESTAMP;
	}
}
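/*
 * Illustrative sketch (application-side view, hypothetical helper, not part
 * of the driver): the flags and fields filled in by rxq_cq_to_mbuf() are
 * consumed through the generic mbuf API. A flow MARK is reported via
 * PKT_RX_FDIR_ID together with hash.fdir.hi, while the RSS hash is reported
 * via PKT_RX_RSS_HASH together with hash.rss.
 */
static __rte_unused uint32_t
mlx5_rx_meta_sketch(struct rte_mbuf *pkt)
{
	if (pkt->ol_flags & PKT_RX_FDIR_ID)
		return pkt->hash.fdir.hi; /* Flow mark value. */
	if (pkt->ol_flags & PKT_RX_RSS_HASH)
		return pkt->hash.rss; /* RSS hash result. */
	return 0;
}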
/**
 * DPDK callback for RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
2462 struct mlx5_rxq_data *rxq = dpdk_rxq;
2463 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
2464 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
2465 const unsigned int sges_n = rxq->sges_n;
2466 struct rte_mbuf *pkt = NULL;
2467 struct rte_mbuf *seg = NULL;
2468 volatile struct mlx5_cqe *cqe =
2469 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
	unsigned int i = 0;
	unsigned int rq_ci = rxq->rq_ci << sges_n;
	int len = 0; /* keep its value across iterations. */

	while (pkts_n) {
2475 unsigned int idx = rq_ci & wqe_cnt;
2476 volatile struct mlx5_wqe_data_seg *wqe =
2477 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
2478 struct rte_mbuf *rep = (*rxq->elts)[idx];
2479 volatile struct mlx5_mini_cqe8 *mcqe = NULL;
2480 uint32_t rss_hash_res;
2488 rep = rte_mbuf_raw_alloc(rxq->mp);
2489 if (unlikely(rep == NULL)) {
2490 ++rxq->stats.rx_nombuf;
			if (!pkt) {
				/*
				 * No buffers before we even started,
				 * bail out silently.
				 */
				break;
			}
2498 while (pkt != seg) {
				assert(pkt != (*rxq->elts)[idx]);
				rep = NEXT(pkt);
				NEXT(pkt) = NULL;
				NB_SEGS(pkt) = 1;
				rte_mbuf_raw_free(pkt);
				pkt = rep;
			}
			return i;
		}
		if (!pkt) {
2509 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
2510 len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
			if (!len) {
				rte_mbuf_raw_free(rep);
				break;
			}
			pkt = seg;
2516 assert(len >= (rxq->crc_present << 2));
2518 /* If compressed, take hash result from mini-CQE. */
			rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ?
							cqe->rx_hash_res :
							mcqe->rx_hash_result);
2522 rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
2523 if (rxq->crc_present)
2524 len -= RTE_ETHER_CRC_LEN;
2527 DATA_LEN(rep) = DATA_LEN(seg);
2528 PKT_LEN(rep) = PKT_LEN(seg);
2529 SET_DATA_OFF(rep, DATA_OFF(seg));
2530 PORT(rep) = PORT(seg);
2531 (*rxq->elts)[idx] = rep;
		/*
		 * Fill NIC descriptor with the new buffer. The lkey and size
		 * of the buffers are already known, only the buffer address
		 * changes.
		 */
2537 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
2538 /* If there's only one MR, no need to replace LKey in WQE. */
2539 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
2540 wqe->lkey = mlx5_rx_mb2mr(rxq, rep);
2541 if (len > DATA_LEN(seg)) {
			len -= DATA_LEN(seg);
			++NB_SEGS(pkt);
			++rq_ci;
			continue;
		}
2547 DATA_LEN(seg) = len;
2548 #ifdef MLX5_PMD_SOFT_COUNTERS
2549 /* Increment bytes counter. */
		rxq->stats.ibytes += PKT_LEN(pkt);
#endif
		/* Return packet. */
		*(pkts++) = pkt;
		pkt = NULL;
		--pkts_n;
		++i;
		/* Align consumer index to the next stride. */
		rq_ci >>= sges_n;
		++rq_ci;
		rq_ci <<= sges_n;
	}
	if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci)))
		return 0;
2564 /* Update the consumer index. */
2565 rxq->rq_ci = rq_ci >> sges_n;
	/* Make prior stores visible before ringing the doorbells. */
	rte_cio_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	rte_cio_wmb();
	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
2570 #ifdef MLX5_PMD_SOFT_COUNTERS
2571 /* Increment packets counter. */
	rxq->stats.ipackets += i;
#endif
	return i;
}
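/*
 * Illustrative usage sketch (application side, hypothetical function and
 * port/queue numbers): mlx5_rx_burst() is never called directly; it is
 * installed as the rx_pkt_burst callback and reached through
 * rte_eth_rx_burst().
 */
static __rte_unused void
mlx5_rx_burst_usage_sketch(void)
{
	struct rte_mbuf *pkts[32];
	uint16_t nb_rx = rte_eth_rx_burst(0 /* port */, 0 /* queue */,
					  pkts, RTE_DIM(pkts));
	uint16_t j;

	for (j = 0; j < nb_rx; ++j)
		rte_pktmbuf_free(pkts[j]); /* An application would process first. */
}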
2578 mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque)
2580 struct mlx5_mprq_buf *buf = opaque;
2582 if (rte_atomic16_read(&buf->refcnt) == 1) {
2583 rte_mempool_put(buf->mp, buf);
2584 } else if (rte_atomic16_add_return(&buf->refcnt, -1) == 0) {
2585 rte_atomic16_set(&buf->refcnt, 1);
		rte_mempool_put(buf->mp, buf);
	}
}
2591 mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf)
	mlx5_mprq_buf_free_cb(NULL, buf);
}
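/*
 * Illustrative sketch (hypothetical helper, not used by the driver) of the
 * MPRQ buffer refcount convention used above: the RQ itself holds one
 * reference and every mbuf attached to a stride adds one more; the chunk
 * goes back to its mempool only when the last reference is dropped, with
 * refcnt reset to 1 so the buffer is immediately reusable.
 */
static __rte_unused int
mlx5_mprq_buf_busy_sketch(struct mlx5_mprq_buf *buf)
{
	/* More than one reference means some stride is still in use. */
	return rte_atomic16_read(&buf->refcnt) > 1;
}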
2597 mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx)
2599 struct mlx5_mprq_buf *rep = rxq->mprq_repl;
2600 volatile struct mlx5_wqe_data_seg *wqe =
		&((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg;
	void *addr;
2604 assert(rep != NULL);
2605 /* Replace MPRQ buf. */
2606 (*rxq->mprq_bufs)[rq_idx] = rep;
2608 addr = mlx5_mprq_buf_addr(rep);
2609 wqe->addr = rte_cpu_to_be_64((uintptr_t)addr);
2610 /* If there's only one MR, no need to replace LKey in WQE. */
2611 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
2612 wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr);
2613 /* Stash a mbuf for next replacement. */
2614 if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep)))
		rxq->mprq_repl = rep;
	else
		rxq->mprq_repl = NULL;
}
/**
 * DPDK callback for RX with Multi-Packet RQ support.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
2636 struct mlx5_rxq_data *rxq = dpdk_rxq;
2637 const unsigned int strd_n = 1 << rxq->strd_num_n;
2638 const unsigned int strd_sz = 1 << rxq->strd_sz_n;
2639 const unsigned int strd_shift =
2640 MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en;
2641 const unsigned int cq_mask = (1 << rxq->cqe_n) - 1;
2642 const unsigned int wq_mask = (1 << rxq->elts_n) - 1;
2643 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
	unsigned int i = 0;
	uint32_t rq_ci = rxq->rq_ci;
2646 uint16_t consumed_strd = rxq->consumed_strd;
2647 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
2649 while (i < pkts_n) {
		struct rte_mbuf *pkt;
		void *addr;
		int ret;
		unsigned int len;
		uint16_t strd_cnt;
		uint16_t strd_idx;
		uint32_t offset;
		uint32_t byte_cnt;
2658 volatile struct mlx5_mini_cqe8 *mcqe = NULL;
2659 uint32_t rss_hash_res = 0;
2661 if (consumed_strd == strd_n) {
2662 /* Replace WQE only if the buffer is still in use. */
2663 if (rte_atomic16_read(&buf->refcnt) > 1) {
2664 mprq_buf_replace(rxq, rq_ci & wq_mask);
2665 /* Release the old buffer. */
2666 mlx5_mprq_buf_free(buf);
2667 } else if (unlikely(rxq->mprq_repl == NULL)) {
2668 struct mlx5_mprq_buf *rep;
				/*
				 * The MPRQ mempool is out of buffers, so
				 * packets are being memcpy'd regardless of
				 * their size. Retry the allocation to get
				 * back to normal operation.
				 */
				if (!rte_mempool_get(rxq->mprq_mp,
						     (void **)&rep))
					rxq->mprq_repl = rep;
			}
			/* Advance to the next WQE. */
			consumed_strd = 0;
			++rq_ci;
			buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
		}
2685 cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
		ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
		if (ret == 0)
			break;
		byte_cnt = ret;
2690 strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
2691 MLX5_MPRQ_STRIDE_NUM_SHIFT;
2693 consumed_strd += strd_cnt;
		if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
			continue;
		if (mcqe == NULL) {
			rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res);
			strd_idx = rte_be_to_cpu_16(cqe->wqe_counter);
		} else {
			/* A mini-CQE for MPRQ doesn't carry a hash result. */
			strd_idx = rte_be_to_cpu_16(mcqe->stride_idx);
		}
2703 assert(strd_idx < strd_n);
2704 assert(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask));
		/*
		 * The queue is currently configured to receive one packet per
		 * stride. But if the MTU is adjusted through the kernel
		 * interface, the device could consume multiple strides
		 * without raising an error. In that case the packet must be
		 * dropped because it is bigger than max_rx_pkt_len.
		 */
2712 if (unlikely(strd_cnt > 1)) {
			++rxq->stats.idropped;
			continue;
		}
		pkt = rte_pktmbuf_alloc(rxq->mp);
		if (unlikely(pkt == NULL)) {
			++rxq->stats.rx_nombuf;
			break;
		}
2721 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
2722 assert((int)len >= (rxq->crc_present << 2));
2723 if (rxq->crc_present)
2724 len -= RTE_ETHER_CRC_LEN;
2725 offset = strd_idx * strd_sz + strd_shift;
2726 addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf), offset);
		/* Initialize the offload flags. */
		pkt->ol_flags = 0;
		/*
		 * Memcpy the packet into the target mbuf if:
		 * - the packet is smaller than mprq_max_memcpy_len, or
		 * - the mempool for Multi-Packet RQ is out of buffers.
		 */
2734 if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl == NULL) {
			/*
			 * When a packet is memcpy'd because the mempool is
			 * out of buffers, it must fit within the target mbuf.
			 */
2739 if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
2740 rte_pktmbuf_free_seg(pkt);
				++rxq->stats.idropped;
				continue;
			}
			rte_memcpy(rte_pktmbuf_mtod(pkt, void *), addr, len);
		} else {
2746 rte_iova_t buf_iova;
2747 struct rte_mbuf_ext_shared_info *shinfo;
2748 uint16_t buf_len = strd_cnt * strd_sz;
2750 /* Increment the refcnt of the whole chunk. */
2751 rte_atomic16_add_return(&buf->refcnt, 1);
			assert((uint16_t)rte_atomic16_read(&buf->refcnt) <=
			       strd_n + 1);
2754 addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
			/*
			 * The MLX5 device doesn't use the iova itself, but it
			 * is needed in case the Rx packet is later transmitted
			 * via a different PMD.
			 */
2760 buf_iova = rte_mempool_virt2iova(buf) +
2761 RTE_PTR_DIFF(addr, buf);
2762 shinfo = rte_pktmbuf_ext_shinfo_init_helper(addr,
2763 &buf_len, mlx5_mprq_buf_free_cb, buf);
			/*
			 * EXT_ATTACHED_MBUF will be set in pkt->ol_flags when
			 * the stride is attached to the mbuf, and more offload
			 * flags will be added below by calling
			 * rxq_cq_to_mbuf(). Other fields will be overwritten.
			 */
			rte_pktmbuf_attach_extbuf(pkt, addr, buf_iova, buf_len,
						  shinfo);
2772 rte_pktmbuf_reset_headroom(pkt);
2773 assert(pkt->ol_flags == EXT_ATTACHED_MBUF);
			/*
			 * Prevent a potential overflow due to an MTU change
			 * through the kernel interface.
			 */
2778 if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
2779 rte_pktmbuf_free_seg(pkt);
				++rxq->stats.idropped;
				continue;
			}
		}
		rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
		PKT_LEN(pkt) = len;
2786 DATA_LEN(pkt) = len;
2787 PORT(pkt) = rxq->port_id;
2788 #ifdef MLX5_PMD_SOFT_COUNTERS
2789 /* Increment bytes counter. */
		rxq->stats.ibytes += PKT_LEN(pkt);
#endif
		/* Return packet. */
		*(pkts++) = pkt;
		++i;
	}
2796 /* Update the consumer indexes. */
	rxq->consumed_strd = consumed_strd;
	rte_cio_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	if (rq_ci != rxq->rq_ci) {
		rxq->rq_ci = rq_ci;
		rte_cio_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
2805 #ifdef MLX5_PMD_SOFT_COUNTERS
2806 /* Increment packets counter. */
	rxq->stats.ipackets += i;
#endif
	return i;
}
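/*
 * Illustrative sketch (hypothetical helper, generic DPDK mbuf API) of the
 * zero-copy path used above: a stride of an MPRQ chunk is handed to the
 * application by attaching it to an mbuf as an external buffer whose free
 * callback drops the reference taken on the chunk.
 */
static __rte_unused int
mlx5_mprq_attach_sketch(struct rte_mbuf *pkt, void *addr, rte_iova_t iova,
			uint16_t buf_len, struct mlx5_mprq_buf *chunk)
{
	struct rte_mbuf_ext_shared_info *shinfo;

	/* The shared info is carved out of the tail of the external buffer. */
	shinfo = rte_pktmbuf_ext_shinfo_init_helper(addr, &buf_len,
						    mlx5_mprq_buf_free_cb,
						    chunk);
	if (shinfo == NULL)
		return -1;
	rte_pktmbuf_attach_extbuf(pkt, addr, iova, buf_len, shinfo);
	rte_pktmbuf_reset_headroom(pkt);
	return 0;
}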
/**
 * Dummy DPDK callback for TX.
 *
 * This function is used to temporarily replace the real callback during
 * unsafe control operations on the queue, or in case of error.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
2829 removed_tx_burst(void *dpdk_txq __rte_unused,
2830 struct rte_mbuf **pkts __rte_unused,
2831 uint16_t pkts_n __rte_unused)
/**
 * Dummy DPDK callback for RX.
 *
 * This function is used to temporarily replace the real callback during
 * unsafe control operations on the queue, or in case of error.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
removed_rx_burst(void *dpdk_rxq __rte_unused,
2855 struct rte_mbuf **pkts __rte_unused,
2856 uint16_t pkts_n __rte_unused)
/*
 * Vectorized Rx/Tx routines are not compiled in when the required vector
 * instructions are not supported on a target architecture. The following
 * null stubs are needed for linkage when the vectorized implementations
 * (e.g. mlx5_rxtx_vec_sse.c for x86) are not built.
 */
2870 mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused,
2871 struct rte_mbuf **pkts __rte_unused,
2872 uint16_t pkts_n __rte_unused)
2878 mlx5_tx_burst_vec(void *dpdk_txq __rte_unused,
2879 struct rte_mbuf **pkts __rte_unused,
2880 uint16_t pkts_n __rte_unused)
mlx5_rx_burst_vec(void *dpdk_rxq __rte_unused,
2887 struct rte_mbuf **pkts __rte_unused,
2888 uint16_t pkts_n __rte_unused)
2894 mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
2900 mlx5_check_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
2906 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
2912 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)