X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx.c;h=fe9e7eac0ba7fbd963c6d4f925fc1c24ba5a6a7a;hb=9f9bebae55309c1230d2b9a81ece2f94f1eeda69;hp=b0551cccf6085323b6095bf512def0f49b596661;hpb=c9c2a965ab83ca224663abe5ce38f774a081e83e;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index b0551cccf6..fe9e7eac0b 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -48,19 +48,12 @@ #pragma GCC diagnostic error "-Wpedantic" #endif -/* DPDK headers don't like -pedantic. */ -#ifdef PEDANTIC -#pragma GCC diagnostic ignored "-Wpedantic" -#endif #include #include #include #include #include #include -#ifdef PEDANTIC -#pragma GCC diagnostic error "-Wpedantic" -#endif #include "mlx5.h" #include "mlx5_utils.h" @@ -69,279 +62,187 @@ #include "mlx5_defs.h" #include "mlx5_prm.h" -static inline int -check_cqe(volatile struct mlx5_cqe *cqe, - unsigned int cqes_n, const uint16_t ci) - __attribute__((always_inline)); - -static inline void -txq_complete(struct txq *txq) __attribute__((always_inline)); - -static inline uint32_t -txq_mp2mr(struct txq *txq, struct rte_mempool *mp) - __attribute__((always_inline)); - -static inline void -mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe) - __attribute__((always_inline)); - -static inline uint32_t -rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe) - __attribute__((always_inline)); +static __rte_always_inline uint32_t +rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe); -static inline int +static __rte_always_inline int mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, - uint16_t cqe_cnt, uint32_t *rss_hash) - __attribute__((always_inline)); + uint16_t cqe_cnt, uint32_t *rss_hash); -static inline uint32_t -rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe) - __attribute__((always_inline)); +static __rte_always_inline uint32_t +rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe); -#ifndef NDEBUG +uint32_t mlx5_ptype_table[] __rte_cache_aligned = { + [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */ +}; /** - * Verify or set magic value in CQE. - * - * @param cqe - * Pointer to CQE. + * Build a table to translate Rx completion flags to packet type. * - * @return - * 0 the first time. + * @note: fix mlx5_dev_supported_ptypes_get() if any change here. */ -static inline int -check_cqe_seen(volatile struct mlx5_cqe *cqe) +void +mlx5_set_ptype_table(void) { - static const uint8_t magic[] = "seen"; - volatile uint8_t (*buf)[sizeof(cqe->rsvd0)] = &cqe->rsvd0; - int ret = 1; unsigned int i; + uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table; - for (i = 0; i < sizeof(magic) && i < sizeof(*buf); ++i) - if (!ret || (*buf)[i] != magic[i]) { - ret = 0; - (*buf)[i] = magic[i]; - } - return ret; -} - -#endif /* NDEBUG */ - -/** - * Check whether CQE is valid. - * - * @param cqe - * Pointer to CQE. - * @param cqes_n - * Size of completion queue. - * @param ci - * Consumer index. - * - * @return - * 0 on success, 1 on failure. - */ -static inline int -check_cqe(volatile struct mlx5_cqe *cqe, - unsigned int cqes_n, const uint16_t ci) -{ - uint16_t idx = ci & cqes_n; - uint8_t op_own = cqe->op_own; - uint8_t op_owner = MLX5_CQE_OWNER(op_own); - uint8_t op_code = MLX5_CQE_OPCODE(op_own); - - if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID))) - return 1; /* No CQE. */ -#ifndef NDEBUG - if ((op_code == MLX5_CQE_RESP_ERR) || - (op_code == MLX5_CQE_REQ_ERR)) { - volatile struct mlx5_err_cqe *err_cqe = (volatile void *)cqe; - uint8_t syndrome = err_cqe->syndrome; - - if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) || - (syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR)) - return 0; - if (!check_cqe_seen(cqe)) - ERROR("unexpected CQE error %u (0x%02x)" - " syndrome 0x%02x", - op_code, op_code, syndrome); - return 1; - } else if ((op_code != MLX5_CQE_RESP_SEND) && - (op_code != MLX5_CQE_REQ)) { - if (!check_cqe_seen(cqe)) - ERROR("unexpected CQE opcode %u (0x%02x)", - op_code, op_code); - return 1; - } -#endif /* NDEBUG */ - return 0; -} - -/** - * Return the address of the WQE. - * - * @param txq - * Pointer to TX queue structure. - * @param wqe_ci - * WQE consumer index. - * - * @return - * WQE address. - */ -static inline uintptr_t * -tx_mlx5_wqe(struct txq *txq, uint16_t ci) -{ - ci &= ((1 << txq->wqe_n) - 1); - return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE); + /* Last entry must not be overwritten, reserved for errored packet. */ + for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i) + (*p)[i] = RTE_PTYPE_UNKNOWN; + /* + * The index to the array should have: + * bit[1:0] = l3_hdr_type + * bit[4:2] = l4_hdr_type + * bit[5] = ip_frag + * bit[6] = tunneled + * bit[7] = outer_l3_type + */ + /* L3 */ + (*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG; + (*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG; + /* Fragmented */ + (*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG; + (*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG; + /* TCP */ + (*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; + (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; + /* UDP */ + (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP; + (*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP; + /* Repeat with outer_l3_type being set. Just in case. */ + (*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG; + (*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG; + (*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG; + (*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG; + (*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; + (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; + (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP; + (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP; + /* Tunneled - L3 */ + (*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG; + (*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG; + (*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG; + (*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG; + /* Tunneled - Fragmented */ + (*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG; + (*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG; + (*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG; + (*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG; + /* Tunneled - TCP */ + (*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; + (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; + (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; + (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; + /* Tunneled - UDP */ + (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP; + (*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP; + (*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP; + (*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP; } /** - * Manage TX completions. - * - * When sending a burst, mlx5_tx_burst() posts several WRs. + * Return the size of tailroom of WQ. * * @param txq * Pointer to TX queue structure. - */ -static inline void -txq_complete(struct txq *txq) -{ - const unsigned int elts_n = 1 << txq->elts_n; - const unsigned int cqe_n = 1 << txq->cqe_n; - const unsigned int cqe_cnt = cqe_n - 1; - uint16_t elts_free = txq->elts_tail; - uint16_t elts_tail; - uint16_t cq_ci = txq->cq_ci; - volatile struct mlx5_cqe *cqe = NULL; - volatile struct mlx5_wqe_ctrl *ctrl; - - do { - volatile struct mlx5_cqe *tmp; - - tmp = &(*txq->cqes)[cq_ci & cqe_cnt]; - if (check_cqe(tmp, cqe_n, cq_ci)) - break; - cqe = tmp; -#ifndef NDEBUG - if (MLX5_CQE_FORMAT(cqe->op_own) == MLX5_COMPRESSED) { - if (!check_cqe_seen(cqe)) - ERROR("unexpected compressed CQE, TX stopped"); - return; - } - if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) || - (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) { - if (!check_cqe_seen(cqe)) - ERROR("unexpected error CQE, TX stopped"); - return; - } -#endif /* NDEBUG */ - ++cq_ci; - } while (1); - if (unlikely(cqe == NULL)) - return; - txq->wqe_pi = ntohs(cqe->wqe_counter); - ctrl = (volatile struct mlx5_wqe_ctrl *) - tx_mlx5_wqe(txq, txq->wqe_pi); - elts_tail = ctrl->ctrl3; - assert(elts_tail < (1 << txq->wqe_n)); - /* Free buffers. */ - while (elts_free != elts_tail) { - struct rte_mbuf *elt = (*txq->elts)[elts_free]; - unsigned int elts_free_next = - (elts_free + 1) & (elts_n - 1); - struct rte_mbuf *elt_next = (*txq->elts)[elts_free_next]; - -#ifndef NDEBUG - /* Poisoning. */ - memset(&(*txq->elts)[elts_free], - 0x66, - sizeof((*txq->elts)[elts_free])); -#endif - RTE_MBUF_PREFETCH_TO_FREE(elt_next); - /* Only one segment needs to be freed. */ - rte_pktmbuf_free_seg(elt); - elts_free = elts_free_next; - } - txq->cq_ci = cq_ci; - txq->elts_tail = elts_tail; - /* Update the consumer index. */ - rte_wmb(); - *txq->cq_db = htonl(cq_ci); -} - -/** - * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which - * the cloned mbuf is allocated is returned instead. - * - * @param buf - * Pointer to mbuf. + * @param addr + * Pointer to tail of WQ. * * @return - * Memory pool where data is located for given mbuf. + * Size of tailroom. */ -static struct rte_mempool * -txq_mb2mp(struct rte_mbuf *buf) +static inline size_t +tx_mlx5_wq_tailroom(struct txq *txq, void *addr) { - if (unlikely(RTE_MBUF_INDIRECT(buf))) - return rte_mbuf_from_indirect(buf)->pool; - return buf->pool; + size_t tailroom; + tailroom = (uintptr_t)(txq->wqes) + + (1 << txq->wqe_n) * MLX5_WQE_SIZE - + (uintptr_t)addr; + return tailroom; } /** - * Get Memory Region (MR) <-> Memory Pool (MP) association from txq->mp2mr[]. - * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full, - * remove an entry first. + * Copy data to tailroom of circular queue. * - * @param txq - * Pointer to TX queue structure. - * @param[in] mp - * Memory Pool for which a Memory Region lkey must be returned. + * @param dst + * Pointer to destination. + * @param src + * Pointer to source. + * @param n + * Number of bytes to copy. + * @param base + * Pointer to head of queue. + * @param tailroom + * Size of tailroom from dst. * * @return - * mr->lkey on success, (uint32_t)-1 on failure. + * Pointer after copied data. */ -static inline uint32_t -txq_mp2mr(struct txq *txq, struct rte_mempool *mp) +static inline void * +mlx5_copy_to_wq(void *dst, const void *src, size_t n, + void *base, size_t tailroom) { - unsigned int i; - uint32_t lkey = (uint32_t)-1; + void *ret; - for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) { - if (unlikely(txq->mp2mr[i].mp == NULL)) { - /* Unknown MP, add a new MR for it. */ - break; - } - if (txq->mp2mr[i].mp == mp) { - assert(txq->mp2mr[i].lkey != (uint32_t)-1); - assert(htonl(txq->mp2mr[i].mr->lkey) == - txq->mp2mr[i].lkey); - lkey = txq->mp2mr[i].lkey; - break; - } + if (n > tailroom) { + rte_memcpy(dst, src, tailroom); + rte_memcpy(base, (void *)((uintptr_t)src + tailroom), + n - tailroom); + ret = (uint8_t *)base + n - tailroom; + } else { + rte_memcpy(dst, src, n); + ret = (n == tailroom) ? base : (uint8_t *)dst + n; } - if (unlikely(lkey == (uint32_t)-1)) - lkey = txq_mp2mr_reg(txq, mp, i); - return lkey; -} - -/** - * Ring TX queue doorbell. - * - * @param txq - * Pointer to TX queue structure. - * @param wqe - * Pointer to the last WQE posted in the NIC. - */ -static inline void -mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe) -{ - uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg); - volatile uint64_t *src = ((volatile uint64_t *)wqe); - - rte_wmb(); - *txq->qp_db = htonl(txq->wqe_ci); - /* Ensure ordering between DB record and BF copy. */ - rte_wmb(); - *dst = *src; + return ret; } /** @@ -359,12 +260,10 @@ int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) { struct txq *txq = tx_queue; - const unsigned int elts_n = 1 << txq->elts_n; - const unsigned int elts_cnt = elts_n - 1; - unsigned int used; + uint16_t used; - txq_complete(txq); - used = (txq->elts_head - txq->elts_tail) & elts_cnt; + mlx5_tx_complete(txq); + used = txq->elts_head - txq->elts_tail; if (offset < used) return RTE_ETH_TX_DESC_FULL; return RTE_ETH_TX_DESC_DONE; @@ -438,13 +337,18 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct txq *txq = (struct txq *)dpdk_txq; uint16_t elts_head = txq->elts_head; - const unsigned int elts_n = 1 << txq->elts_n; + const uint16_t elts_n = 1 << txq->elts_n; + const uint16_t elts_m = elts_n - 1; unsigned int i = 0; unsigned int j = 0; - unsigned int max; + unsigned int k = 0; + uint16_t max_elts; + unsigned int max_inline = txq->max_inline; + const unsigned int inline_en = !!max_inline && txq->inline_en; uint16_t max_wqe; unsigned int comp; volatile struct mlx5_wqe_v *wqe = NULL; + volatile struct mlx5_wqe_ctrl *last_wqe = NULL; unsigned int segs_n = 0; struct rte_mbuf *buf = NULL; uint8_t *raw; @@ -454,10 +358,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Prefetch first packet cacheline. */ rte_prefetch0(*pkts); /* Start processing. */ - txq_complete(txq); - max = (elts_n - (elts_head - txq->elts_tail)); - if (max > elts_n) - max -= elts_n; + mlx5_tx_complete(txq); + max_elts = (elts_n - (elts_head - txq->elts_tail)); max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); if (unlikely(!max_wqe)) return 0; @@ -465,36 +367,38 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) volatile rte_v128u32_t *dseg = NULL; uint32_t length; unsigned int ds = 0; + unsigned int sg = 0; /* counter of additional segs attached. */ uintptr_t addr; uint64_t naddr; uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2; + uint16_t tso_header_sz = 0; uint16_t ehdr; uint8_t cs_flags = 0; + uint64_t tso = 0; + uint16_t tso_segsz = 0; #ifdef MLX5_PMD_SOFT_COUNTERS uint32_t total_length = 0; #endif /* first_seg */ - buf = *(pkts++); + buf = *pkts; segs_n = buf->nb_segs; /* * Make sure there is enough room to store this packet and * that one ring entry remains unused. */ assert(segs_n); - if (max < segs_n + 1) + if (max_elts < segs_n) break; - max -= segs_n; + max_elts -= segs_n; --segs_n; - if (!segs_n) - --pkts_n; if (unlikely(--max_wqe == 0)) break; wqe = (volatile struct mlx5_wqe_v *) tx_mlx5_wqe(txq, txq->wqe_ci); rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); - if (pkts_n > 1) - rte_prefetch0(*pkts); + if (pkts_n - i > 1) + rte_prefetch0(*(pkts + 1)); addr = rte_pktmbuf_mtod(buf, uintptr_t); length = DATA_LEN(buf); ehdr = (((uint8_t *)addr)[1] << 8) | @@ -502,21 +406,30 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) #ifdef MLX5_PMD_SOFT_COUNTERS total_length = length; #endif - assert(length >= MLX5_WQE_DWORD_SIZE); + if (length < (MLX5_WQE_DWORD_SIZE + 2)) + break; /* Update element. */ - (*txq->elts)[elts_head] = buf; - elts_head = (elts_head + 1) & (elts_n - 1); + (*txq->elts)[elts_head & elts_m] = buf; /* Prefetch next buffer data. */ - if (pkts_n > 1) { - volatile void *pkt_addr; - - pkt_addr = rte_pktmbuf_mtod(*pkts, volatile void *); - rte_prefetch0(pkt_addr); - } + if (pkts_n - i > 1) + rte_prefetch0( + rte_pktmbuf_mtod(*(pkts + 1), volatile void *)); /* Should we enable HW CKSUM offload */ if (buf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { - cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; + const uint64_t is_tunneled = buf->ol_flags & + (PKT_TX_TUNNEL_GRE | + PKT_TX_TUNNEL_VXLAN); + + if (is_tunneled && txq->tunnel_en) { + cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM | + MLX5_ETH_WQE_L4_INNER_CSUM; + if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM) + cs_flags |= MLX5_ETH_WQE_L3_CSUM; + } else { + cs_flags = MLX5_ETH_WQE_L3_CSUM | + MLX5_ETH_WQE_L4_CSUM; + } } raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE; /* Replace the Ethernet type by the VLAN if necessary. */ @@ -541,15 +454,83 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) length -= pkt_inline_sz; addr += pkt_inline_sz; } + if (txq->tso_en) { + tso = buf->ol_flags & PKT_TX_TCP_SEG; + if (tso) { + uintptr_t end = (uintptr_t) + (((uintptr_t)txq->wqes) + + (1 << txq->wqe_n) * + MLX5_WQE_SIZE); + unsigned int copy_b; + uint8_t vlan_sz = (buf->ol_flags & + PKT_TX_VLAN_PKT) ? 4 : 0; + const uint64_t is_tunneled = + buf->ol_flags & + (PKT_TX_TUNNEL_GRE | + PKT_TX_TUNNEL_VXLAN); + + tso_header_sz = buf->l2_len + vlan_sz + + buf->l3_len + buf->l4_len; + tso_segsz = buf->tso_segsz; + + if (is_tunneled && txq->tunnel_en) { + tso_header_sz += buf->outer_l2_len + + buf->outer_l3_len; + cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; + } else { + cs_flags |= MLX5_ETH_WQE_L4_CSUM; + } + if (unlikely(tso_header_sz > + MLX5_MAX_TSO_HEADER)) + break; + copy_b = tso_header_sz - pkt_inline_sz; + /* First seg must contain all headers. */ + assert(copy_b <= length); + raw += MLX5_WQE_DWORD_SIZE; + if (copy_b && + ((end - (uintptr_t)raw) > copy_b)) { + uint16_t n = (MLX5_WQE_DS(copy_b) - + 1 + 3) / 4; + + if (unlikely(max_wqe < n)) + break; + max_wqe -= n; + rte_memcpy((void *)raw, + (void *)addr, copy_b); + addr += copy_b; + length -= copy_b; + pkt_inline_sz += copy_b; + /* + * Another DWORD will be added + * in the inline part. + */ + raw += MLX5_WQE_DS(copy_b) * + MLX5_WQE_DWORD_SIZE - + MLX5_WQE_DWORD_SIZE; + } else { + /* NOP WQE. */ + wqe->ctrl = (rte_v128u32_t){ + htonl(txq->wqe_ci << 8), + htonl(txq->qp_num_8s | 1), + 0, + 0, + }; + ds = 1; + total_length = 0; + k++; + goto next_wqe; + } + } + } /* Inline if enough room. */ - if (txq->max_inline) { + if (inline_en || tso) { uintptr_t end = (uintptr_t) (((uintptr_t)txq->wqes) + (1 << txq->wqe_n) * MLX5_WQE_SIZE); - unsigned int max_inline = txq->max_inline * - RTE_CACHE_LINE_SIZE - - MLX5_WQE_DWORD_SIZE; - uintptr_t addr_end = (addr + max_inline) & + unsigned int inline_room = max_inline * + RTE_CACHE_LINE_SIZE - + (pkt_inline_sz - 2); + uintptr_t addr_end = (addr + inline_room) & ~(RTE_CACHE_LINE_SIZE - 1); unsigned int copy_b = (addr_end > addr) ? RTE_MIN((addr_end - addr), length) : @@ -567,6 +548,18 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if (unlikely(max_wqe < n)) break; max_wqe -= n; + if (tso) { + uint32_t inl = + htonl(copy_b | MLX5_INLINE_SEG); + + pkt_inline_sz = + MLX5_WQE_DS(tso_header_sz) * + MLX5_WQE_DWORD_SIZE; + rte_memcpy((void *)raw, + (void *)&inl, sizeof(inl)); + raw += sizeof(inl); + pkt_inline_sz += sizeof(inl); + } rte_memcpy((void *)raw, (void *)addr, copy_b); addr += copy_b; length -= copy_b; @@ -613,7 +606,7 @@ use_dseg: naddr = htonll(addr); *dseg = (rte_v128u32_t){ htonl(length), - txq_mp2mr(txq, txq_mb2mp(buf)), + mlx5_tx_mb2mr(txq, buf), naddr, naddr >> 32, }; @@ -652,52 +645,69 @@ next_seg: naddr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)); *dseg = (rte_v128u32_t){ htonl(length), - txq_mp2mr(txq, txq_mb2mp(buf)), + mlx5_tx_mb2mr(txq, buf), naddr, naddr >> 32, }; - (*txq->elts)[elts_head] = buf; - elts_head = (elts_head + 1) & (elts_n - 1); - ++j; - --segs_n; - if (segs_n) + (*txq->elts)[++elts_head & elts_m] = buf; + ++sg; + /* Advance counter only if all segs are successfully posted. */ + if (sg < segs_n) goto next_seg; else - --pkts_n; + j += sg; next_pkt: + ++elts_head; + ++pkts; ++i; /* Initialize known and common part of the WQE structure. */ - wqe->ctrl = (rte_v128u32_t){ - htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND), - htonl(txq->qp_num_8s | ds), - 0, - 0, - }; - wqe->eseg = (rte_v128u32_t){ - 0, - cs_flags, - 0, - (ehdr << 16) | htons(pkt_inline_sz), - }; + if (tso) { + wqe->ctrl = (rte_v128u32_t){ + htonl((txq->wqe_ci << 8) | MLX5_OPCODE_TSO), + htonl(txq->qp_num_8s | ds), + 0, + 0, + }; + wqe->eseg = (rte_v128u32_t){ + 0, + cs_flags | (htons(tso_segsz) << 16), + 0, + (ehdr << 16) | htons(tso_header_sz), + }; + } else { + wqe->ctrl = (rte_v128u32_t){ + htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND), + htonl(txq->qp_num_8s | ds), + 0, + 0, + }; + wqe->eseg = (rte_v128u32_t){ + 0, + cs_flags, + 0, + (ehdr << 16) | htons(pkt_inline_sz), + }; + } +next_wqe: txq->wqe_ci += (ds + 3) / 4; + /* Save the last successful WQE for completion request */ + last_wqe = (volatile struct mlx5_wqe_ctrl *)wqe; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment sent bytes counter. */ txq->stats.obytes += total_length; #endif - } while (pkts_n); + } while (i < pkts_n); /* Take a shortcut if nothing must be sent. */ - if (unlikely(i == 0)) + if (unlikely((i + k) == 0)) return 0; + txq->elts_head += (i + j); /* Check whether completion threshold has been reached. */ - comp = txq->elts_comp + i + j; + comp = txq->elts_comp + i + j + k; if (comp >= MLX5_TX_COMP_THRESH) { - volatile struct mlx5_wqe_ctrl *w = - (volatile struct mlx5_wqe_ctrl *)wqe; - /* Request completion on last WQE. */ - w->ctrl2 = htonl(8); + last_wqe->ctrl2 = htonl(8); /* Save elts_head in unused "immediate" field of WQE. */ - w->ctrl3 = elts_head; + last_wqe->ctrl3 = txq->elts_head; txq->elts_comp = 0; } else { txq->elts_comp = comp; @@ -707,8 +717,7 @@ next_pkt: txq->stats.opackets += i; #endif /* Ring QP doorbell. */ - mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)wqe); - txq->elts_head = elts_head; + mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)last_wqe); return i; } @@ -798,10 +807,11 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct txq *txq = (struct txq *)dpdk_txq; uint16_t elts_head = txq->elts_head; - const unsigned int elts_n = 1 << txq->elts_n; + const uint16_t elts_n = 1 << txq->elts_n; + const uint16_t elts_m = elts_n - 1; unsigned int i = 0; unsigned int j = 0; - unsigned int max; + uint16_t max_elts; uint16_t max_wqe; unsigned int comp; struct mlx5_mpw mpw = { @@ -814,16 +824,13 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); /* Start processing. */ - txq_complete(txq); - max = (elts_n - (elts_head - txq->elts_tail)); - if (max > elts_n) - max -= elts_n; + mlx5_tx_complete(txq); + max_elts = (elts_n - (elts_head - txq->elts_tail)); max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); if (unlikely(!max_wqe)) return 0; do { struct rte_mbuf *buf = *(pkts++); - unsigned int elts_head_next; uint32_t length; unsigned int segs_n = buf->nb_segs; uint32_t cs_flags = 0; @@ -833,12 +840,12 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) * that one ring entry remains unused. */ assert(segs_n); - if (max < segs_n + 1) + if (max_elts < segs_n) break; /* Do not bother with large packets MPW cannot handle. */ if (segs_n > MLX5_MPW_DSEG_MAX) break; - max -= segs_n; + max_elts -= segs_n; --pkts_n; /* Should we enable HW CKSUM offload */ if (buf->ol_flags & @@ -874,17 +881,15 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) volatile struct mlx5_wqe_data_seg *dseg; uintptr_t addr; - elts_head_next = (elts_head + 1) & (elts_n - 1); assert(buf); - (*txq->elts)[elts_head] = buf; + (*txq->elts)[elts_head++ & elts_m] = buf; dseg = mpw.data.dseg[mpw.pkts_n]; addr = rte_pktmbuf_mtod(buf, uintptr_t); *dseg = (struct mlx5_wqe_data_seg){ .byte_count = htonl(DATA_LEN(buf)), - .lkey = txq_mp2mr(txq, txq_mb2mp(buf)), + .lkey = mlx5_tx_mb2mr(txq, buf), .addr = htonll(addr), }; - elts_head = elts_head_next; #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) length += DATA_LEN(buf); #endif @@ -895,7 +900,6 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) assert(length == mpw.len); if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) mlx5_mpw_close(txq, &mpw); - elts_head = elts_head_next; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment sent bytes counter. */ txq->stats.obytes += length; @@ -1013,10 +1017,11 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, { struct txq *txq = (struct txq *)dpdk_txq; uint16_t elts_head = txq->elts_head; - const unsigned int elts_n = 1 << txq->elts_n; + const uint16_t elts_n = 1 << txq->elts_n; + const uint16_t elts_m = elts_n - 1; unsigned int i = 0; unsigned int j = 0; - unsigned int max; + uint16_t max_elts; uint16_t max_wqe; unsigned int comp; unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE; @@ -1042,13 +1047,10 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); /* Start processing. */ - txq_complete(txq); - max = (elts_n - (elts_head - txq->elts_tail)); - if (max > elts_n) - max -= elts_n; + mlx5_tx_complete(txq); + max_elts = (elts_n - (elts_head - txq->elts_tail)); do { struct rte_mbuf *buf = *(pkts++); - unsigned int elts_head_next; uintptr_t addr; uint32_t length; unsigned int segs_n = buf->nb_segs; @@ -1059,12 +1061,12 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, * that one ring entry remains unused. */ assert(segs_n); - if (max < segs_n + 1) + if (max_elts < segs_n) break; /* Do not bother with large packets MPW cannot handle. */ if (segs_n > MLX5_MPW_DSEG_MAX) break; - max -= segs_n; + max_elts -= segs_n; --pkts_n; /* * Compute max_wqe in case less WQE were consumed in previous @@ -1125,18 +1127,15 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, do { volatile struct mlx5_wqe_data_seg *dseg; - elts_head_next = - (elts_head + 1) & (elts_n - 1); assert(buf); - (*txq->elts)[elts_head] = buf; + (*txq->elts)[elts_head++ & elts_m] = buf; dseg = mpw.data.dseg[mpw.pkts_n]; addr = rte_pktmbuf_mtod(buf, uintptr_t); *dseg = (struct mlx5_wqe_data_seg){ .byte_count = htonl(DATA_LEN(buf)), - .lkey = txq_mp2mr(txq, txq_mb2mp(buf)), + .lkey = mlx5_tx_mb2mr(txq, buf), .addr = htonll(addr), }; - elts_head = elts_head_next; #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) length += DATA_LEN(buf); #endif @@ -1153,9 +1152,8 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, assert(mpw.state == MLX5_MPW_INL_STATE_OPENED); assert(length <= inline_room); assert(length == DATA_LEN(buf)); - elts_head_next = (elts_head + 1) & (elts_n - 1); addr = rte_pktmbuf_mtod(buf, uintptr_t); - (*txq->elts)[elts_head] = buf; + (*txq->elts)[elts_head++ & elts_m] = buf; /* Maximum number of bytes before wrapping. */ max = ((((uintptr_t)(txq->wqes)) + (1 << txq->wqe_n) * @@ -1192,7 +1190,6 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, inline_room -= length; } } - elts_head = elts_head_next; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment sent bytes counter. */ txq->stats.obytes += length; @@ -1230,6 +1227,352 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, return i; } +/** + * Open an Enhanced MPW session. + * + * @param txq + * Pointer to TX queue structure. + * @param mpw + * Pointer to MPW session structure. + * @param length + * Packet length. + */ +static inline void +mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding) +{ + uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); + + mpw->state = MLX5_MPW_ENHANCED_STATE_OPENED; + mpw->pkts_n = 0; + mpw->total_len = sizeof(struct mlx5_wqe); + mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx); + mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_ENHANCED_MPSW << 24) | + (txq->wqe_ci << 8) | + MLX5_OPCODE_ENHANCED_MPSW); + mpw->wqe->ctrl[2] = 0; + mpw->wqe->ctrl[3] = 0; + memset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE); + if (unlikely(padding)) { + uintptr_t addr = (uintptr_t)(mpw->wqe + 1); + + /* Pad the first 2 DWORDs with zero-length inline header. */ + *(volatile uint32_t *)addr = htonl(MLX5_INLINE_SEG); + *(volatile uint32_t *)(addr + MLX5_WQE_DWORD_SIZE) = + htonl(MLX5_INLINE_SEG); + mpw->total_len += 2 * MLX5_WQE_DWORD_SIZE; + /* Start from the next WQEBB. */ + mpw->data.raw = (volatile void *)(tx_mlx5_wqe(txq, idx + 1)); + } else { + mpw->data.raw = (volatile void *)(mpw->wqe + 1); + } +} + +/** + * Close an Enhanced MPW session. + * + * @param txq + * Pointer to TX queue structure. + * @param mpw + * Pointer to MPW session structure. + * + * @return + * Number of consumed WQEs. + */ +static inline uint16_t +mlx5_empw_close(struct txq *txq, struct mlx5_mpw *mpw) +{ + uint16_t ret; + + /* Store size in multiple of 16 bytes. Control and Ethernet segments + * count as 2. + */ + mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | MLX5_WQE_DS(mpw->total_len)); + mpw->state = MLX5_MPW_STATE_CLOSED; + ret = (mpw->total_len + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE; + txq->wqe_ci += ret; + return ret; +} + +/** + * DPDK callback for TX with Enhanced MPW support. + * + * @param dpdk_txq + * Generic pointer to TX queue structure. + * @param[in] pkts + * Packets to transmit. + * @param pkts_n + * Number of packets in array. + * + * @return + * Number of packets successfully transmitted (<= pkts_n). + */ +uint16_t +mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) +{ + struct txq *txq = (struct txq *)dpdk_txq; + uint16_t elts_head = txq->elts_head; + const uint16_t elts_n = 1 << txq->elts_n; + const uint16_t elts_m = elts_n - 1; + unsigned int i = 0; + unsigned int j = 0; + uint16_t max_elts; + uint16_t max_wqe; + unsigned int max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE; + unsigned int mpw_room = 0; + unsigned int inl_pad = 0; + uint32_t inl_hdr; + struct mlx5_mpw mpw = { + .state = MLX5_MPW_STATE_CLOSED, + }; + + if (unlikely(!pkts_n)) + return 0; + /* Start processing. */ + mlx5_tx_complete(txq); + max_elts = (elts_n - (elts_head - txq->elts_tail)); + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); + max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); + if (unlikely(!max_wqe)) + return 0; + do { + struct rte_mbuf *buf = *(pkts++); + uintptr_t addr; + uint64_t naddr; + unsigned int n; + unsigned int do_inline = 0; /* Whether inline is possible. */ + uint32_t length; + unsigned int segs_n = buf->nb_segs; + uint32_t cs_flags = 0; + + /* + * Make sure there is enough room to store this packet and + * that one ring entry remains unused. + */ + assert(segs_n); + if (max_elts - j < segs_n) + break; + /* Do not bother with large packets MPW cannot handle. */ + if (segs_n > MLX5_MPW_DSEG_MAX) + break; + /* Should we enable HW CKSUM offload. */ + if (buf->ol_flags & + (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) + cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; + /* Retrieve packet information. */ + length = PKT_LEN(buf); + /* Start new session if: + * - multi-segment packet + * - no space left even for a dseg + * - next packet can be inlined with a new WQE + * - cs_flag differs + * It can't be MLX5_MPW_STATE_OPENED as always have a single + * segmented packet. + */ + if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) { + if ((segs_n != 1) || + (inl_pad + sizeof(struct mlx5_wqe_data_seg) > + mpw_room) || + (length <= txq->inline_max_packet_sz && + inl_pad + sizeof(inl_hdr) + length > + mpw_room) || + (mpw.wqe->eseg.cs_flags != cs_flags)) + max_wqe -= mlx5_empw_close(txq, &mpw); + } + if (unlikely(mpw.state == MLX5_MPW_STATE_CLOSED)) { + if (unlikely(segs_n != 1)) { + /* Fall back to legacy MPW. + * A MPW session consumes 2 WQEs at most to + * include MLX5_MPW_DSEG_MAX pointers. + */ + if (unlikely(max_wqe < 2)) + break; + mlx5_mpw_new(txq, &mpw, length); + } else { + /* In Enhanced MPW, inline as much as the budget + * is allowed. The remaining space is to be + * filled with dsegs. If the title WQEBB isn't + * padded, it will have 2 dsegs there. + */ + mpw_room = RTE_MIN(MLX5_WQE_SIZE_MAX, + (max_inline ? max_inline : + pkts_n * MLX5_WQE_DWORD_SIZE) + + MLX5_WQE_SIZE); + if (unlikely(max_wqe * MLX5_WQE_SIZE < + mpw_room)) + break; + /* Don't pad the title WQEBB to not waste WQ. */ + mlx5_empw_new(txq, &mpw, 0); + mpw_room -= mpw.total_len; + inl_pad = 0; + do_inline = + length <= txq->inline_max_packet_sz && + sizeof(inl_hdr) + length <= mpw_room && + !txq->mpw_hdr_dseg; + } + mpw.wqe->eseg.cs_flags = cs_flags; + } else { + /* Evaluate whether the next packet can be inlined. + * Inlininig is possible when: + * - length is less than configured value + * - length fits for remaining space + * - not required to fill the title WQEBB with dsegs + */ + do_inline = + length <= txq->inline_max_packet_sz && + inl_pad + sizeof(inl_hdr) + length <= + mpw_room && + (!txq->mpw_hdr_dseg || + mpw.total_len >= MLX5_WQE_SIZE); + } + /* Multi-segment packets must be alone in their MPW. */ + assert((segs_n == 1) || (mpw.pkts_n == 0)); + if (unlikely(mpw.state == MLX5_MPW_STATE_OPENED)) { +#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) + length = 0; +#endif + do { + volatile struct mlx5_wqe_data_seg *dseg; + + assert(buf); + (*txq->elts)[elts_head++ & elts_m] = buf; + dseg = mpw.data.dseg[mpw.pkts_n]; + addr = rte_pktmbuf_mtod(buf, uintptr_t); + *dseg = (struct mlx5_wqe_data_seg){ + .byte_count = htonl(DATA_LEN(buf)), + .lkey = mlx5_tx_mb2mr(txq, buf), + .addr = htonll(addr), + }; +#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) + length += DATA_LEN(buf); +#endif + buf = buf->next; + ++j; + ++mpw.pkts_n; + } while (--segs_n); + /* A multi-segmented packet takes one MPW session. + * TODO: Pack more multi-segmented packets if possible. + */ + mlx5_mpw_close(txq, &mpw); + if (mpw.pkts_n < 3) + max_wqe--; + else + max_wqe -= 2; + } else if (do_inline) { + /* Inline packet into WQE. */ + unsigned int max; + + assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED); + assert(length == DATA_LEN(buf)); + inl_hdr = htonl(length | MLX5_INLINE_SEG); + addr = rte_pktmbuf_mtod(buf, uintptr_t); + mpw.data.raw = (volatile void *) + ((uintptr_t)mpw.data.raw + inl_pad); + max = tx_mlx5_wq_tailroom(txq, + (void *)(uintptr_t)mpw.data.raw); + /* Copy inline header. */ + mpw.data.raw = (volatile void *) + mlx5_copy_to_wq( + (void *)(uintptr_t)mpw.data.raw, + &inl_hdr, + sizeof(inl_hdr), + (void *)(uintptr_t)txq->wqes, + max); + max = tx_mlx5_wq_tailroom(txq, + (void *)(uintptr_t)mpw.data.raw); + /* Copy packet data. */ + mpw.data.raw = (volatile void *) + mlx5_copy_to_wq( + (void *)(uintptr_t)mpw.data.raw, + (void *)addr, + length, + (void *)(uintptr_t)txq->wqes, + max); + ++mpw.pkts_n; + mpw.total_len += (inl_pad + sizeof(inl_hdr) + length); + /* No need to get completion as the entire packet is + * copied to WQ. Free the buf right away. + */ + rte_pktmbuf_free_seg(buf); + mpw_room -= (inl_pad + sizeof(inl_hdr) + length); + /* Add pad in the next packet if any. */ + inl_pad = (((uintptr_t)mpw.data.raw + + (MLX5_WQE_DWORD_SIZE - 1)) & + ~(MLX5_WQE_DWORD_SIZE - 1)) - + (uintptr_t)mpw.data.raw; + } else { + /* No inline. Load a dseg of packet pointer. */ + volatile rte_v128u32_t *dseg; + + assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED); + assert((inl_pad + sizeof(*dseg)) <= mpw_room); + assert(length == DATA_LEN(buf)); + if (!tx_mlx5_wq_tailroom(txq, + (void *)((uintptr_t)mpw.data.raw + + inl_pad))) + dseg = (volatile void *)txq->wqes; + else + dseg = (volatile void *) + ((uintptr_t)mpw.data.raw + + inl_pad); + (*txq->elts)[elts_head++ & elts_m] = buf; + addr = rte_pktmbuf_mtod(buf, uintptr_t); + for (n = 0; n * RTE_CACHE_LINE_SIZE < length; n++) + rte_prefetch2((void *)(addr + + n * RTE_CACHE_LINE_SIZE)); + naddr = htonll(addr); + *dseg = (rte_v128u32_t) { + htonl(length), + mlx5_tx_mb2mr(txq, buf), + naddr, + naddr >> 32, + }; + mpw.data.raw = (volatile void *)(dseg + 1); + mpw.total_len += (inl_pad + sizeof(*dseg)); + ++j; + ++mpw.pkts_n; + mpw_room -= (inl_pad + sizeof(*dseg)); + inl_pad = 0; + } +#ifdef MLX5_PMD_SOFT_COUNTERS + /* Increment sent bytes counter. */ + txq->stats.obytes += length; +#endif + ++i; + } while (i < pkts_n); + /* Take a shortcut if nothing must be sent. */ + if (unlikely(i == 0)) + return 0; + /* Check whether completion threshold has been reached. */ + if (txq->elts_comp + j >= MLX5_TX_COMP_THRESH || + (uint16_t)(txq->wqe_ci - txq->mpw_comp) >= + (1 << txq->wqe_n) / MLX5_TX_COMP_THRESH_INLINE_DIV) { + volatile struct mlx5_wqe *wqe = mpw.wqe; + + /* Request completion on last WQE. */ + wqe->ctrl[2] = htonl(8); + /* Save elts_head in unused "immediate" field of WQE. */ + wqe->ctrl[3] = elts_head; + txq->elts_comp = 0; + txq->mpw_comp = txq->wqe_ci; + txq->cq_pi++; + } else { + txq->elts_comp += j; + } +#ifdef MLX5_PMD_SOFT_COUNTERS + /* Increment sent packets counter. */ + txq->stats.opackets += i; +#endif + if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) + mlx5_empw_close(txq, &mpw); + else if (mpw.state == MLX5_MPW_STATE_OPENED) + mlx5_mpw_close(txq, &mpw); + /* Ring QP doorbell. */ + mlx5_tx_dbrec(txq, mpw.wqe); + txq->elts_head = elts_head; + return i; +} + /** * Translate RX completion flags to packet type. * @@ -1244,30 +1587,20 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, static inline uint32_t rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe) { - uint32_t pkt_type; - uint16_t flags = ntohs(cqe->hdr_type_etc); + uint8_t idx; + uint8_t pinfo = cqe->pkt_info; + uint16_t ptype = cqe->hdr_type_etc; - if (cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) { - pkt_type = - TRANSPOSE(flags, - MLX5_CQE_RX_IPV4_PACKET, - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN) | - TRANSPOSE(flags, - MLX5_CQE_RX_IPV6_PACKET, - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN); - pkt_type |= ((cqe->pkt_info & MLX5_CQE_RX_OUTER_PACKET) ? - RTE_PTYPE_L3_IPV6_EXT_UNKNOWN : - RTE_PTYPE_L3_IPV4_EXT_UNKNOWN); - } else { - pkt_type = - TRANSPOSE(flags, - MLX5_CQE_L3_HDR_TYPE_IPV6, - RTE_PTYPE_L3_IPV6_EXT_UNKNOWN) | - TRANSPOSE(flags, - MLX5_CQE_L3_HDR_TYPE_IPV4, - RTE_PTYPE_L3_IPV4_EXT_UNKNOWN); - } - return pkt_type; + /* + * The index to the array should have: + * bit[1:0] = l3_hdr_type + * bit[4:2] = l4_hdr_type + * bit[5] = ip_frag + * bit[6] = tunneled + * bit[7] = outer_l3_type + */ + idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10); + return mlx5_ptype_table[idx]; } /** @@ -1299,7 +1632,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, if (zip->ai) { volatile struct mlx5_mini_cqe8 (*mc)[8] = (volatile struct mlx5_mini_cqe8 (*)[8]) - (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt]); + (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].pkt_info); len = ntohl((*mc)[zip->ai & 7].byte_cnt); *rss_hash = ntohl((*mc)[zip->ai & 7].rx_hash_result); @@ -1347,7 +1680,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, volatile struct mlx5_mini_cqe8 (*mc)[8] = (volatile struct mlx5_mini_cqe8 (*)[8]) (uintptr_t)(&(*rxq->cqes)[rxq->cq_ci & - cqe_cnt]); + cqe_cnt].pkt_info); /* Fix endianness. */ zip->cqe_cnt = ntohl(cqe->byte_cnt); @@ -1447,7 +1780,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; unsigned int i = 0; unsigned int rq_ci = rxq->rq_ci << sges_n; - int len; /* keep its value across iterations. */ + int len = 0; /* keep its value across iterations. */ while (pkts_n) { unsigned int idx = rq_ci & wqe_cnt; @@ -1474,8 +1807,9 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) while (pkt != seg) { assert(pkt != (*rxq->elts)[idx]); rep = NEXT(pkt); - rte_mbuf_refcnt_set(pkt, 0); - __rte_mbuf_raw_free(pkt); + NEXT(pkt) = NULL; + NB_SEGS(pkt) = 1; + rte_mbuf_raw_free(pkt); pkt = rep; } break; @@ -1485,61 +1819,53 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &rss_hash_res); if (!len) { - rte_mbuf_refcnt_set(rep, 0); - __rte_mbuf_raw_free(rep); + rte_mbuf_raw_free(rep); break; } if (unlikely(len == -1)) { /* RX error, packet is likely too large. */ - rte_mbuf_refcnt_set(rep, 0); - __rte_mbuf_raw_free(rep); + rte_mbuf_raw_free(rep); ++rxq->stats.idropped; goto skip; } pkt = seg; assert(len >= (rxq->crc_present << 2)); /* Update packet information. */ - pkt->packet_type = 0; + pkt->packet_type = rxq_cq_to_pkt_type(cqe); pkt->ol_flags = 0; if (rss_hash_res && rxq->rss_hash) { pkt->hash.rss = rss_hash_res; pkt->ol_flags = PKT_RX_RSS_HASH; } if (rxq->mark && - ((cqe->sop_drop_qpn != - htonl(MLX5_FLOW_MARK_INVALID)) && - (cqe->sop_drop_qpn != - htonl(MLX5_FLOW_MARK_DEFAULT)))) { - pkt->hash.fdir.hi = - mlx5_flow_mark_get(cqe->sop_drop_qpn); - pkt->ol_flags &= ~PKT_RX_RSS_HASH; - pkt->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID; - } - if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip | - rxq->crc_present) { - if (rxq->csum) { - pkt->packet_type = - rxq_cq_to_pkt_type(cqe); - pkt->ol_flags |= - rxq_cq_to_ol_flags(rxq, cqe); - } - if (ntohs(cqe->hdr_type_etc) & - MLX5_CQE_VLAN_STRIPPED) { - pkt->ol_flags |= PKT_RX_VLAN_PKT | - PKT_RX_VLAN_STRIPPED; - pkt->vlan_tci = ntohs(cqe->vlan_info); + MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) { + pkt->ol_flags |= PKT_RX_FDIR; + if (cqe->sop_drop_qpn != + htonl(MLX5_FLOW_MARK_DEFAULT)) { + uint32_t mark = cqe->sop_drop_qpn; + + pkt->ol_flags |= PKT_RX_FDIR_ID; + pkt->hash.fdir.hi = + mlx5_flow_mark_get(mark); } - if (rxq->crc_present) - len -= ETHER_CRC_LEN; } + if (rxq->csum | rxq->csum_l2tun) + pkt->ol_flags |= rxq_cq_to_ol_flags(rxq, cqe); + if (rxq->vlan_strip && + (cqe->hdr_type_etc & + htons(MLX5_CQE_VLAN_STRIPPED))) { + pkt->ol_flags |= PKT_RX_VLAN_PKT | + PKT_RX_VLAN_STRIPPED; + pkt->vlan_tci = ntohs(cqe->vlan_info); + } + if (rxq->crc_present) + len -= ETHER_CRC_LEN; PKT_LEN(pkt) = len; } DATA_LEN(rep) = DATA_LEN(seg); PKT_LEN(rep) = PKT_LEN(seg); SET_DATA_OFF(rep, DATA_OFF(seg)); - NB_SEGS(rep) = NB_SEGS(seg); PORT(rep) = PORT(seg); - NEXT(rep) = NULL; (*rxq->elts)[idx] = rep; /* * Fill NIC descriptor with the new buffer. The lkey and size @@ -1633,3 +1959,65 @@ removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) (void)pkts_n; return 0; } + +/* + * Vectorized Rx/Tx routines are not compiled in when required vector + * instructions are not supported on a target architecture. The following null + * stubs are needed for linkage when those are not included outside of this file + * (e.g. mlx5_rxtx_vec_sse.c for x86). + */ + +uint16_t __attribute__((weak)) +mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) +{ + (void)dpdk_txq; + (void)pkts; + (void)pkts_n; + return 0; +} + +uint16_t __attribute__((weak)) +mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) +{ + (void)dpdk_txq; + (void)pkts; + (void)pkts_n; + return 0; +} + +uint16_t __attribute__((weak)) +mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) +{ + (void)dpdk_rxq; + (void)pkts; + (void)pkts_n; + return 0; +} + +int __attribute__((weak)) +priv_check_raw_vec_tx_support(struct priv *priv) +{ + (void)priv; + return -ENOTSUP; +} + +int __attribute__((weak)) +priv_check_vec_tx_support(struct priv *priv) +{ + (void)priv; + return -ENOTSUP; +} + +int __attribute__((weak)) +rxq_check_vec_support(struct rxq *rxq) +{ + (void)rxq; + return -ENOTSUP; +} + +int __attribute__((weak)) +priv_check_vec_rx_support(struct priv *priv) +{ + (void)priv; + return -ENOTSUP; +}