X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx.c;h=c1dc8c4e17162efeeb3c327478f265059c9728f4;hb=5f6bd807691e300b2d8c25c209fb778fa6692d61;hp=734ba0b92e894e80ef4dbce620195fbb9fb61a6d;hpb=5f44cfd011478bcf00430c53f276ddf9b795d443;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 734ba0b92e..c1dc8c4e17 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "mlx5.h" #include "mlx5_utils.h" @@ -38,7 +39,7 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe); static __rte_always_inline int mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, - uint16_t cqe_cnt, uint32_t *rss_hash); + uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe); static __rte_always_inline uint32_t rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe); @@ -50,6 +51,10 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, static __rte_always_inline void mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx); +static int +mlx5_queue_state_modify(struct rte_eth_dev *dev, + struct mlx5_mp_arg_queue_state_modify *sm); + uint32_t mlx5_ptype_table[] __rte_cache_aligned = { [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */ }; @@ -417,20 +422,17 @@ mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) } /** - * DPDK callback to check the status of a rx descriptor. + * Internal function to compute the number of used descriptors in an RX queue * - * @param rx_queue - * The rx queue. - * @param[in] offset - * The index of the descriptor in the ring. + * @param rxq + * The Rx queue. * * @return - * The status of the tx descriptor. + * The number of used rx descriptor. */ -int -mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) +static uint32_t +rx_queue_count(struct mlx5_rxq_data *rxq) { - struct mlx5_rxq_data *rxq = rx_queue; struct rxq_zip *zip = &rxq->zip; volatile struct mlx5_cqe *cqe; const unsigned int cqe_n = (1 << rxq->cqe_n); @@ -447,7 +449,7 @@ mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) cq_ci = rxq->cq_ci; } cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; - while (check_cqe(cqe, cqe_n, cq_ci) == 0) { + while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) { int8_t op_own; unsigned int n; @@ -461,11 +463,225 @@ mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; } used = RTE_MIN(used, (1U << rxq->elts_n) - 1); - if (offset < used) + return used; +} + +/** + * DPDK callback to check the status of a rx descriptor. + * + * @param rx_queue + * The Rx queue. + * @param[in] offset + * The index of the descriptor in the ring. + * + * @return + * The status of the tx descriptor. + */ +int +mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) +{ + struct mlx5_rxq_data *rxq = rx_queue; + struct mlx5_rxq_ctrl *rxq_ctrl = + container_of(rxq, struct mlx5_rxq_ctrl, rxq); + struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv); + + if (dev->rx_pkt_burst != mlx5_rx_burst) { + rte_errno = ENOTSUP; + return -rte_errno; + } + if (offset >= (1 << rxq->elts_n)) { + rte_errno = EINVAL; + return -rte_errno; + } + if (offset < rx_queue_count(rxq)) return RTE_ETH_RX_DESC_DONE; return RTE_ETH_RX_DESC_AVAIL; } +/** + * DPDK callback to get the number of used descriptors in a RX queue + * + * @param dev + * Pointer to the device structure. + * + * @param rx_queue_id + * The Rx queue. + * + * @return + * The number of used rx descriptor. + * -EINVAL if the queue is invalid + */ +uint32_t +mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_rxq_data *rxq; + + if (dev->rx_pkt_burst != mlx5_rx_burst) { + rte_errno = ENOTSUP; + return -rte_errno; + } + rxq = (*priv->rxqs)[rx_queue_id]; + if (!rxq) { + rte_errno = EINVAL; + return -rte_errno; + } + return rx_queue_count(rxq); +} + +#define MLX5_SYSTEM_LOG_DIR "/var/log" +/** + * Dump debug information to log file. + * + * @param fname + * The file name. + * @param hex_title + * If not NULL this string is printed as a header to the output + * and the output will be in hexadecimal view. + * @param buf + * This is the buffer address to print out. + * @param len + * The number of bytes to dump out. + */ +void +mlx5_dump_debug_information(const char *fname, const char *hex_title, + const void *buf, unsigned int hex_len) +{ + FILE *fd; + + MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname); + fd = fopen(path, "a+"); + if (!fd) { + DRV_LOG(WARNING, "cannot open %s for debug dump\n", + path); + MKSTR(path2, "./%s", fname); + fd = fopen(path2, "a+"); + if (!fd) { + DRV_LOG(ERR, "cannot open %s for debug dump\n", + path2); + return; + } + DRV_LOG(INFO, "New debug dump in file %s\n", path2); + } else { + DRV_LOG(INFO, "New debug dump in file %s\n", path); + } + if (hex_title) + rte_hexdump(fd, hex_title, buf, hex_len); + else + fprintf(fd, "%s", (const char *)buf); + fprintf(fd, "\n\n\n"); + fclose(fd); +} + +/** + * Move QP from error state to running state and initialize indexes. + * + * @param txq_ctrl + * Pointer to TX queue control structure. + * + * @return + * 0 on success, else -1. + */ +static int +tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl) +{ + struct mlx5_mp_arg_queue_state_modify sm = { + .is_wq = 0, + .queue_id = txq_ctrl->txq.idx, + }; + + if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm)) + return -1; + txq_ctrl->txq.wqe_ci = 0; + txq_ctrl->txq.wqe_pi = 0; + txq_ctrl->txq.elts_comp = 0; + return 0; +} + +/* Return 1 if the error CQE is signed otherwise, sign it and return 0. */ +static int +check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe) +{ + static const uint8_t magic[] = "seen"; + int ret = 1; + unsigned int i; + + for (i = 0; i < sizeof(magic); ++i) + if (!ret || err_cqe->rsvd1[i] != magic[i]) { + ret = 0; + err_cqe->rsvd1[i] = magic[i]; + } + return ret; +} + +/** + * Handle error CQE. + * + * @param txq + * Pointer to TX queue structure. + * @param error_cqe + * Pointer to the error CQE. + * + * @return + * The last Tx buffer element to free. + */ +uint16_t +mlx5_tx_error_cqe_handle(struct mlx5_txq_data *txq, + volatile struct mlx5_err_cqe *err_cqe) +{ + if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) { + const uint16_t wqe_m = ((1 << txq->wqe_n) - 1); + struct mlx5_txq_ctrl *txq_ctrl = + container_of(txq, struct mlx5_txq_ctrl, txq); + uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter); + int seen = check_err_cqe_seen(err_cqe); + + if (!seen && txq_ctrl->dump_file_n < + txq_ctrl->priv->config.max_dump_files_num) { + MKSTR(err_str, "Unexpected CQE error syndrome " + "0x%02x CQN = %u SQN = %u wqe_counter = %u " + "wq_ci = %u cq_ci = %u", err_cqe->syndrome, + txq_ctrl->cqn, txq->qp_num_8s >> 8, + rte_be_to_cpu_16(err_cqe->wqe_counter), + txq->wqe_ci, txq->cq_ci); + MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u", + PORT_ID(txq_ctrl->priv), txq->idx, + txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc()); + mlx5_dump_debug_information(name, NULL, err_str, 0); + mlx5_dump_debug_information(name, "MLX5 Error CQ:", + (const void *)((uintptr_t) + &(*txq->cqes)[0]), + sizeof(*err_cqe) * + (1 << txq->cqe_n)); + mlx5_dump_debug_information(name, "MLX5 Error SQ:", + (const void *)((uintptr_t) + tx_mlx5_wqe(txq, 0)), + MLX5_WQE_SIZE * + (1 << txq->wqe_n)); + txq_ctrl->dump_file_n++; + } + if (!seen) + /* + * Count errors in WQEs units. + * Later it can be improved to count error packets, + * for example, by SQ parsing to find how much packets + * should be counted for each WQE. + */ + txq->stats.oerrors += ((txq->wqe_ci & wqe_m) - + new_wqe_pi) & wqe_m; + if (tx_recover_qp(txq_ctrl) == 0) { + txq->cq_ci++; + /* Release all the remaining buffers. */ + return txq->elts_head; + } + /* Recovering failed - try again later on the same WQE. */ + } else { + txq->cq_ci++; + } + /* Do not release buffers. */ + return txq->elts_tail; +} + /** * DPDK callback for TX. * @@ -495,6 +711,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) volatile struct mlx5_wqe_ctrl *last_wqe = NULL; unsigned int segs_n = 0; const unsigned int max_inline = txq->max_inline; + uint64_t addr_64; if (unlikely(!pkts_n)) return 0; @@ -503,8 +720,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Start processing. */ mlx5_tx_complete(txq); max_elts = (elts_n - (elts_head - txq->elts_tail)); - /* A CQE slot must always be available. */ - assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); if (unlikely(!max_wqe)) return 0; @@ -522,9 +737,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) uint16_t ehdr; uint8_t cs_flags; uint8_t tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG); - uint8_t is_vlan = !!(buf->ol_flags & PKT_TX_VLAN_PKT); uint32_t swp_offsets = 0; uint8_t swp_types = 0; + rte_be32_t metadata; uint16_t tso_segsz = 0; #ifdef MLX5_PMD_SOFT_COUNTERS uint32_t total_length = 0; @@ -566,14 +781,16 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) rte_prefetch0( rte_pktmbuf_mtod(*(pkts + 1), volatile void *)); cs_flags = txq_ol_cksum_to_cs(buf); - txq_mbuf_to_swp(txq, buf, tso, is_vlan, - (uint8_t *)&swp_offsets, &swp_types); + txq_mbuf_to_swp(txq, buf, (uint8_t *)&swp_offsets, &swp_types); raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE; + /* Copy metadata from mbuf if valid */ + metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata : + 0; /* Replace the Ethernet type by the VLAN if necessary. */ - if (is_vlan) { + if (buf->ol_flags & PKT_TX_VLAN_PKT) { uint32_t vlan = rte_cpu_to_be_32(0x81000000 | buf->vlan_tci); - unsigned int len = 2 * ETHER_ADDR_LEN - 2; + unsigned int len = 2 * RTE_ETHER_ADDR_LEN - 2; addr += 2; length -= 2; @@ -605,7 +822,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) wqe->ctrl = (rte_v128u32_t){ rte_cpu_to_be_32(txq->wqe_ci << 8), rte_cpu_to_be_32(txq->qp_num_8s | 1), - 0, + rte_cpu_to_be_32 + (MLX5_COMP_ONLY_FIRST_ERR << + MLX5_COMP_MODE_OFFSET), 0, }; ds = 1; @@ -634,7 +853,8 @@ pkt_inline: RTE_CACHE_LINE_SIZE); copy_b = (addr_end > addr) ? RTE_MIN((addr_end - addr), length) : 0; - if (copy_b && ((end - (uintptr_t)raw) > copy_b)) { + if (copy_b && ((end - (uintptr_t)raw) > + (copy_b + sizeof(inl)))) { /* * One Dseg remains in the current WQE. To * keep the computation positive, it is @@ -713,12 +933,12 @@ pkt_inline: ds = 3; use_dseg: /* Add the remaining packet as a simple ds. */ - addr = rte_cpu_to_be_64(addr); + addr_64 = rte_cpu_to_be_64(addr); *dseg = (rte_v128u32_t){ rte_cpu_to_be_32(length), mlx5_tx_mb2mr(txq, buf), - addr, - addr >> 32, + addr_64, + addr_64 >> 32, }; ++ds; if (!segs_n) @@ -752,12 +972,12 @@ next_seg: total_length += length; #endif /* Store segment information. */ - addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)); + addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)); *dseg = (rte_v128u32_t){ rte_cpu_to_be_32(length), mlx5_tx_mb2mr(txq, buf), - addr, - addr >> 32, + addr_64, + addr_64 >> 32, }; (*txq->elts)[++elts_head & elts_m] = buf; if (--segs_n) @@ -777,14 +997,15 @@ next_pkt: rte_cpu_to_be_32((txq->wqe_ci << 8) | MLX5_OPCODE_TSO), rte_cpu_to_be_32(txq->qp_num_8s | ds), - 0, + rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR << + MLX5_COMP_MODE_OFFSET), 0, }; wqe->eseg = (rte_v128u32_t){ swp_offsets, cs_flags | (swp_types << 8) | (rte_cpu_to_be_16(tso_segsz) << 16), - 0, + metadata, (ehdr << 16) | rte_cpu_to_be_16(tso_header_sz), }; } else { @@ -792,13 +1013,14 @@ next_pkt: rte_cpu_to_be_32((txq->wqe_ci << 8) | MLX5_OPCODE_SEND), rte_cpu_to_be_32(txq->qp_num_8s | ds), - 0, + rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR << + MLX5_COMP_MODE_OFFSET), 0, }; wqe->eseg = (rte_v128u32_t){ swp_offsets, cs_flags | (swp_types << 8), - 0, + metadata, (ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz), }; } @@ -818,14 +1040,14 @@ next_wqe: /* Check whether completion threshold has been reached. */ comp = txq->elts_comp + i + j + k; if (comp >= MLX5_TX_COMP_THRESH) { + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci)); /* Request completion on last WQE. */ - last_wqe->ctrl2 = rte_cpu_to_be_32(8); + last_wqe->ctrl2 = rte_cpu_to_be_32(MLX5_COMP_ALWAYS << + MLX5_COMP_MODE_OFFSET); /* Save elts_head in unused "immediate" field of WQE. */ last_wqe->ctrl3 = txq->elts_head; txq->elts_comp = 0; -#ifndef NDEBUG - ++txq->cq_pi; -#endif } else { txq->elts_comp = comp; } @@ -865,11 +1087,12 @@ mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length) mpw->wqe->eseg.inline_hdr_sz = 0; mpw->wqe->eseg.rsvd0 = 0; mpw->wqe->eseg.rsvd1 = 0; - mpw->wqe->eseg.rsvd2 = 0; + mpw->wqe->eseg.flow_table_metadata = 0; mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) | (txq->wqe_ci << 8) | MLX5_OPCODE_TSO); - mpw->wqe->ctrl[2] = 0; + mpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR << + MLX5_COMP_MODE_OFFSET); mpw->wqe->ctrl[3] = 0; mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *) (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE)); @@ -944,8 +1167,6 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Start processing. */ mlx5_tx_complete(txq); max_elts = (elts_n - (elts_head - txq->elts_tail)); - /* A CQE slot must always be available. */ - assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); if (unlikely(!max_wqe)) return 0; @@ -954,6 +1175,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) uint32_t length; unsigned int segs_n = buf->nb_segs; uint32_t cs_flags; + rte_be32_t metadata; /* * Make sure there is enough room to store this packet and @@ -970,6 +1192,9 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) max_elts -= segs_n; --pkts_n; cs_flags = txq_ol_cksum_to_cs(buf); + /* Copy metadata from mbuf if valid */ + metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata : + 0; /* Retrieve packet information. */ length = PKT_LEN(buf); assert(length); @@ -977,6 +1202,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if ((mpw.state == MLX5_MPW_STATE_OPENED) && ((mpw.len != length) || (segs_n != 1) || + (mpw.wqe->eseg.flow_table_metadata != metadata) || (mpw.wqe->eseg.cs_flags != cs_flags))) mlx5_mpw_close(txq, &mpw); if (mpw.state == MLX5_MPW_STATE_CLOSED) { @@ -990,6 +1216,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) max_wqe -= 2; mlx5_mpw_new(txq, &mpw, length); mpw.wqe->eseg.cs_flags = cs_flags; + mpw.wqe->eseg.flow_table_metadata = metadata; } /* Multi-segment packets must be alone in their MPW. */ assert((segs_n == 1) || (mpw.pkts_n == 0)); @@ -1034,14 +1261,14 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if (comp >= MLX5_TX_COMP_THRESH) { volatile struct mlx5_wqe *wqe = mpw.wqe; + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci)); /* Request completion on last WQE. */ - wqe->ctrl[2] = rte_cpu_to_be_32(8); + wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS << + MLX5_COMP_MODE_OFFSET); /* Save elts_head in unused "immediate" field of WQE. */ wqe->ctrl[3] = elts_head; txq->elts_comp = 0; -#ifndef NDEBUG - ++txq->cq_pi; -#endif } else { txq->elts_comp = comp; } @@ -1082,14 +1309,15 @@ mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) | (txq->wqe_ci << 8) | MLX5_OPCODE_TSO); - mpw->wqe->ctrl[2] = 0; + mpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR << + MLX5_COMP_MODE_OFFSET); mpw->wqe->ctrl[3] = 0; mpw->wqe->eseg.mss = rte_cpu_to_be_16(length); mpw->wqe->eseg.inline_hdr_sz = 0; mpw->wqe->eseg.cs_flags = 0; mpw->wqe->eseg.rsvd0 = 0; mpw->wqe->eseg.rsvd1 = 0; - mpw->wqe->eseg.rsvd2 = 0; + mpw->wqe->eseg.flow_table_metadata = 0; inl = (struct mlx5_wqe_inl_small *) (((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE); mpw->data.raw = (uint8_t *)&inl->raw; @@ -1173,14 +1401,13 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, /* Start processing. */ mlx5_tx_complete(txq); max_elts = (elts_n - (elts_head - txq->elts_tail)); - /* A CQE slot must always be available. */ - assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); do { struct rte_mbuf *buf = *(pkts++); uintptr_t addr; uint32_t length; unsigned int segs_n = buf->nb_segs; uint8_t cs_flags; + rte_be32_t metadata; /* * Make sure there is enough room to store this packet and @@ -1202,18 +1429,23 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, */ max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); cs_flags = txq_ol_cksum_to_cs(buf); + /* Copy metadata from mbuf if valid */ + metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata : + 0; /* Retrieve packet information. */ length = PKT_LEN(buf); /* Start new session if packet differs. */ if (mpw.state == MLX5_MPW_STATE_OPENED) { if ((mpw.len != length) || (segs_n != 1) || + (mpw.wqe->eseg.flow_table_metadata != metadata) || (mpw.wqe->eseg.cs_flags != cs_flags)) mlx5_mpw_close(txq, &mpw); } else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) { if ((mpw.len != length) || (segs_n != 1) || (length > inline_room) || + (mpw.wqe->eseg.flow_table_metadata != metadata) || (mpw.wqe->eseg.cs_flags != cs_flags)) { mlx5_mpw_inline_close(txq, &mpw); inline_room = @@ -1233,12 +1465,14 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, max_wqe -= 2; mlx5_mpw_new(txq, &mpw, length); mpw.wqe->eseg.cs_flags = cs_flags; + mpw.wqe->eseg.flow_table_metadata = metadata; } else { if (unlikely(max_wqe < wqe_inl_n)) break; max_wqe -= wqe_inl_n; mlx5_mpw_inline_new(txq, &mpw, length); mpw.wqe->eseg.cs_flags = cs_flags; + mpw.wqe->eseg.flow_table_metadata = metadata; } } /* Multi-segment packets must be alone in their MPW. */ @@ -1331,14 +1565,14 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, if (comp >= MLX5_TX_COMP_THRESH) { volatile struct mlx5_wqe *wqe = mpw.wqe; + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci)); /* Request completion on last WQE. */ - wqe->ctrl[2] = rte_cpu_to_be_32(8); + wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS << + MLX5_COMP_MODE_OFFSET); /* Save elts_head in unused "immediate" field of WQE. */ wqe->ctrl[3] = elts_head; txq->elts_comp = 0; -#ifndef NDEBUG - ++txq->cq_pi; -#endif } else { txq->elts_comp = comp; } @@ -1379,7 +1613,8 @@ mlx5_empw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, int padding) rte_cpu_to_be_32((MLX5_OPC_MOD_ENHANCED_MPSW << 24) | (txq->wqe_ci << 8) | MLX5_OPCODE_ENHANCED_MPSW); - mpw->wqe->ctrl[2] = 0; + mpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR << + MLX5_COMP_MODE_OFFSET); mpw->wqe->ctrl[3] = 0; memset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE); if (unlikely(padding)) { @@ -1452,6 +1687,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, unsigned int mpw_room = 0; unsigned int inl_pad = 0; uint32_t inl_hdr; + uint64_t addr_64; struct mlx5_mpw mpw = { .state = MLX5_MPW_STATE_CLOSED, }; @@ -1461,8 +1697,6 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, /* Start processing. */ mlx5_tx_complete(txq); max_elts = (elts_n - (elts_head - txq->elts_tail)); - /* A CQE slot must always be available. */ - assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); if (unlikely(!max_wqe)) return 0; @@ -1472,6 +1706,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, unsigned int do_inline = 0; /* Whether inline is possible. */ uint32_t length; uint8_t cs_flags; + rte_be32_t metadata; /* Multi-segmented packet is handled in slow-path outside. */ assert(NB_SEGS(buf) == 1); @@ -1479,6 +1714,9 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, if (max_elts - j == 0) break; cs_flags = txq_ol_cksum_to_cs(buf); + /* Copy metadata from mbuf if valid */ + metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata : + 0; /* Retrieve packet information. */ length = PKT_LEN(buf); /* Start new session if: @@ -1493,6 +1731,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, (length <= txq->inline_max_packet_sz && inl_pad + sizeof(inl_hdr) + length > mpw_room) || + (mpw.wqe->eseg.flow_table_metadata != metadata) || (mpw.wqe->eseg.cs_flags != cs_flags)) max_wqe -= mlx5_empw_close(txq, &mpw); } @@ -1516,6 +1755,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, sizeof(inl_hdr) + length <= mpw_room && !txq->mpw_hdr_dseg; mpw.wqe->eseg.cs_flags = cs_flags; + mpw.wqe->eseg.flow_table_metadata = metadata; } else { /* Evaluate whether the next packet can be inlined. * Inlininig is possible when: @@ -1588,13 +1828,13 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, ((uintptr_t)mpw.data.raw + inl_pad); (*txq->elts)[elts_head++ & elts_m] = buf; - addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, - uintptr_t)); + addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, + uintptr_t)); *dseg = (rte_v128u32_t) { rte_cpu_to_be_32(length), mlx5_tx_mb2mr(txq, buf), - addr, - addr >> 32, + addr_64, + addr_64 >> 32, }; mpw.data.raw = (volatile void *)(dseg + 1); mpw.total_len += (inl_pad + sizeof(*dseg)); @@ -1618,15 +1858,15 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, (1 << txq->wqe_n) / MLX5_TX_COMP_THRESH_INLINE_DIV) { volatile struct mlx5_wqe *wqe = mpw.wqe; + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci)); /* Request completion on last WQE. */ - wqe->ctrl[2] = rte_cpu_to_be_32(8); + wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS << + MLX5_COMP_MODE_OFFSET); /* Save elts_head in unused "immediate" field of WQE. */ wqe->ctrl[3] = elts_head; txq->elts_comp = 0; txq->mpw_comp = txq->wqe_ci; -#ifndef NDEBUG - ++txq->cq_pi; -#endif } else { txq->elts_comp += j; } @@ -1715,6 +1955,287 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe) return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); } +/** + * Initialize Rx WQ and indexes. + * + * @param[in] rxq + * Pointer to RX queue structure. + */ +void +mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) +{ + const unsigned int wqe_n = 1 << rxq->elts_n; + unsigned int i; + + for (i = 0; (i != wqe_n); ++i) { + volatile struct mlx5_wqe_data_seg *scat; + uintptr_t addr; + uint32_t byte_count; + + if (mlx5_rxq_mprq_enabled(rxq)) { + struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i]; + + scat = &((volatile struct mlx5_wqe_mprq *) + rxq->wqes)[i].dseg; + addr = (uintptr_t)mlx5_mprq_buf_addr(buf); + byte_count = (1 << rxq->strd_sz_n) * + (1 << rxq->strd_num_n); + } else { + struct rte_mbuf *buf = (*rxq->elts)[i]; + + scat = &((volatile struct mlx5_wqe_data_seg *) + rxq->wqes)[i]; + addr = rte_pktmbuf_mtod(buf, uintptr_t); + byte_count = DATA_LEN(buf); + } + /* scat->addr must be able to store a pointer. */ + assert(sizeof(scat->addr) >= sizeof(uintptr_t)); + *scat = (struct mlx5_wqe_data_seg){ + .addr = rte_cpu_to_be_64(addr), + .byte_count = rte_cpu_to_be_32(byte_count), + .lkey = mlx5_rx_addr2mr(rxq, addr), + }; + } + rxq->consumed_strd = 0; + rxq->decompressed = 0; + rxq->rq_pi = 0; + rxq->zip = (struct rxq_zip){ + .ai = 0, + }; + /* Update doorbell counter. */ + rxq->rq_ci = wqe_n >> rxq->sges_n; + rte_cio_wmb(); + *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); +} + +/** + * Modify a Verbs queue state. + * This must be called from the primary process. + * + * @param dev + * Pointer to Ethernet device. + * @param sm + * State modify request parameters. + * + * @return + * 0 in case of success else non-zero value and rte_errno is set. + */ +int +mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, + const struct mlx5_mp_arg_queue_state_modify *sm) +{ + int ret; + struct mlx5_priv *priv = dev->data->dev_private; + + if (sm->is_wq) { + struct ibv_wq_attr mod = { + .attr_mask = IBV_WQ_ATTR_STATE, + .wq_state = sm->state, + }; + struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id]; + struct mlx5_rxq_ctrl *rxq_ctrl = + container_of(rxq, struct mlx5_rxq_ctrl, rxq); + + ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod); + if (ret) { + DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s\n", + sm->state, strerror(errno)); + rte_errno = errno; + return ret; + } + } else { + struct mlx5_txq_data *txq = (*priv->txqs)[sm->queue_id]; + struct mlx5_txq_ctrl *txq_ctrl = + container_of(txq, struct mlx5_txq_ctrl, txq); + struct ibv_qp_attr mod = { + .qp_state = IBV_QPS_RESET, + .port_num = (uint8_t)priv->ibv_port, + }; + struct ibv_qp *qp = txq_ctrl->ibv->qp; + + ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); + if (ret) { + DRV_LOG(ERR, "Cannot change the Tx QP state to RESET " + "%s\n", strerror(errno)); + rte_errno = errno; + return ret; + } + mod.qp_state = IBV_QPS_INIT; + ret = mlx5_glue->modify_qp(qp, &mod, + (IBV_QP_STATE | IBV_QP_PORT)); + if (ret) { + DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s\n", + strerror(errno)); + rte_errno = errno; + return ret; + } + mod.qp_state = IBV_QPS_RTR; + ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); + if (ret) { + DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s\n", + strerror(errno)); + rte_errno = errno; + return ret; + } + mod.qp_state = IBV_QPS_RTS; + ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); + if (ret) { + DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s\n", + strerror(errno)); + rte_errno = errno; + return ret; + } + } + return 0; +} + +/** + * Modify a Verbs queue state. + * + * @param dev + * Pointer to Ethernet device. + * @param sm + * State modify request parameters. + * + * @return + * 0 in case of success else non-zero value. + */ +static int +mlx5_queue_state_modify(struct rte_eth_dev *dev, + struct mlx5_mp_arg_queue_state_modify *sm) +{ + int ret = 0; + + switch (rte_eal_process_type()) { + case RTE_PROC_PRIMARY: + ret = mlx5_queue_state_modify_primary(dev, sm); + break; + case RTE_PROC_SECONDARY: + ret = mlx5_mp_req_queue_state_modify(dev, sm); + break; + default: + break; + } + return ret; +} + +/** + * Handle a Rx error. + * The function inserts the RQ state to reset when the first error CQE is + * shown, then drains the CQ by the caller function loop. When the CQ is empty, + * it moves the RQ state to ready and initializes the RQ. + * Next CQE identification and error counting are in the caller responsibility. + * + * @param[in] rxq + * Pointer to RX queue structure. + * @param[in] mbuf_prepare + * Whether to prepare mbufs for the RQ. + * + * @return + * -1 in case of recovery error, otherwise the CQE status. + */ +int +mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare) +{ + const uint16_t cqe_n = 1 << rxq->cqe_n; + const uint16_t cqe_mask = cqe_n - 1; + const unsigned int wqe_n = 1 << rxq->elts_n; + struct mlx5_rxq_ctrl *rxq_ctrl = + container_of(rxq, struct mlx5_rxq_ctrl, rxq); + union { + volatile struct mlx5_cqe *cqe; + volatile struct mlx5_err_cqe *err_cqe; + } u = { + .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask], + }; + struct mlx5_mp_arg_queue_state_modify sm; + int ret; + + switch (rxq->err_state) { + case MLX5_RXQ_ERR_STATE_NO_ERROR: + rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET; + /* Fall-through */ + case MLX5_RXQ_ERR_STATE_NEED_RESET: + sm.is_wq = 1; + sm.queue_id = rxq->idx; + sm.state = IBV_WQS_RESET; + if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm)) + return -1; + if (rxq_ctrl->dump_file_n < + rxq_ctrl->priv->config.max_dump_files_num) { + MKSTR(err_str, "Unexpected CQE error syndrome " + "0x%02x CQN = %u RQN = %u wqe_counter = %u" + " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome, + rxq->cqn, rxq_ctrl->wqn, + rte_be_to_cpu_16(u.err_cqe->wqe_counter), + rxq->rq_ci << rxq->sges_n, rxq->cq_ci); + MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u", + rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc()); + mlx5_dump_debug_information(name, NULL, err_str, 0); + mlx5_dump_debug_information(name, "MLX5 Error CQ:", + (const void *)((uintptr_t) + rxq->cqes), + sizeof(*u.cqe) * cqe_n); + mlx5_dump_debug_information(name, "MLX5 Error RQ:", + (const void *)((uintptr_t) + rxq->wqes), + 16 * wqe_n); + rxq_ctrl->dump_file_n++; + } + rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY; + /* Fall-through */ + case MLX5_RXQ_ERR_STATE_NEED_READY: + ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci); + if (ret == MLX5_CQE_STATUS_HW_OWN) { + rte_cio_wmb(); + *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); + rte_cio_wmb(); + /* + * The RQ consumer index must be zeroed while moving + * from RESET state to RDY state. + */ + *rxq->rq_db = rte_cpu_to_be_32(0); + rte_cio_wmb(); + sm.is_wq = 1; + sm.queue_id = rxq->idx; + sm.state = IBV_WQS_RDY; + if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), + &sm)) + return -1; + if (mbuf_prepare) { + const uint16_t q_mask = wqe_n - 1; + uint16_t elt_idx; + struct rte_mbuf **elt; + int i; + unsigned int n = wqe_n - (rxq->rq_ci - + rxq->rq_pi); + + for (i = 0; i < (int)n; ++i) { + elt_idx = (rxq->rq_ci + i) & q_mask; + elt = &(*rxq->elts)[elt_idx]; + *elt = rte_mbuf_raw_alloc(rxq->mp); + if (!*elt) { + for (i--; i >= 0; --i) { + elt_idx = (rxq->rq_ci + + i) & q_mask; + elt = &(*rxq->elts) + [elt_idx]; + rte_pktmbuf_free_seg + (*elt); + } + return -1; + } + } + } + mlx5_rxq_initialize(rxq); + rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; + } + return ret; + default: + return -1; + } +} + /** * Get size of the next packet for a given CQE. For compressed CQEs, the * consumer index is updated only once all packets of the current one have @@ -1724,112 +2245,131 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe) * Pointer to RX queue. * @param cqe * CQE to process. - * @param[out] rss_hash - * Packet RSS Hash result. + * @param[out] mcqe + * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not + * written. * * @return - * Packet size in bytes (0 if there is none), -1 in case of completion - * with error. + * 0 in case of empty CQE, otherwise the packet size in bytes. */ static inline int mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, - uint16_t cqe_cnt, uint32_t *rss_hash) + uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe) { struct rxq_zip *zip = &rxq->zip; uint16_t cqe_n = cqe_cnt + 1; - int len = 0; + int len; uint16_t idx, end; - /* Process compressed data in the CQE and mini arrays. */ - if (zip->ai) { - volatile struct mlx5_mini_cqe8 (*mc)[8] = - (volatile struct mlx5_mini_cqe8 (*)[8]) - (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].pkt_info); - - len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt); - *rss_hash = rte_be_to_cpu_32((*mc)[zip->ai & 7].rx_hash_result); - if ((++zip->ai & 7) == 0) { - /* Invalidate consumed CQEs */ - idx = zip->ca; - end = zip->na; - while (idx != end) { - (*rxq->cqes)[idx & cqe_cnt].op_own = - MLX5_CQE_INVALIDATE; - ++idx; - } - /* - * Increment consumer index to skip the number of - * CQEs consumed. Hardware leaves holes in the CQ - * ring for software use. - */ - zip->ca = zip->na; - zip->na += 8; - } - if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { - /* Invalidate the rest */ - idx = zip->ca; - end = zip->cq_ci; - - while (idx != end) { - (*rxq->cqes)[idx & cqe_cnt].op_own = - MLX5_CQE_INVALIDATE; - ++idx; - } - rxq->cq_ci = zip->cq_ci; - zip->ai = 0; - } - /* No compressed data, get next CQE and verify if it is compressed. */ - } else { - int ret; - int8_t op_own; - - ret = check_cqe(cqe, cqe_n, rxq->cq_ci); - if (unlikely(ret == 1)) - return 0; - ++rxq->cq_ci; - op_own = cqe->op_own; - rte_cio_rmb(); - if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { + do { + len = 0; + /* Process compressed data in the CQE and mini arrays. */ + if (zip->ai) { volatile struct mlx5_mini_cqe8 (*mc)[8] = (volatile struct mlx5_mini_cqe8 (*)[8]) - (uintptr_t)(&(*rxq->cqes)[rxq->cq_ci & + (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].pkt_info); - /* Fix endianness. */ - zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); - /* - * Current mini array position is the one returned by - * check_cqe64(). - * - * If completion comprises several mini arrays, as a - * special case the second one is located 7 CQEs after - * the initial CQE instead of 8 for subsequent ones. - */ - zip->ca = rxq->cq_ci; - zip->na = zip->ca + 7; - /* Compute the next non compressed CQE. */ - --rxq->cq_ci; - zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; - /* Get packet size to return. */ - len = rte_be_to_cpu_32((*mc)[0].byte_cnt); - *rss_hash = rte_be_to_cpu_32((*mc)[0].rx_hash_result); - zip->ai = 1; - /* Prefetch all the entries to be invalidated */ - idx = zip->ca; - end = zip->cq_ci; - while (idx != end) { - rte_prefetch0(&(*rxq->cqes)[(idx) & cqe_cnt]); - ++idx; + len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt); + *mcqe = &(*mc)[zip->ai & 7]; + if ((++zip->ai & 7) == 0) { + /* Invalidate consumed CQEs */ + idx = zip->ca; + end = zip->na; + while (idx != end) { + (*rxq->cqes)[idx & cqe_cnt].op_own = + MLX5_CQE_INVALIDATE; + ++idx; + } + /* + * Increment consumer index to skip the number + * of CQEs consumed. Hardware leaves holes in + * the CQ ring for software use. + */ + zip->ca = zip->na; + zip->na += 8; } + if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { + /* Invalidate the rest */ + idx = zip->ca; + end = zip->cq_ci; + + while (idx != end) { + (*rxq->cqes)[idx & cqe_cnt].op_own = + MLX5_CQE_INVALIDATE; + ++idx; + } + rxq->cq_ci = zip->cq_ci; + zip->ai = 0; + } + /* + * No compressed data, get next CQE and verify if it is + * compressed. + */ } else { - len = rte_be_to_cpu_32(cqe->byte_cnt); - *rss_hash = rte_be_to_cpu_32(cqe->rx_hash_res); + int ret; + int8_t op_own; + + ret = check_cqe(cqe, cqe_n, rxq->cq_ci); + if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { + if (unlikely(ret == MLX5_CQE_STATUS_ERR || + rxq->err_state)) { + ret = mlx5_rx_err_handle(rxq, 0); + if (ret == MLX5_CQE_STATUS_HW_OWN || + ret == -1) + return 0; + } else { + return 0; + } + } + ++rxq->cq_ci; + op_own = cqe->op_own; + if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { + volatile struct mlx5_mini_cqe8 (*mc)[8] = + (volatile struct mlx5_mini_cqe8 (*)[8]) + (uintptr_t)(&(*rxq->cqes) + [rxq->cq_ci & + cqe_cnt].pkt_info); + + /* Fix endianness. */ + zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); + /* + * Current mini array position is the one + * returned by check_cqe64(). + * + * If completion comprises several mini arrays, + * as a special case the second one is located + * 7 CQEs after the initial CQE instead of 8 + * for subsequent ones. + */ + zip->ca = rxq->cq_ci; + zip->na = zip->ca + 7; + /* Compute the next non compressed CQE. */ + --rxq->cq_ci; + zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; + /* Get packet size to return. */ + len = rte_be_to_cpu_32((*mc)[0].byte_cnt); + *mcqe = &(*mc)[0]; + zip->ai = 1; + /* Prefetch all to be invalidated */ + idx = zip->ca; + end = zip->cq_ci; + while (idx != end) { + rte_prefetch0(&(*rxq->cqes)[(idx) & + cqe_cnt]); + ++idx; + } + } else { + len = rte_be_to_cpu_32(cqe->byte_cnt); + } } - /* Error while receiving packet. */ - if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR)) - return -1; - } - return len; + if (unlikely(rxq->err_state)) { + cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; + ++rxq->stats.idropped; + } else { + return len; + } + } while (1); } /** @@ -1936,7 +2476,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) volatile struct mlx5_wqe_data_seg *wqe = &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; struct rte_mbuf *rep = (*rxq->elts)[idx]; - uint32_t rss_hash_res = 0; + volatile struct mlx5_mini_cqe8 *mcqe = NULL; + uint32_t rss_hash_res; if (pkt) NEXT(seg) = rep; @@ -1966,24 +2507,21 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) } if (!pkt) { cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; - len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, - &rss_hash_res); + len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe); if (!len) { rte_mbuf_raw_free(rep); break; } - if (unlikely(len == -1)) { - /* RX error, packet is likely too large. */ - rte_mbuf_raw_free(rep); - ++rxq->stats.idropped; - goto skip; - } pkt = seg; assert(len >= (rxq->crc_present << 2)); pkt->ol_flags = 0; + /* If compressed, take hash result from mini-CQE. */ + rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ? + cqe->rx_hash_res : + mcqe->rx_hash_result); rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); if (rxq->crc_present) - len -= ETHER_CRC_LEN; + len -= RTE_ETHER_CRC_LEN; PKT_LEN(pkt) = len; } DATA_LEN(rep) = DATA_LEN(seg); @@ -2016,7 +2554,6 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) pkt = NULL; --pkts_n; ++i; -skip: /* Align consumer index to the next stride. */ rq_ci >>= sges_n; ++rq_ci; @@ -2105,8 +2642,8 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) const unsigned int wq_mask = (1 << rxq->elts_n) - 1; volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; unsigned int i = 0; - uint16_t rq_ci = rxq->rq_ci; - uint16_t strd_idx = rxq->strd_ci; + uint32_t rq_ci = rxq->rq_ci; + uint16_t consumed_strd = rxq->consumed_strd; struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; while (i < pkts_n) { @@ -2114,12 +2651,14 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) void *addr; int ret; unsigned int len; - uint16_t consumed_strd; + uint16_t strd_cnt; + uint16_t strd_idx; uint32_t offset; uint32_t byte_cnt; + volatile struct mlx5_mini_cqe8 *mcqe = NULL; uint32_t rss_hash_res = 0; - if (strd_idx == strd_n) { + if (consumed_strd == strd_n) { /* Replace WQE only if the buffer is still in use. */ if (rte_atomic16_read(&buf->refcnt) > 1) { mprq_buf_replace(rxq, rq_ci & wq_mask); @@ -2139,28 +2678,30 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) rxq->mprq_repl = rep; } /* Advance to the next WQE. */ - strd_idx = 0; + consumed_strd = 0; ++rq_ci; buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; } cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; - ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &rss_hash_res); + ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe); if (!ret) break; - if (unlikely(ret == -1)) { - /* RX error, packet is likely too large. */ - ++rxq->stats.idropped; - continue; - } byte_cnt = ret; - consumed_strd = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> - MLX5_MPRQ_STRIDE_NUM_SHIFT; - assert(consumed_strd); - /* Calculate offset before adding up stride index. */ - offset = strd_idx * strd_sz + strd_shift; - strd_idx += consumed_strd; + strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> + MLX5_MPRQ_STRIDE_NUM_SHIFT; + assert(strd_cnt); + consumed_strd += strd_cnt; if (byte_cnt & MLX5_MPRQ_FILLER_MASK) continue; + if (mcqe == NULL) { + rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res); + strd_idx = rte_be_to_cpu_16(cqe->wqe_counter); + } else { + /* mini-CQE for MPRQ doesn't have hash result. */ + strd_idx = rte_be_to_cpu_16(mcqe->stride_idx); + } + assert(strd_idx < strd_n); + assert(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask)); /* * Currently configured to receive a packet per a stride. But if * MTU is adjusted through kernel interface, device could @@ -2168,7 +2709,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) * case, the packet should be dropped because it is bigger than * the max_rx_pkt_len. */ - if (unlikely(consumed_strd > 1)) { + if (unlikely(strd_cnt > 1)) { ++rxq->stats.idropped; continue; } @@ -2180,7 +2721,8 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; assert((int)len >= (rxq->crc_present << 2)); if (rxq->crc_present) - len -= ETHER_CRC_LEN; + len -= RTE_ETHER_CRC_LEN; + offset = strd_idx * strd_sz + strd_shift; addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf), offset); /* Initialize the offload flag. */ pkt->ol_flags = 0; @@ -2203,7 +2745,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) } else { rte_iova_t buf_iova; struct rte_mbuf_ext_shared_info *shinfo; - uint16_t buf_len = consumed_strd * strd_sz; + uint16_t buf_len = strd_cnt * strd_sz; /* Increment the refcnt of the whole chunk. */ rte_atomic16_add_return(&buf->refcnt, 1); @@ -2252,12 +2794,12 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) ++i; } /* Update the consumer indexes. */ - rxq->strd_ci = strd_idx; - rte_io_wmb(); + rxq->consumed_strd = consumed_strd; + rte_cio_wmb(); *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); if (rq_ci != rxq->rq_ci) { rxq->rq_ci = rq_ci; - rte_io_wmb(); + rte_cio_wmb(); *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); } #ifdef MLX5_PMD_SOFT_COUNTERS @@ -2288,6 +2830,7 @@ removed_tx_burst(void *dpdk_txq __rte_unused, struct rte_mbuf **pkts __rte_unused, uint16_t pkts_n __rte_unused) { + rte_mb(); return 0; } @@ -2312,6 +2855,7 @@ removed_rx_burst(void *dpdk_txq __rte_unused, struct rte_mbuf **pkts __rte_unused, uint16_t pkts_n __rte_unused) { + rte_mb(); return 0; } @@ -2322,7 +2866,7 @@ removed_rx_burst(void *dpdk_txq __rte_unused, * (e.g. mlx5_rxtx_vec_sse.c for x86). */ -uint16_t __attribute__((weak)) +__rte_weak uint16_t mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused, struct rte_mbuf **pkts __rte_unused, uint16_t pkts_n __rte_unused) @@ -2330,7 +2874,7 @@ mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused, return 0; } -uint16_t __attribute__((weak)) +__rte_weak uint16_t mlx5_tx_burst_vec(void *dpdk_txq __rte_unused, struct rte_mbuf **pkts __rte_unused, uint16_t pkts_n __rte_unused) @@ -2338,7 +2882,7 @@ mlx5_tx_burst_vec(void *dpdk_txq __rte_unused, return 0; } -uint16_t __attribute__((weak)) +__rte_weak uint16_t mlx5_rx_burst_vec(void *dpdk_txq __rte_unused, struct rte_mbuf **pkts __rte_unused, uint16_t pkts_n __rte_unused) @@ -2346,25 +2890,25 @@ mlx5_rx_burst_vec(void *dpdk_txq __rte_unused, return 0; } -int __attribute__((weak)) +__rte_weak int mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev __rte_unused) { return -ENOTSUP; } -int __attribute__((weak)) +__rte_weak int mlx5_check_vec_tx_support(struct rte_eth_dev *dev __rte_unused) { return -ENOTSUP; } -int __attribute__((weak)) +__rte_weak int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused) { return -ENOTSUP; } -int __attribute__((weak)) +__rte_weak int mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) { return -ENOTSUP;