X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx.c;h=5c39cbb569a630e6215fccb01682ebcb1c81bbc3;hb=a6ca35aa74339cdd1596c9ba897097b2d2ba77ce;hp=2ca133ecf534d2ec160046465b0ec69b29e5ee09;hpb=2a66cf3789540dc2792bf8035f2ab2ed7fa0ac12;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 2ca133ecf5..5c39cbb569 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -152,6 +152,9 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe, return 0; } +static inline void +txq_complete(struct txq *txq) __attribute__((always_inline)); + /** * Manage TX completions. * @@ -160,7 +163,7 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe, * @param txq * Pointer to TX queue structure. */ -static void +static inline void txq_complete(struct txq *txq) { const unsigned int elts_n = txq->elts_n; @@ -303,6 +306,7 @@ mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe, { wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4); + wqe->wqe.ctrl.data[2] = 0; wqe->wqe.ctrl.data[3] = 0; wqe->inl.eseg.rsvd0 = 0; wqe->inl.eseg.rsvd1 = 0; @@ -348,6 +352,7 @@ mlx5_wqe_write_vlan(struct txq *txq, volatile union mlx5_wqe *wqe, wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4); + wqe->wqe.ctrl.data[2] = 0; wqe->wqe.ctrl.data[3] = 0; wqe->inl.eseg.rsvd0 = 0; wqe->inl.eseg.rsvd1 = 0; @@ -425,6 +430,7 @@ mlx5_wqe_write_inline(struct txq *txq, volatile union mlx5_wqe *wqe, assert(size < 64); wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size); + wqe->inl.ctrl.data[2] = 0; wqe->inl.ctrl.data[3] = 0; wqe->inl.eseg.rsvd0 = 0; wqe->inl.eseg.rsvd1 = 0; @@ -469,8 +475,8 @@ mlx5_wqe_write_inline_vlan(struct txq *txq, volatile union mlx5_wqe *wqe, (uint8_t *)addr, 12); rte_memcpy((uint8_t *)(uintptr_t)wqe->inl.eseg.inline_hdr_start + 12, &vlan, sizeof(vlan)); - rte_memcpy((uint8_t *)(uintptr_t)wqe->inl.eseg.inline_hdr_start + 16, - ((uint8_t *)addr + 12), 2); + rte_memcpy((uint8_t *)((uintptr_t)wqe->inl.eseg.inline_hdr_start + 16), + (uint8_t *)(addr + 12), 2); addr += MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan); length -= MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan); size = (sizeof(wqe->inl.ctrl.ctrl) + @@ -498,6 +504,7 @@ mlx5_wqe_write_inline_vlan(struct txq *txq, volatile union mlx5_wqe *wqe, assert(size < 64); wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size); + wqe->inl.ctrl.data[2] = 0; wqe->inl.ctrl.data[3] = 0; wqe->inl.eseg.rsvd0 = 0; wqe->inl.eseg.rsvd1 = 0; @@ -585,50 +592,56 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) struct txq *txq = (struct txq *)dpdk_txq; uint16_t elts_head = txq->elts_head; const unsigned int elts_n = txq->elts_n; - unsigned int i; + unsigned int i = 0; + unsigned int j = 0; unsigned int max; unsigned int comp; - volatile union mlx5_wqe *wqe; - struct rte_mbuf *buf; + volatile union mlx5_wqe *wqe = NULL; if (unlikely(!pkts_n)) return 0; - buf = pkts[0]; /* Prefetch first packet cacheline. */ tx_prefetch_cqe(txq, txq->cq_ci); tx_prefetch_cqe(txq, txq->cq_ci + 1); - rte_prefetch0(buf); + rte_prefetch0(*pkts); /* Start processing. */ txq_complete(txq); max = (elts_n - (elts_head - txq->elts_tail)); if (max > elts_n) max -= elts_n; - assert(max >= 1); - assert(max <= elts_n); - /* Always leave one free entry in the ring. */ - --max; - if (max == 0) - return 0; - if (max > pkts_n) - max = pkts_n; - for (i = 0; (i != max); ++i) { - unsigned int elts_head_next = (elts_head + 1) & (elts_n - 1); + do { + struct rte_mbuf *buf = *(pkts++); + unsigned int elts_head_next; uintptr_t addr; uint32_t length; uint32_t lkey; + unsigned int segs_n = buf->nb_segs; + volatile struct mlx5_wqe_data_seg *dseg; + unsigned int ds = sizeof(*wqe) / 16; + /* + * Make sure there is enough room to store this packet and + * that one ring entry remains unused. + */ + assert(segs_n); + if (max < segs_n + 1) + break; + max -= segs_n; + --pkts_n; + elts_head_next = (elts_head + 1) & (elts_n - 1); wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)]; + dseg = &wqe->wqe.dseg; rte_prefetch0(wqe); - if (i + 1 < max) - rte_prefetch0(pkts[i + 1]); + if (pkts_n) + rte_prefetch0(*pkts); /* Retrieve buffer information. */ addr = rte_pktmbuf_mtod(buf, uintptr_t); length = DATA_LEN(buf); /* Update element. */ (*txq->elts)[elts_head] = buf; /* Prefetch next buffer data. */ - if (i + 1 < max) - rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 1], + if (pkts_n) + rte_prefetch0(rte_pktmbuf_mtod(*pkts, volatile void *)); /* Retrieve Memory Region key for this memory pool. */ lkey = txq_mp2mr(txq, txq_mb2mp(buf)); @@ -637,7 +650,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) buf->vlan_tci); else mlx5_wqe_write(txq, wqe, addr, length, lkey); - wqe->wqe.ctrl.data[2] = 0; /* Should we enable HW CKSUM offload */ if (buf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { @@ -647,18 +659,49 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) } else { wqe->wqe.eseg.cs_flags = 0; } + while (--segs_n) { + /* + * Spill on next WQE when the current one does not have + * enough room left. Size of WQE must a be a multiple + * of data segment size. + */ + assert(!(sizeof(*wqe) % sizeof(*dseg))); + if (!(ds % (sizeof(*wqe) / 16))) + dseg = (volatile void *) + &(*txq->wqes)[txq->wqe_ci++ & + (txq->wqe_n - 1)]; + else + ++dseg; + ++ds; + buf = buf->next; + assert(buf); + /* Store segment information. */ + dseg->byte_count = htonl(DATA_LEN(buf)); + dseg->lkey = txq_mp2mr(txq, txq_mb2mp(buf)); + dseg->addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)); + (*txq->elts)[elts_head_next] = buf; + elts_head_next = (elts_head_next + 1) & (elts_n - 1); +#ifdef MLX5_PMD_SOFT_COUNTERS + length += DATA_LEN(buf); +#endif + ++j; + } + /* Update DS field in WQE. */ + wqe->wqe.ctrl.data[1] &= htonl(0xffffffc0); + wqe->wqe.ctrl.data[1] |= htonl(ds & 0x3f); + elts_head = elts_head_next; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment sent bytes counter. */ txq->stats.obytes += length; #endif elts_head = elts_head_next; - buf = pkts[i + 1]; - } + ++i; + } while (pkts_n); /* Take a shortcut if nothing must be sent. */ if (unlikely(i == 0)) return 0; /* Check whether completion threshold has been reached. */ - comp = txq->elts_comp + i; + comp = txq->elts_comp + i + j; if (comp >= MLX5_TX_COMP_THRESH) { /* Request completion on last WQE. */ wqe->wqe.ctrl.data[2] = htonl(8); @@ -697,44 +740,50 @@ mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) struct txq *txq = (struct txq *)dpdk_txq; uint16_t elts_head = txq->elts_head; const unsigned int elts_n = txq->elts_n; - unsigned int i; + unsigned int i = 0; + unsigned int j = 0; unsigned int max; unsigned int comp; - volatile union mlx5_wqe *wqe; - struct rte_mbuf *buf; + volatile union mlx5_wqe *wqe = NULL; unsigned int max_inline = txq->max_inline; if (unlikely(!pkts_n)) return 0; - buf = pkts[0]; /* Prefetch first packet cacheline. */ tx_prefetch_cqe(txq, txq->cq_ci); tx_prefetch_cqe(txq, txq->cq_ci + 1); - rte_prefetch0(buf); + rte_prefetch0(*pkts); /* Start processing. */ txq_complete(txq); max = (elts_n - (elts_head - txq->elts_tail)); if (max > elts_n) max -= elts_n; - assert(max >= 1); - assert(max <= elts_n); - /* Always leave one free entry in the ring. */ - --max; - if (max == 0) - return 0; - if (max > pkts_n) - max = pkts_n; - for (i = 0; (i != max); ++i) { - unsigned int elts_head_next = (elts_head + 1) & (elts_n - 1); + do { + struct rte_mbuf *buf = *(pkts++); + unsigned int elts_head_next; uintptr_t addr; uint32_t length; uint32_t lkey; + unsigned int segs_n = buf->nb_segs; + volatile struct mlx5_wqe_data_seg *dseg; + unsigned int ds = sizeof(*wqe) / 16; + /* + * Make sure there is enough room to store this packet and + * that one ring entry remains unused. + */ + assert(segs_n); + if (max < segs_n + 1) + break; + max -= segs_n; + --pkts_n; + elts_head_next = (elts_head + 1) & (elts_n - 1); wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)]; + dseg = &wqe->wqe.dseg; tx_prefetch_wqe(txq, txq->wqe_ci); tx_prefetch_wqe(txq, txq->wqe_ci + 1); - if (i + 1 < max) - rte_prefetch0(pkts[i + 1]); + if (pkts_n) + rte_prefetch0(*pkts); /* Should we enable HW CKSUM offload */ if (buf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { @@ -750,16 +799,17 @@ mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Update element. */ (*txq->elts)[elts_head] = buf; /* Prefetch next buffer data. */ - if (i + 1 < max) - rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 1], + if (pkts_n) + rte_prefetch0(rte_pktmbuf_mtod(*pkts, volatile void *)); - if (length <= max_inline) { + if ((length <= max_inline) && (segs_n == 1)) { if (buf->ol_flags & PKT_TX_VLAN_PKT) mlx5_wqe_write_inline_vlan(txq, wqe, addr, length, buf->vlan_tci); else mlx5_wqe_write_inline(txq, wqe, addr, length); + goto skip_segs; } else { /* Retrieve Memory Region key for this memory pool. */ lkey = txq_mp2mr(txq, txq_mb2mp(buf)); @@ -769,19 +819,49 @@ mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) else mlx5_wqe_write(txq, wqe, addr, length, lkey); } - wqe->inl.ctrl.data[2] = 0; + while (--segs_n) { + /* + * Spill on next WQE when the current one does not have + * enough room left. Size of WQE must a be a multiple + * of data segment size. + */ + assert(!(sizeof(*wqe) % sizeof(*dseg))); + if (!(ds % (sizeof(*wqe) / 16))) + dseg = (volatile void *) + &(*txq->wqes)[txq->wqe_ci++ & + (txq->wqe_n - 1)]; + else + ++dseg; + ++ds; + buf = buf->next; + assert(buf); + /* Store segment information. */ + dseg->byte_count = htonl(DATA_LEN(buf)); + dseg->lkey = txq_mp2mr(txq, txq_mb2mp(buf)); + dseg->addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)); + (*txq->elts)[elts_head_next] = buf; + elts_head_next = (elts_head_next + 1) & (elts_n - 1); +#ifdef MLX5_PMD_SOFT_COUNTERS + length += DATA_LEN(buf); +#endif + ++j; + } + /* Update DS field in WQE. */ + wqe->inl.ctrl.data[1] &= htonl(0xffffffc0); + wqe->inl.ctrl.data[1] |= htonl(ds & 0x3f); +skip_segs: elts_head = elts_head_next; - buf = pkts[i + 1]; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment sent bytes counter. */ txq->stats.obytes += length; #endif - } + ++i; + } while (pkts_n); /* Take a shortcut if nothing must be sent. */ if (unlikely(i == 0)) return 0; /* Check whether completion threshold has been reached. */ - comp = txq->elts_comp + i; + comp = txq->elts_comp + i + j; if (comp >= MLX5_TX_COMP_THRESH) { /* Request completion on last WQE. */ wqe->inl.ctrl.data[2] = htonl(8); @@ -801,6 +881,475 @@ mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) return i; } +/** + * Open a MPW session. + * + * @param txq + * Pointer to TX queue structure. + * @param mpw + * Pointer to MPW session structure. + * @param length + * Packet length. + */ +static inline void +mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length) +{ + uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1); + volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] = + (volatile struct mlx5_wqe_data_seg (*)[]) + (uintptr_t)&(*txq->wqes)[(idx + 1) & (txq->wqe_n - 1)]; + + mpw->state = MLX5_MPW_STATE_OPENED; + mpw->pkts_n = 0; + mpw->len = length; + mpw->total_len = 0; + mpw->wqe = &(*txq->wqes)[idx]; + mpw->wqe->mpw.eseg.mss = htons(length); + mpw->wqe->mpw.eseg.inline_hdr_sz = 0; + mpw->wqe->mpw.eseg.rsvd0 = 0; + mpw->wqe->mpw.eseg.rsvd1 = 0; + mpw->wqe->mpw.eseg.rsvd2 = 0; + mpw->wqe->mpw.ctrl.data[0] = htonl((MLX5_OPC_MOD_MPW << 24) | + (txq->wqe_ci << 8) | + MLX5_OPCODE_LSO_MPW); + mpw->wqe->mpw.ctrl.data[2] = 0; + mpw->wqe->mpw.ctrl.data[3] = 0; + mpw->data.dseg[0] = &mpw->wqe->mpw.dseg[0]; + mpw->data.dseg[1] = &mpw->wqe->mpw.dseg[1]; + mpw->data.dseg[2] = &(*dseg)[0]; + mpw->data.dseg[3] = &(*dseg)[1]; + mpw->data.dseg[4] = &(*dseg)[2]; +} + +/** + * Close a MPW session. + * + * @param txq + * Pointer to TX queue structure. + * @param mpw + * Pointer to MPW session structure. + */ +static inline void +mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw) +{ + unsigned int num = mpw->pkts_n; + + /* + * Store size in multiple of 16 bytes. Control and Ethernet segments + * count as 2. + */ + mpw->wqe->mpw.ctrl.data[1] = htonl(txq->qp_num_8s | (2 + num)); + mpw->state = MLX5_MPW_STATE_CLOSED; + if (num < 3) + ++txq->wqe_ci; + else + txq->wqe_ci += 2; + tx_prefetch_wqe(txq, txq->wqe_ci); + tx_prefetch_wqe(txq, txq->wqe_ci + 1); +} + +/** + * DPDK callback for TX with MPW support. + * + * @param dpdk_txq + * Generic pointer to TX queue structure. + * @param[in] pkts + * Packets to transmit. + * @param pkts_n + * Number of packets in array. + * + * @return + * Number of packets successfully transmitted (<= pkts_n). + */ +uint16_t +mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) +{ + struct txq *txq = (struct txq *)dpdk_txq; + uint16_t elts_head = txq->elts_head; + const unsigned int elts_n = txq->elts_n; + unsigned int i = 0; + unsigned int j = 0; + unsigned int max; + unsigned int comp; + struct mlx5_mpw mpw = { + .state = MLX5_MPW_STATE_CLOSED, + }; + + if (unlikely(!pkts_n)) + return 0; + /* Prefetch first packet cacheline. */ + tx_prefetch_cqe(txq, txq->cq_ci); + tx_prefetch_wqe(txq, txq->wqe_ci); + tx_prefetch_wqe(txq, txq->wqe_ci + 1); + /* Start processing. */ + txq_complete(txq); + max = (elts_n - (elts_head - txq->elts_tail)); + if (max > elts_n) + max -= elts_n; + do { + struct rte_mbuf *buf = *(pkts++); + unsigned int elts_head_next; + uint32_t length; + unsigned int segs_n = buf->nb_segs; + uint32_t cs_flags = 0; + + /* + * Make sure there is enough room to store this packet and + * that one ring entry remains unused. + */ + assert(segs_n); + if (max < segs_n + 1) + break; + /* Do not bother with large packets MPW cannot handle. */ + if (segs_n > MLX5_MPW_DSEG_MAX) + break; + max -= segs_n; + --pkts_n; + /* Should we enable HW CKSUM offload */ + if (buf->ol_flags & + (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) + cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; + /* Retrieve packet information. */ + length = PKT_LEN(buf); + assert(length); + /* Start new session if packet differs. */ + if ((mpw.state == MLX5_MPW_STATE_OPENED) && + ((mpw.len != length) || + (segs_n != 1) || + (mpw.wqe->mpw.eseg.cs_flags != cs_flags))) + mlx5_mpw_close(txq, &mpw); + if (mpw.state == MLX5_MPW_STATE_CLOSED) { + mlx5_mpw_new(txq, &mpw, length); + mpw.wqe->mpw.eseg.cs_flags = cs_flags; + } + /* Multi-segment packets must be alone in their MPW. */ + assert((segs_n == 1) || (mpw.pkts_n == 0)); +#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) + length = 0; +#endif + do { + volatile struct mlx5_wqe_data_seg *dseg; + uintptr_t addr; + + elts_head_next = (elts_head + 1) & (elts_n - 1); + assert(buf); + (*txq->elts)[elts_head] = buf; + dseg = mpw.data.dseg[mpw.pkts_n]; + addr = rte_pktmbuf_mtod(buf, uintptr_t); + *dseg = (struct mlx5_wqe_data_seg){ + .byte_count = htonl(DATA_LEN(buf)), + .lkey = txq_mp2mr(txq, txq_mb2mp(buf)), + .addr = htonll(addr), + }; + elts_head = elts_head_next; +#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) + length += DATA_LEN(buf); +#endif + buf = buf->next; + ++mpw.pkts_n; + ++j; + } while (--segs_n); + assert(length == mpw.len); + if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) + mlx5_mpw_close(txq, &mpw); + elts_head = elts_head_next; +#ifdef MLX5_PMD_SOFT_COUNTERS + /* Increment sent bytes counter. */ + txq->stats.obytes += length; +#endif + ++i; + } while (pkts_n); + /* Take a shortcut if nothing must be sent. */ + if (unlikely(i == 0)) + return 0; + /* Check whether completion threshold has been reached. */ + /* "j" includes both packets and segments. */ + comp = txq->elts_comp + j; + if (comp >= MLX5_TX_COMP_THRESH) { + volatile union mlx5_wqe *wqe = mpw.wqe; + + /* Request completion on last WQE. */ + wqe->mpw.ctrl.data[2] = htonl(8); + /* Save elts_head in unused "immediate" field of WQE. */ + wqe->mpw.ctrl.data[3] = elts_head; + txq->elts_comp = 0; + } else { + txq->elts_comp = comp; + } +#ifdef MLX5_PMD_SOFT_COUNTERS + /* Increment sent packets counter. */ + txq->stats.opackets += i; +#endif + /* Ring QP doorbell. */ + if (mpw.state == MLX5_MPW_STATE_OPENED) + mlx5_mpw_close(txq, &mpw); + mlx5_tx_dbrec(txq); + txq->elts_head = elts_head; + return i; +} + +/** + * Open a MPW inline session. + * + * @param txq + * Pointer to TX queue structure. + * @param mpw + * Pointer to MPW session structure. + * @param length + * Packet length. + */ +static inline void +mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length) +{ + uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1); + + mpw->state = MLX5_MPW_INL_STATE_OPENED; + mpw->pkts_n = 0; + mpw->len = length; + mpw->total_len = 0; + mpw->wqe = &(*txq->wqes)[idx]; + mpw->wqe->mpw_inl.ctrl.data[0] = htonl((MLX5_OPC_MOD_MPW << 24) | + (txq->wqe_ci << 8) | + MLX5_OPCODE_LSO_MPW); + mpw->wqe->mpw_inl.ctrl.data[2] = 0; + mpw->wqe->mpw_inl.ctrl.data[3] = 0; + mpw->wqe->mpw_inl.eseg.mss = htons(length); + mpw->wqe->mpw_inl.eseg.inline_hdr_sz = 0; + mpw->wqe->mpw_inl.eseg.cs_flags = 0; + mpw->wqe->mpw_inl.eseg.rsvd0 = 0; + mpw->wqe->mpw_inl.eseg.rsvd1 = 0; + mpw->wqe->mpw_inl.eseg.rsvd2 = 0; + mpw->data.raw = &mpw->wqe->mpw_inl.data[0]; +} + +/** + * Close a MPW inline session. + * + * @param txq + * Pointer to TX queue structure. + * @param mpw + * Pointer to MPW session structure. + */ +static inline void +mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw) +{ + unsigned int size; + + size = sizeof(*mpw->wqe) - MLX5_MWQE64_INL_DATA + mpw->total_len; + /* + * Store size in multiple of 16 bytes. Control and Ethernet segments + * count as 2. + */ + mpw->wqe->mpw_inl.ctrl.data[1] = + htonl(txq->qp_num_8s | ((size + 15) / 16)); + mpw->state = MLX5_MPW_STATE_CLOSED; + mpw->wqe->mpw_inl.byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG); + txq->wqe_ci += (size + (sizeof(*mpw->wqe) - 1)) / sizeof(*mpw->wqe); +} + +/** + * DPDK callback for TX with MPW inline support. + * + * @param dpdk_txq + * Generic pointer to TX queue structure. + * @param[in] pkts + * Packets to transmit. + * @param pkts_n + * Number of packets in array. + * + * @return + * Number of packets successfully transmitted (<= pkts_n). + */ +uint16_t +mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, + uint16_t pkts_n) +{ + struct txq *txq = (struct txq *)dpdk_txq; + uint16_t elts_head = txq->elts_head; + const unsigned int elts_n = txq->elts_n; + unsigned int i = 0; + unsigned int j = 0; + unsigned int max; + unsigned int comp; + unsigned int inline_room = txq->max_inline; + struct mlx5_mpw mpw = { + .state = MLX5_MPW_STATE_CLOSED, + }; + + if (unlikely(!pkts_n)) + return 0; + /* Prefetch first packet cacheline. */ + tx_prefetch_cqe(txq, txq->cq_ci); + tx_prefetch_wqe(txq, txq->wqe_ci); + tx_prefetch_wqe(txq, txq->wqe_ci + 1); + /* Start processing. */ + txq_complete(txq); + max = (elts_n - (elts_head - txq->elts_tail)); + if (max > elts_n) + max -= elts_n; + do { + struct rte_mbuf *buf = *(pkts++); + unsigned int elts_head_next; + uintptr_t addr; + uint32_t length; + unsigned int segs_n = buf->nb_segs; + uint32_t cs_flags = 0; + + /* + * Make sure there is enough room to store this packet and + * that one ring entry remains unused. + */ + assert(segs_n); + if (max < segs_n + 1) + break; + /* Do not bother with large packets MPW cannot handle. */ + if (segs_n > MLX5_MPW_DSEG_MAX) + break; + max -= segs_n; + --pkts_n; + /* Should we enable HW CKSUM offload */ + if (buf->ol_flags & + (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) + cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; + /* Retrieve packet information. */ + length = PKT_LEN(buf); + /* Start new session if packet differs. */ + if (mpw.state == MLX5_MPW_STATE_OPENED) { + if ((mpw.len != length) || + (segs_n != 1) || + (mpw.wqe->mpw.eseg.cs_flags != cs_flags)) + mlx5_mpw_close(txq, &mpw); + } else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) { + if ((mpw.len != length) || + (segs_n != 1) || + (length > inline_room) || + (mpw.wqe->mpw_inl.eseg.cs_flags != cs_flags)) { + mlx5_mpw_inline_close(txq, &mpw); + inline_room = txq->max_inline; + } + } + if (mpw.state == MLX5_MPW_STATE_CLOSED) { + if ((segs_n != 1) || + (length > inline_room)) { + mlx5_mpw_new(txq, &mpw, length); + mpw.wqe->mpw.eseg.cs_flags = cs_flags; + } else { + mlx5_mpw_inline_new(txq, &mpw, length); + mpw.wqe->mpw_inl.eseg.cs_flags = cs_flags; + } + } + /* Multi-segment packets must be alone in their MPW. */ + assert((segs_n == 1) || (mpw.pkts_n == 0)); + if (mpw.state == MLX5_MPW_STATE_OPENED) { + assert(inline_room == txq->max_inline); +#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) + length = 0; +#endif + do { + volatile struct mlx5_wqe_data_seg *dseg; + + elts_head_next = + (elts_head + 1) & (elts_n - 1); + assert(buf); + (*txq->elts)[elts_head] = buf; + dseg = mpw.data.dseg[mpw.pkts_n]; + addr = rte_pktmbuf_mtod(buf, uintptr_t); + *dseg = (struct mlx5_wqe_data_seg){ + .byte_count = htonl(DATA_LEN(buf)), + .lkey = txq_mp2mr(txq, txq_mb2mp(buf)), + .addr = htonll(addr), + }; + elts_head = elts_head_next; +#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) + length += DATA_LEN(buf); +#endif + buf = buf->next; + ++mpw.pkts_n; + ++j; + } while (--segs_n); + assert(length == mpw.len); + if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) + mlx5_mpw_close(txq, &mpw); + } else { + unsigned int max; + + assert(mpw.state == MLX5_MPW_INL_STATE_OPENED); + assert(length <= inline_room); + assert(length == DATA_LEN(buf)); + elts_head_next = (elts_head + 1) & (elts_n - 1); + addr = rte_pktmbuf_mtod(buf, uintptr_t); + (*txq->elts)[elts_head] = buf; + /* Maximum number of bytes before wrapping. */ + max = ((uintptr_t)&(*txq->wqes)[txq->wqe_n] - + (uintptr_t)mpw.data.raw); + if (length > max) { + rte_memcpy((void *)(uintptr_t)mpw.data.raw, + (void *)addr, + max); + mpw.data.raw = + (volatile void *)&(*txq->wqes)[0]; + rte_memcpy((void *)(uintptr_t)mpw.data.raw, + (void *)(addr + max), + length - max); + mpw.data.raw += length - max; + } else { + rte_memcpy((void *)(uintptr_t)mpw.data.raw, + (void *)addr, + length); + mpw.data.raw += length; + } + if ((uintptr_t)mpw.data.raw == + (uintptr_t)&(*txq->wqes)[txq->wqe_n]) + mpw.data.raw = + (volatile void *)&(*txq->wqes)[0]; + ++mpw.pkts_n; + ++j; + if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) { + mlx5_mpw_inline_close(txq, &mpw); + inline_room = txq->max_inline; + } else { + inline_room -= length; + } + } + mpw.total_len += length; + elts_head = elts_head_next; +#ifdef MLX5_PMD_SOFT_COUNTERS + /* Increment sent bytes counter. */ + txq->stats.obytes += length; +#endif + ++i; + } while (pkts_n); + /* Take a shortcut if nothing must be sent. */ + if (unlikely(i == 0)) + return 0; + /* Check whether completion threshold has been reached. */ + /* "j" includes both packets and segments. */ + comp = txq->elts_comp + j; + if (comp >= MLX5_TX_COMP_THRESH) { + volatile union mlx5_wqe *wqe = mpw.wqe; + + /* Request completion on last WQE. */ + wqe->mpw_inl.ctrl.data[2] = htonl(8); + /* Save elts_head in unused "immediate" field of WQE. */ + wqe->mpw_inl.ctrl.data[3] = elts_head; + txq->elts_comp = 0; + } else { + txq->elts_comp = comp; + } +#ifdef MLX5_PMD_SOFT_COUNTERS + /* Increment sent packets counter. */ + txq->stats.opackets += i; +#endif + /* Ring QP doorbell. */ + if (mpw.state == MLX5_MPW_INL_STATE_OPENED) + mlx5_mpw_inline_close(txq, &mpw); + else if (mpw.state == MLX5_MPW_STATE_OPENED) + mlx5_mpw_close(txq, &mpw); + mlx5_tx_dbrec(txq); + txq->elts_head = elts_head; + return i; +} + /** * Translate RX completion flags to packet type. * @@ -1002,99 +1551,134 @@ uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct rxq *rxq = dpdk_rxq; - unsigned int pkts_ret = 0; - unsigned int i; - unsigned int rq_ci = rxq->rq_ci; - const unsigned int elts_n = rxq->elts_n; - const unsigned int wqe_cnt = elts_n - 1; + const unsigned int wqe_cnt = rxq->elts_n - 1; const unsigned int cqe_cnt = rxq->cqe_n - 1; + const unsigned int sges_n = rxq->sges_n; + struct rte_mbuf *pkt = NULL; + struct rte_mbuf *seg = NULL; + volatile struct mlx5_cqe64 *cqe = + &(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64; + unsigned int i = 0; + unsigned int rq_ci = rxq->rq_ci << sges_n; + int len; - for (i = 0; (i != pkts_n); ++i) { + while (pkts_n) { unsigned int idx = rq_ci & wqe_cnt; - int len; - struct rte_mbuf *rep; - struct rte_mbuf *pkt; volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx]; - volatile struct mlx5_cqe64 *cqe = - &(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64; + struct rte_mbuf *rep = (*rxq->elts)[idx]; - pkt = (*rxq->elts)[idx]; + if (pkt) + NEXT(seg) = rep; + seg = rep; + rte_prefetch0(seg); rte_prefetch0(cqe); + rte_prefetch0(wqe); rep = rte_mbuf_raw_alloc(rxq->mp); if (unlikely(rep == NULL)) { ++rxq->stats.rx_nombuf; + if (!pkt) { + /* + * no buffers before we even started, + * bail out silently. + */ + break; + } + while (pkt != seg) { + assert(pkt != (*rxq->elts)[idx]); + seg = NEXT(pkt); + rte_mbuf_refcnt_set(pkt, 0); + __rte_mbuf_raw_free(pkt); + pkt = seg; + } break; } - SET_DATA_OFF(rep, RTE_PKTMBUF_HEADROOM); - NB_SEGS(rep) = 1; - PORT(rep) = rxq->port_id; - NEXT(rep) = NULL; - len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt); - if (unlikely(len == 0)) { - rte_mbuf_refcnt_set(rep, 0); - __rte_mbuf_raw_free(rep); - break; - } - if (unlikely(len == -1)) { - /* RX error, packet is likely too large. */ - rte_mbuf_refcnt_set(rep, 0); - __rte_mbuf_raw_free(rep); - ++rxq->stats.idropped; - --i; - goto skip; + if (!pkt) { + cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64; + len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt); + if (len == 0) { + rte_mbuf_refcnt_set(rep, 0); + __rte_mbuf_raw_free(rep); + break; + } + if (unlikely(len == -1)) { + /* RX error, packet is likely too large. */ + rte_mbuf_refcnt_set(rep, 0); + __rte_mbuf_raw_free(rep); + ++rxq->stats.idropped; + goto skip; + } + pkt = seg; + assert(len >= (rxq->crc_present << 2)); + /* Update packet information. */ + pkt->packet_type = 0; + pkt->ol_flags = 0; + if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip | + rxq->crc_present) { + if (rxq->csum) { + pkt->packet_type = + rxq_cq_to_pkt_type(cqe); + pkt->ol_flags = + rxq_cq_to_ol_flags(rxq, cqe); + } + if (cqe->l4_hdr_type_etc & + MLX5_CQE_VLAN_STRIPPED) { + pkt->ol_flags |= PKT_RX_VLAN_PKT | + PKT_RX_VLAN_STRIPPED; + pkt->vlan_tci = ntohs(cqe->vlan_info); + } + if (rxq->crc_present) + len -= ETHER_CRC_LEN; + } + PKT_LEN(pkt) = len; } + DATA_LEN(rep) = DATA_LEN(seg); + PKT_LEN(rep) = PKT_LEN(seg); + SET_DATA_OFF(rep, DATA_OFF(seg)); + NB_SEGS(rep) = NB_SEGS(seg); + PORT(rep) = PORT(seg); + NEXT(rep) = NULL; + (*rxq->elts)[idx] = rep; /* * Fill NIC descriptor with the new buffer. The lkey and size * of the buffers are already known, only the buffer address * changes. */ - wqe->addr = htonll((uintptr_t)rep->buf_addr + - RTE_PKTMBUF_HEADROOM); - (*rxq->elts)[idx] = rep; - /* Update pkt information. */ - if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip | - rxq->crc_present) { - if (rxq->csum) { - pkt->packet_type = rxq_cq_to_pkt_type(cqe); - pkt->ol_flags = rxq_cq_to_ol_flags(rxq, cqe); - } - if (cqe->l4_hdr_type_etc & MLX5_CQE_VLAN_STRIPPED) { - pkt->ol_flags |= PKT_RX_VLAN_PKT | - PKT_RX_VLAN_STRIPPED; - pkt->vlan_tci = ntohs(cqe->vlan_info); - } - if (rxq->crc_present) - len -= ETHER_CRC_LEN; + wqe->addr = htonll(rte_pktmbuf_mtod(rep, uintptr_t)); + if (len > DATA_LEN(seg)) { + len -= DATA_LEN(seg); + ++NB_SEGS(pkt); + ++rq_ci; + continue; } - PKT_LEN(pkt) = len; - DATA_LEN(pkt) = len; + DATA_LEN(seg) = len; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment bytes counter. */ - rxq->stats.ibytes += len; + rxq->stats.ibytes += PKT_LEN(pkt); #endif /* Return packet. */ *(pkts++) = pkt; - ++pkts_ret; + pkt = NULL; + --pkts_n; + ++i; skip: + /* Align consumer index to the next stride. */ + rq_ci >>= sges_n; ++rq_ci; + rq_ci <<= sges_n; } - if (unlikely((i == 0) && (rq_ci == rxq->rq_ci))) + if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci))) return 0; - /* Repost WRs. */ -#ifdef DEBUG_RECV - DEBUG("%p: reposting %u WRs", (void *)rxq, i); -#endif /* Update the consumer index. */ - rxq->rq_ci = rq_ci; + rxq->rq_ci = rq_ci >> sges_n; rte_wmb(); *rxq->cq_db = htonl(rxq->cq_ci); rte_wmb(); *rxq->rq_db = htonl(rxq->rq_ci); #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment packets counter. */ - rxq->stats.ipackets += pkts_ret; + rxq->stats.ipackets += i; #endif - return pkts_ret; + return i; } /**