-void mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
- struct rte_eth_rxq_info *qinfo);
-void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
- struct rte_eth_txq_info *qinfo);
-int mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
- struct rte_eth_burst_mode *mode);
-int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
- struct rte_eth_burst_mode *mode);
-
-/* Vectorized version of mlx5_rxtx.c */
-int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq_data);
-int mlx5_check_vec_rx_support(struct rte_eth_dev *dev);
-uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
- uint16_t pkts_n);
-uint16_t mlx5_rx_burst_mprq_vec(void *dpdk_txq, struct rte_mbuf **pkts,
- uint16_t pkts_n);
-
-/* mlx5_mr.c */
-
-void mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl);
-uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr);
-uint32_t mlx5_tx_mb2mr_bh(struct mlx5_txq_data *txq, struct rte_mbuf *mb);
-uint32_t mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t addr,
- struct rte_mempool *mp);
-int mlx5_dma_map(struct rte_pci_device *pdev, void *addr, uint64_t iova,
- size_t len);
-int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova,
- size_t len);
-
-/**
- * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
- * 64bit architectures.
- *
- * @param val
- * value to write in CPU endian format.
- * @param addr
- * Address to write to.
- * @param lock
- * Address of the lock to use for that UAR access.
- */
-static __rte_always_inline void
-__mlx5_uar_write64_relaxed(uint64_t val, void *addr,
- rte_spinlock_t *lock __rte_unused)
-{
-#ifdef RTE_ARCH_64
- *(uint64_t *)addr = val;
-#else /* !RTE_ARCH_64 */
- rte_spinlock_lock(lock);
- *(uint32_t *)addr = val;
- rte_io_wmb();
- *((uint32_t *)addr + 1) = val >> 32;
- rte_spinlock_unlock(lock);
-#endif
-}
-
-/**
- * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
- * 64bit architectures while guaranteeing the order of execution with the
- * code being executed.
- *
- * @param val
- * value to write in CPU endian format.
- * @param addr
- * Address to write to.
- * @param lock
- * Address of the lock to use for that UAR access.
- */
-static __rte_always_inline void
-__mlx5_uar_write64(uint64_t val, void *addr, rte_spinlock_t *lock)
-{
- rte_io_wmb();
- __mlx5_uar_write64_relaxed(val, addr, lock);
-}
-
-/* Assist macros, used instead of directly calling the functions they wrap. */
-#ifdef RTE_ARCH_64
-#define mlx5_uar_write64_relaxed(val, dst, lock) \
- __mlx5_uar_write64_relaxed(val, dst, NULL)
-#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, NULL)
-#else
-#define mlx5_uar_write64_relaxed(val, dst, lock) \
- __mlx5_uar_write64_relaxed(val, dst, lock)
-#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, lock)
-#endif
-
-/**
- * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which the
- * cloned mbuf is allocated is returned instead.
- *
- * @param buf
- * Pointer to mbuf.
- *
- * @return
- * Memory pool where data is located for given mbuf.
- */
-static inline struct rte_mempool *
-mlx5_mb2mp(struct rte_mbuf *buf)
-{
- if (unlikely(RTE_MBUF_CLONED(buf)))
- return rte_mbuf_from_indirect(buf)->pool;
- return buf->pool;
-}
-
-/**
- * Query LKey from a packet buffer for Rx. No need to flush local caches for Rx
- * as mempool is pre-configured and static.
- *
- * @param rxq
- * Pointer to Rx queue structure.
- * @param addr
- * Address to search.
- *
- * @return
- * Searched LKey on success, UINT32_MAX on no match.
- */
-static __rte_always_inline uint32_t
-mlx5_rx_addr2mr(struct mlx5_rxq_data *rxq, uintptr_t addr)
-{
- struct mlx5_mr_ctrl *mr_ctrl = &rxq->mr_ctrl;
- uint32_t lkey;
-
- /* Linear search on MR cache array. */
- lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
- MLX5_MR_CACHE_N, addr);
- if (likely(lkey != UINT32_MAX))
- return lkey;
- /* Take slower bottom-half (Binary Search) on miss. */
- return mlx5_rx_addr2mr_bh(rxq, addr);
-}
-
-#define mlx5_rx_mb2mr(rxq, mb) mlx5_rx_addr2mr(rxq, (uintptr_t)((mb)->buf_addr))
-
-/**
- * Query LKey from a packet buffer for Tx. If not found, add the mempool.
- *
- * @param txq
- * Pointer to Tx queue structure.
- * @param addr
- * Address to search.
- *
- * @return
- * Searched LKey on success, UINT32_MAX on no match.
- */
-static __rte_always_inline uint32_t
-mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
-{
- struct mlx5_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
- uintptr_t addr = (uintptr_t)mb->buf_addr;
- uint32_t lkey;
-
- /* Check generation bit to see if there's any change on existing MRs. */
- if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
- mlx5_mr_flush_local_cache(mr_ctrl);
- /* Linear search on MR cache array. */
- lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
- MLX5_MR_CACHE_N, addr);
- if (likely(lkey != UINT32_MAX))
- return lkey;
- /* Take slower bottom-half on miss. */
- return mlx5_tx_mb2mr_bh(txq, mb);
-}
-
-/**
- * Ring TX queue doorbell and flush the update if requested.
- *
- * @param txq
- * Pointer to TX queue structure.
- * @param wqe
- * Pointer to the last WQE posted in the NIC.
- * @param cond
- * Request for write memory barrier after BlueFlame update.
- */
-static __rte_always_inline void
-mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
- int cond)
-{
- uint64_t *dst = MLX5_TX_BFREG(txq);
- volatile uint64_t *src = ((volatile uint64_t *)wqe);
-
- rte_io_wmb();
- *txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci);
- /* Ensure ordering between DB record and BF copy. */
- rte_wmb();
- mlx5_uar_write64_relaxed(*src, dst, txq->uar_lock);
- if (cond)
- rte_wmb();
-}
-
-/**
- * Ring TX queue doorbell and flush the update by write memory barrier.
- *
- * @param txq
- * Pointer to TX queue structure.
- * @param wqe
- * Pointer to the last WQE posted in the NIC.
- */
-static __rte_always_inline void
-mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
-{
- mlx5_tx_dbrec_cond_wmb(txq, wqe, 1);
-}
-
-/**
- * Convert timestamp from HW format to linear counter
- * from Packet Pacing Clock Queue CQE timestamp format.
- *
- * @param sh
- * Pointer to the device shared context. Might be needed
- * to convert according current device configuration.
- * @param ts
- * Timestamp from CQE to convert.
- * @return
- * UTC in nanoseconds
- */
-static __rte_always_inline uint64_t
-mlx5_txpp_convert_rx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t ts)
-{
- RTE_SET_USED(sh);
- return (ts & UINT32_MAX) + (ts >> 32) * NS_PER_S;
-}
-
-/**
- * Convert timestamp from mbuf format to linear counter
- * of Clock Queue completions (24 bits)
- *
- * @param sh
- * Pointer to the device shared context to fetch Tx
- * packet pacing timestamp and parameters.
- * @param ts
- * Timestamp from mbuf to convert.
- * @return
- * positive or zero value - completion ID to wait
- * negative value - conversion error
- */
-static __rte_always_inline int32_t
-mlx5_txpp_convert_tx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t mts)
-{
- uint64_t ts, ci;
- uint32_t tick;
-
- do {
- /*
- * Read atomically two uint64_t fields and compare lsb bits.
- * It there is no match - the timestamp was updated in
- * the service thread, data should be re-read.
- */
- rte_compiler_barrier();
- ci = __atomic_load_n(&sh->txpp.ts.ci_ts, __ATOMIC_RELAXED);
- ts = __atomic_load_n(&sh->txpp.ts.ts, __ATOMIC_RELAXED);
- rte_compiler_barrier();
- if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH)))
- break;
- } while (true);
- /* Perform the skew correction, positive value to send earlier. */
- mts -= sh->txpp.skew;
- mts -= ts;
- if (unlikely(mts >= UINT64_MAX / 2)) {
- /* We have negative integer, mts is in the past. */
- __atomic_fetch_add(&sh->txpp.err_ts_past,
- 1, __ATOMIC_RELAXED);
- return -1;
- }
- tick = sh->txpp.tick;
- MLX5_ASSERT(tick);
- /* Convert delta to completions, round up. */
- mts = (mts + tick - 1) / tick;
- if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) {
- /* We have mts is too distant future. */
- __atomic_fetch_add(&sh->txpp.err_ts_future,
- 1, __ATOMIC_RELAXED);
- return -1;
- }
- mts <<= 64 - MLX5_CQ_INDEX_WIDTH;
- ci += mts;
- ci >>= 64 - MLX5_CQ_INDEX_WIDTH;
- return ci;
-}
-
-/**
- * Set timestamp in mbuf dynamic field.
- *
- * @param mbuf
- * Structure to write into.
- * @param offset
- * Dynamic field offset in mbuf structure.
- * @param timestamp
- * Value to write.
- */
-static __rte_always_inline void
-mlx5_timestamp_set(struct rte_mbuf *mbuf, int offset,
- rte_mbuf_timestamp_t timestamp)
-{
- *RTE_MBUF_DYNFIELD(mbuf, offset, rte_mbuf_timestamp_t *) = timestamp;
-}
-
-/**
- * Replace MPRQ buffer.
- *
- * @param rxq
- * Pointer to Rx queue structure.
- * @param rq_idx
- * RQ index to replace.
- */
-static __rte_always_inline void
-mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx)
-{
- const uint32_t strd_n = 1 << rxq->strd_num_n;
- struct mlx5_mprq_buf *rep = rxq->mprq_repl;
- volatile struct mlx5_wqe_data_seg *wqe =
- &((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg;
- struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_idx];
- void *addr;
-
- if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) > 1) {
- MLX5_ASSERT(rep != NULL);
- /* Replace MPRQ buf. */
- (*rxq->mprq_bufs)[rq_idx] = rep;
- /* Replace WQE. */
- addr = mlx5_mprq_buf_addr(rep, strd_n);
- wqe->addr = rte_cpu_to_be_64((uintptr_t)addr);
- /* If there's only one MR, no need to replace LKey in WQE. */
- if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
- wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr);
- /* Stash a mbuf for next replacement. */
- if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep)))
- rxq->mprq_repl = rep;
- else
- rxq->mprq_repl = NULL;
- /* Release the old buffer. */
- mlx5_mprq_buf_free(buf);
- } else if (unlikely(rxq->mprq_repl == NULL)) {
- struct mlx5_mprq_buf *rep;
-
- /*
- * Currently, the MPRQ mempool is out of buffer
- * and doing memcpy regardless of the size of Rx
- * packet. Retry allocation to get back to
- * normal.
- */
- if (!rte_mempool_get(rxq->mprq_mp, (void **)&rep))
- rxq->mprq_repl = rep;
- }
-}
-
-/**
- * Attach or copy MPRQ buffer content to a packet.
- *
- * @param rxq
- * Pointer to Rx queue structure.
- * @param pkt
- * Pointer to a packet to fill.
- * @param len
- * Packet length.
- * @param buf
- * Pointer to a MPRQ buffer to take the data from.
- * @param strd_idx
- * Stride index to start from.
- * @param strd_cnt
- * Number of strides to consume.
- */
-static __rte_always_inline enum mlx5_rqx_code
-mprq_buf_to_pkt(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, uint32_t len,
- struct mlx5_mprq_buf *buf, uint16_t strd_idx, uint16_t strd_cnt)
-{
- const uint32_t strd_n = 1 << rxq->strd_num_n;
- const uint16_t strd_sz = 1 << rxq->strd_sz_n;
- const uint16_t strd_shift =
- MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en;
- const int32_t hdrm_overlap =
- len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz;
- const uint32_t offset = strd_idx * strd_sz + strd_shift;
- void *addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset);
-
- /*
- * Memcpy packets to the target mbuf if:
- * - The size of packet is smaller than mprq_max_memcpy_len.
- * - Out of buffer in the Mempool for Multi-Packet RQ.
- * - The packet's stride overlaps a headroom and scatter is off.
- */
- if (len <= rxq->mprq_max_memcpy_len ||
- rxq->mprq_repl == NULL ||
- (hdrm_overlap > 0 && !rxq->strd_scatter_en)) {
- if (likely(len <=
- (uint32_t)(pkt->buf_len - RTE_PKTMBUF_HEADROOM))) {
- rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
- addr, len);
- DATA_LEN(pkt) = len;
- } else if (rxq->strd_scatter_en) {
- struct rte_mbuf *prev = pkt;
- uint32_t seg_len = RTE_MIN(len, (uint32_t)
- (pkt->buf_len - RTE_PKTMBUF_HEADROOM));
- uint32_t rem_len = len - seg_len;
-
- rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
- addr, seg_len);
- DATA_LEN(pkt) = seg_len;
- while (rem_len) {
- struct rte_mbuf *next =
- rte_pktmbuf_alloc(rxq->mp);
-
- if (unlikely(next == NULL))
- return MLX5_RXQ_CODE_NOMBUF;
- NEXT(prev) = next;
- SET_DATA_OFF(next, 0);
- addr = RTE_PTR_ADD(addr, seg_len);
- seg_len = RTE_MIN(rem_len, (uint32_t)
- (next->buf_len - RTE_PKTMBUF_HEADROOM));
- rte_memcpy
- (rte_pktmbuf_mtod(next, void *),
- addr, seg_len);
- DATA_LEN(next) = seg_len;
- rem_len -= seg_len;
- prev = next;
- ++NB_SEGS(pkt);
- }
- } else {
- return MLX5_RXQ_CODE_DROPPED;
- }
- } else {
- rte_iova_t buf_iova;
- struct rte_mbuf_ext_shared_info *shinfo;
- uint16_t buf_len = strd_cnt * strd_sz;
- void *buf_addr;
-
- /* Increment the refcnt of the whole chunk. */
- __atomic_add_fetch(&buf->refcnt, 1, __ATOMIC_RELAXED);
- MLX5_ASSERT(__atomic_load_n(&buf->refcnt,
- __ATOMIC_RELAXED) <= strd_n + 1);
- buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
- /*
- * MLX5 device doesn't use iova but it is necessary in a
- * case where the Rx packet is transmitted via a
- * different PMD.
- */
- buf_iova = rte_mempool_virt2iova(buf) +
- RTE_PTR_DIFF(buf_addr, buf);
- shinfo = &buf->shinfos[strd_idx];
- rte_mbuf_ext_refcnt_set(shinfo, 1);
- /*
- * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when
- * attaching the stride to mbuf and more offload flags
- * will be added below by calling rxq_cq_to_mbuf().
- * Other fields will be overwritten.
- */
- rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
- buf_len, shinfo);
- /* Set mbuf head-room. */
- SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
- MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF);
- MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >=
- len - (hdrm_overlap > 0 ? hdrm_overlap : 0));
- DATA_LEN(pkt) = len;
- /*
- * Copy the last fragment of a packet (up to headroom
- * size bytes) in case there is a stride overlap with
- * a next packet's headroom. Allocate a separate mbuf
- * to store this fragment and link it. Scatter is on.
- */
- if (hdrm_overlap > 0) {
- MLX5_ASSERT(rxq->strd_scatter_en);
- struct rte_mbuf *seg =
- rte_pktmbuf_alloc(rxq->mp);
-
- if (unlikely(seg == NULL))
- return MLX5_RXQ_CODE_NOMBUF;
- SET_DATA_OFF(seg, 0);
- rte_memcpy(rte_pktmbuf_mtod(seg, void *),
- RTE_PTR_ADD(addr, len - hdrm_overlap),
- hdrm_overlap);
- DATA_LEN(seg) = hdrm_overlap;
- DATA_LEN(pkt) = len - hdrm_overlap;
- NEXT(pkt) = seg;
- NB_SEGS(pkt) = 2;
- }
- }
- return MLX5_RXQ_CODE_EXIT;
-}
-
-/**
- * Check whether Multi-Packet RQ can be enabled for the device.
- *
- * @param dev
- * Pointer to Ethernet device.
- *
- * @return
- * 1 if supported, negative errno value if not.
- */
-static __rte_always_inline int
-mlx5_check_mprq_support(struct rte_eth_dev *dev)
-{
- struct mlx5_priv *priv = dev->data->dev_private;
-
- if (priv->config.mprq.enabled &&
- priv->rxqs_n >= priv->config.mprq.min_rxqs_num)
- return 1;
- return -ENOTSUP;
-}
-
-/**
- * Check whether Multi-Packet RQ is enabled for the Rx queue.
- *
- * @param rxq
- * Pointer to receive queue structure.
- *
- * @return
- * 0 if disabled, otherwise enabled.
- */
-static __rte_always_inline int
-mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq)
-{
- return rxq->strd_num_n > 0;
-}
-
-/**
- * Check whether Multi-Packet RQ is enabled for the device.
- *
- * @param dev
- * Pointer to Ethernet device.
- *
- * @return
- * 0 if disabled, otherwise enabled.
- */
-static __rte_always_inline int
-mlx5_mprq_enabled(struct rte_eth_dev *dev)
-{
- struct mlx5_priv *priv = dev->data->dev_private;
- uint32_t i;
- uint16_t n = 0;
- uint16_t n_ibv = 0;
-
- if (mlx5_check_mprq_support(dev) < 0)
- return 0;
- /* All the configured queues should be enabled. */
- for (i = 0; i < priv->rxqs_n; ++i) {
- struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
- struct mlx5_rxq_ctrl *rxq_ctrl = container_of
- (rxq, struct mlx5_rxq_ctrl, rxq);