#include <stddef.h>
#include <stdint.h>
+#include <sys/queue.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
-#include <infiniband/mlx5_hw.h>
+#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif
#include <rte_mempool.h>
#include <rte_common.h>
#include <rte_hexdump.h>
+#include <rte_atomic.h>
#include "mlx5_utils.h"
#include "mlx5.h"
uint64_t oerrors; /**< Total number of failed transmitted packets. */
};
-/* Flow director queue structure. */
-struct fdir_queue {
- struct ibv_qp *qp; /* Associated RX QP. */
- struct ibv_exp_rwq_ind_table *ind_table; /* Indirection table. */
- struct ibv_exp_wq *wq; /* Work queue. */
- struct ibv_cq *cq; /* Completion queue. */
-};
-
struct priv;
+/* Memory region queue object. */
+struct mlx5_mr {
+ LIST_ENTRY(mlx5_mr) next; /**< Pointer to the next element. */
+ rte_atomic32_t refcnt; /*<< Reference counter. */
+ uint32_t lkey; /*<< rte_cpu_to_be_32(mr->lkey) */
+ uintptr_t start; /* Start address of MR */
+ uintptr_t end; /* End address of MR */
+ struct ibv_mr *mr; /*<< Memory Region. */
+ struct rte_mempool *mp; /*<< Memory Pool. */
+};
+
/* Compressed CQE context. */
struct rxq_zip {
uint16_t ai; /* Array index. */
};
/* RX queue descriptor. */
-struct rxq {
+struct mlx5_rxq_data {
unsigned int csum:1; /* Enable checksum offloading. */
unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
unsigned int vlan_strip:1; /* Enable VLAN stripping. */
unsigned int sges_n:2; /* Log 2 of SGEs (max buffers per packet). */
unsigned int cqe_n:4; /* Log 2 of CQ elements. */
unsigned int elts_n:4; /* Log 2 of Mbufs. */
- unsigned int port_id:8;
unsigned int rss_hash:1; /* RSS hash result is enabled. */
unsigned int mark:1; /* Marked flow available on the queue. */
unsigned int pending_err:1; /* CQE error needs to be handled. */
- unsigned int :7; /* Remaining bits. */
+ unsigned int :15; /* Remaining bits. */
volatile uint32_t *rq_db;
volatile uint32_t *cq_db;
+ uint16_t port_id;
uint16_t rq_ci;
uint16_t rq_pi;
uint16_t cq_ci;
struct mlx5_rxq_stats stats;
uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
+ void *cq_uar; /* CQ user access region. */
+ uint32_t cqn; /* CQ number. */
+ uint8_t cq_arm_sn; /* CQ arm seq number. */
} __rte_cache_aligned;
/* RX queue control descriptor. */
-struct rxq_ctrl {
+struct mlx5_rxq_ctrl {
struct priv *priv; /* Back pointer to private data. */
struct ibv_cq *cq; /* Completion Queue. */
- struct ibv_exp_wq *wq; /* Work Queue. */
- struct fdir_queue *fdir_queue; /* Flow director queue. */
- struct ibv_mr *mr; /* Memory Region (for mp). */
+ struct ibv_wq *wq; /* Work Queue. */
+ struct mlx5_mr *mr; /* Memory Region (for mp). */
struct ibv_comp_channel *channel;
unsigned int socket; /* CPU socket ID for allocations. */
- struct rxq rxq; /* Data path structure. */
+ struct mlx5_rxq_data rxq; /* Data path structure. */
};
/* Hash RX queue types. */
/* Flow structure with Ethernet specification. It is packed to prevent padding
* between attr and spec as this layout is expected by libibverbs. */
struct flow_attr_spec_eth {
- struct ibv_exp_flow_attr attr;
- struct ibv_exp_flow_spec_eth spec;
+ struct ibv_flow_attr attr;
+ struct ibv_flow_spec_eth spec;
} __attribute__((packed));
/* Define a struct flow_attr_spec_eth object as an array of at least
unsigned int flow_priority; /* Flow priority to use. */
union {
struct {
- enum ibv_exp_flow_spec_type type;
+ enum ibv_flow_spec_type type;
uint16_t size;
} hdr;
- struct ibv_exp_flow_spec_tcp_udp tcp_udp;
- struct ibv_exp_flow_spec_ipv4 ipv4;
- struct ibv_exp_flow_spec_ipv6 ipv6;
- struct ibv_exp_flow_spec_eth eth;
+ struct ibv_flow_spec_tcp_udp tcp_udp;
+ struct ibv_flow_spec_ipv4 ipv4;
+ struct ibv_flow_spec_ipv6 ipv6;
+ struct ibv_flow_spec_eth eth;
} flow_spec; /* Flow specification template. */
const struct hash_rxq_init *underlayer; /* Pointer to underlayer. */
};
struct ibv_qp *qp; /* Hash RX QP. */
enum hash_rxq_type type; /* Hash RX queue type. */
/* MAC flow steering rules, one per VLAN ID. */
- struct ibv_exp_flow *mac_flow
+ struct ibv_flow *mac_flow
[MLX5_MAX_MAC_ADDRESSES][MLX5_MAX_VLAN_IDS];
- struct ibv_exp_flow *special_flow
+ struct ibv_flow *special_flow
[MLX5_MAX_SPECIAL_FLOWS][MLX5_MAX_VLAN_IDS];
};
/* TX queue descriptor. */
__extension__
-struct txq {
+struct mlx5_txq_data {
uint16_t elts_head; /* Current counter in (*elts)[]. */
uint16_t elts_tail; /* Counter of first element awaiting completion. */
uint16_t elts_comp; /* Counter since last completion request. */
uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
+ uint16_t mr_cache_idx; /* Index of last hit entry. */
uint32_t qp_num_8s; /* QP number shifted by 8. */
uint32_t flags; /* Flags for Tx Queue. */
volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
volatile uint32_t *qp_db; /* Work queue doorbell. */
volatile uint32_t *cq_db; /* Completion queue doorbell. */
volatile void *bf_reg; /* Blueflame register. */
- struct {
- uintptr_t start; /* Start address of MR */
- uintptr_t end; /* End address of MR */
- struct ibv_mr *mr; /* Memory Region (for mp). */
- uint32_t lkey; /* rte_cpu_to_be_32(mr->lkey) */
- } mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
- uint16_t mr_cache_idx; /* Index of last hit entry. */
+ struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */
struct rte_mbuf *(*elts)[]; /* TX elements. */
struct mlx5_txq_stats stats; /* TX queue counters. */
} __rte_cache_aligned;
/* TX queue control descriptor. */
-struct txq_ctrl {
+struct mlx5_txq_ctrl {
struct priv *priv; /* Back pointer to private data. */
struct ibv_cq *cq; /* Completion Queue. */
struct ibv_qp *qp; /* Queue Pair. */
unsigned int socket; /* CPU socket ID for allocations. */
- struct txq txq; /* Data path structure. */
+ struct mlx5_txq_data txq; /* Data path structure. */
+ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
};
/* mlx5_rxq.c */
extern uint8_t rss_hash_default_key[];
extern const size_t rss_hash_default_key_len;
-size_t priv_flow_attr(struct priv *, struct ibv_exp_flow_attr *,
+size_t priv_flow_attr(struct priv *, struct ibv_flow_attr *,
size_t, enum hash_rxq_type);
int priv_create_hash_rxqs(struct priv *);
void priv_destroy_hash_rxqs(struct priv *);
int priv_allow_flow_type(struct priv *, enum hash_rxq_flow_type);
int priv_rehash_flows(struct priv *);
-void rxq_cleanup(struct rxq_ctrl *);
+void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *);
int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
const struct rte_eth_rxconf *, struct rte_mempool *);
void mlx5_rx_queue_release(void *);
int priv_rx_intr_vec_enable(struct priv *priv);
void priv_rx_intr_vec_disable(struct priv *priv);
-#ifdef HAVE_UPDATE_CQ_CI
int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
-#endif /* HAVE_UPDATE_CQ_CI */
/* mlx5_txq.c */
-void txq_cleanup(struct txq_ctrl *);
-int txq_ctrl_setup(struct rte_eth_dev *, struct txq_ctrl *, uint16_t,
- unsigned int, const struct rte_eth_txconf *);
+void mlx5_txq_cleanup(struct mlx5_txq_ctrl *);
+int mlx5_txq_ctrl_setup(struct rte_eth_dev *, struct mlx5_txq_ctrl *, uint16_t,
+ unsigned int, const struct rte_eth_txconf *);
int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
const struct rte_eth_txconf *);
void mlx5_tx_queue_release(void *);
+int priv_tx_uar_remap(struct priv *priv, int fd);
/* mlx5_rxtx.c */
/* Vectorized version of mlx5_rxtx.c */
int priv_check_raw_vec_tx_support(struct priv *);
int priv_check_vec_tx_support(struct priv *);
-int rxq_check_vec_support(struct rxq *);
+int rxq_check_vec_support(struct mlx5_rxq_data *);
int priv_check_vec_rx_support(struct priv *);
uint16_t mlx5_tx_burst_raw_vec(void *, struct rte_mbuf **, uint16_t);
uint16_t mlx5_tx_burst_vec(void *, struct rte_mbuf **, uint16_t);
/* mlx5_mr.c */
struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *);
-void txq_mp2mr_iter(struct rte_mempool *, void *);
-uint32_t txq_mp2mr_reg(struct txq *, struct rte_mempool *, unsigned int);
+void mlx5_txq_mp2mr_iter(struct rte_mempool *, void *);
+struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
+ unsigned int);
#ifndef NDEBUG
/**
* WQE address.
*/
static inline uintptr_t *
-tx_mlx5_wqe(struct txq *txq, uint16_t ci)
+tx_mlx5_wqe(struct mlx5_txq_data *txq, uint16_t ci)
{
ci &= ((1 << txq->wqe_n) - 1);
return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
* Pointer to TX queue structure.
*/
static __rte_always_inline void
-mlx5_tx_complete(struct txq *txq)
+mlx5_tx_complete(struct mlx5_txq_data *txq)
{
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
* mr->lkey on success, (uint32_t)-1 on failure.
*/
static __rte_always_inline uint32_t
-mlx5_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
+mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
{
uint16_t i = txq->mr_cache_idx;
uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
+ struct mlx5_mr *mr;
assert(i < RTE_DIM(txq->mp2mr));
- if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr))
- return txq->mp2mr[i].lkey;
+ if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr))
+ return txq->mp2mr[i]->lkey;
for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
- if (unlikely(txq->mp2mr[i].mr == NULL)) {
+ if (unlikely(txq->mp2mr[i]->mr == NULL)) {
/* Unknown MP, add a new MR for it. */
break;
}
- if (txq->mp2mr[i].start <= addr &&
- txq->mp2mr[i].end >= addr) {
- assert(txq->mp2mr[i].lkey != (uint32_t)-1);
- assert(rte_cpu_to_be_32(txq->mp2mr[i].mr->lkey) ==
- txq->mp2mr[i].lkey);
+ if (txq->mp2mr[i]->start <= addr &&
+ txq->mp2mr[i]->end >= addr) {
+ assert(txq->mp2mr[i]->lkey != (uint32_t)-1);
+ assert(rte_cpu_to_be_32(txq->mp2mr[i]->mr->lkey) ==
+ txq->mp2mr[i]->lkey);
txq->mr_cache_idx = i;
- return txq->mp2mr[i].lkey;
+ return txq->mp2mr[i]->lkey;
}
}
txq->mr_cache_idx = 0;
- return txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
+ mr = mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
+ /*
+ * Request the reference to use in this queue, the original one is
+ * kept by the control plane.
+ */
+ if (mr) {
+ rte_atomic32_inc(&mr->refcnt);
+ return mr->lkey;
+ }
+ return (uint32_t)-1;
}
/**
* Pointer to the last WQE posted in the NIC.
*/
static __rte_always_inline void
-mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe)
+mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
{
uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
volatile uint64_t *src = ((volatile uint64_t *)wqe);