#include <stdint.h>
#include <sys/queue.h>
-/* Verbs header. */
-/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include <infiniband/verbs.h>
-#include <infiniband/mlx5dv.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_common.h>
#include <rte_io.h>
#include <rte_bus_pci.h>
#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <mlx5_glue.h>
+#include <mlx5_prm.h>
+#include <mlx5_common.h>
+#include <mlx5_common_mr.h>
+
+#include "mlx5_defs.h"
#include "mlx5_utils.h"
#include "mlx5.h"
-#include "mlx5_mr.h"
#include "mlx5_autoconf.h"
-#include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
/* Support tunnel matching. */
-#define MLX5_FLOW_TUNNEL 9
+#define MLX5_FLOW_TUNNEL 10
+
+/* Mbuf dynamic flag mask for inline. */
+extern uint64_t rte_net_mlx5_dynf_inline_mask;
struct mlx5_rxq_stats {
#ifdef MLX5_PMD_SOFT_COUNTERS
struct mlx5_rxq_data {
unsigned int csum:1; /* Enable checksum offloading. */
unsigned int hw_timestamp:1; /* Enable HW timestamp. */
+ unsigned int rt_timestamp:1; /* Realtime timestamp format. */
unsigned int vlan_strip:1; /* Enable VLAN stripping. */
unsigned int crc_present:1; /* CRC must be subtracted. */
unsigned int sges_n:3; /* Log 2 of SGEs (max buffers per packet). */
unsigned int strd_sz_n:4; /* Log 2 of stride size. */
unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */
unsigned int err_state:2; /* enum mlx5_rxq_err_state. */
- unsigned int strd_headroom_en:1; /* Enable mbuf headroom in MPRQ. */
+ unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */
unsigned int lro:1; /* Enable LRO. */
- unsigned int :1; /* Remaining bits. */
+ unsigned int dynf_meta:1; /* Dynamic metadata is configured. */
volatile uint32_t *rq_db;
volatile uint32_t *cq_db;
uint16_t port_id;
struct rte_mempool *mp;
struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. */
+ struct mlx5_dev_ctx_shared *sh; /* Shared context. */
uint16_t idx; /* Queue index. */
struct mlx5_rxq_stats stats;
- uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
+ rte_xmm_t mbuf_initializer; /* Default rearm/flags for vectorized Rx. */
struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
- void *cq_uar; /* CQ user access region. */
+ void *cq_uar; /* Verbs CQ user access region. */
uint32_t cqn; /* CQ number. */
uint8_t cq_arm_sn; /* CQ arm seq number. */
#ifndef RTE_ARCH_64
/* CQ (UAR) access lock required for 32bit implementations */
#endif
uint32_t tunnel; /* Tunnel information. */
+ uint64_t flow_meta_mask;
+ int32_t flow_meta_offset;
} __rte_cache_aligned;
enum mlx5_rxq_obj_type {
LIST_ENTRY(mlx5_rxq_obj) next; /* Pointer to the next element. */
rte_atomic32_t refcnt; /* Reference counter. */
struct mlx5_rxq_ctrl *rxq_ctrl; /* Back pointer to parent. */
- struct ibv_cq *cq; /* Completion Queue. */
enum mlx5_rxq_obj_type type;
+ int fd; /* File descriptor for event channel */
RTE_STD_C11
union {
- struct ibv_wq *wq; /* Work Queue. */
- struct mlx5_devx_obj *rq; /* DevX object for Rx Queue. */
+ struct {
+ struct ibv_wq *wq; /* Work Queue. */
+ struct ibv_cq *ibv_cq; /* Completion Queue. */
+ struct ibv_comp_channel *ibv_channel;
+ };
+ struct {
+ struct mlx5_devx_obj *rq; /* DevX Rx Queue object. */
+ struct mlx5_devx_obj *devx_cq; /* DevX CQ object. */
+ struct mlx5dv_devx_event_channel *devx_channel;
+ };
};
- struct ibv_comp_channel *channel;
};
/* RX queue control descriptor. */
enum mlx5_rxq_type type; /* Rxq type. */
unsigned int socket; /* CPU socket ID for allocations. */
unsigned int irq:1; /* Whether IRQ is enabled. */
- unsigned int dbr_umem_id_valid:1; /* dbr_umem_id holds a valid value. */
+ unsigned int rq_dbr_umem_id_valid:1;
+ unsigned int cq_dbr_umem_id_valid:1;
uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */
uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */
uint32_t wqn; /* WQ number. */
uint16_t dump_file_n; /* Number of dump files. */
- uint32_t dbr_umem_id; /* Storing door-bell information, */
- uint64_t dbr_offset; /* needed when freeing door-bell. */
+ uint32_t rq_dbr_umem_id;
+ uint64_t rq_dbr_offset;
+ /* Storing RQ door-bell information, needed when freeing door-bell. */
+ uint32_t cq_dbr_umem_id;
+ uint64_t cq_dbr_offset;
+ /* Storing CQ door-bell information, needed when freeing door-bell. */
struct mlx5dv_devx_umem *wq_umem; /* WQ buffer registration info. */
+ struct mlx5dv_devx_umem *cq_umem; /* CQ buffer registration info. */
struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */
};
/* Hash Rx queue. */
struct mlx5_hrxq {
- LIST_ENTRY(mlx5_hrxq) next; /* Pointer to the next element. */
+ ILIST_ENTRY(uint32_t)next; /* Index to the next element. */
rte_atomic32_t refcnt; /* Reference counter. */
struct mlx5_ind_table_obj *ind_table; /* Indirection table. */
RTE_STD_C11
uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */
/* WQ related fields. */
uint16_t cq_ci; /* Consumer index for completion queue. */
-#ifndef NDEBUG
- uint16_t cq_pi; /* Counter of issued CQE "always" requests. */
-#endif
+ uint16_t cq_pi; /* Production index for completion queue. */
uint16_t cqe_s; /* Number of CQ elements. */
uint16_t cqe_m; /* Mask for CQ indices. */
/* CQ related fields. */
struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
struct mlx5_wqe *wqes; /* Work queue. */
struct mlx5_wqe *wqes_end; /* Work queue array limit. */
+#ifdef RTE_LIBRTE_MLX5_DEBUG
+ uint32_t *fcqs; /* Free completion queue (debug extended). */
+#else
+ uint16_t *fcqs; /* Free completion queue. */
+#endif
volatile struct mlx5_cqe *cqes; /* Completion queue. */
volatile uint32_t *qp_db; /* Work queue doorbell. */
volatile uint32_t *cq_db; /* Completion queue doorbell. */
uint16_t port_id; /* Port ID of device. */
uint16_t idx; /* Queue index. */
+ uint64_t ts_mask; /* Timestamp flag dynamic mask. */
+ int32_t ts_offset; /* Timestamp field dynamic offset. */
+ struct mlx5_dev_ctx_shared *sh; /* Shared context. */
struct mlx5_txq_stats stats; /* TX queue counters. */
#ifndef RTE_ARCH_64
rte_spinlock_t *uar_lock;
/* Tx queue object kinds; each enumerator's comment names what it wraps. */
enum mlx5_txq_obj_type {
MLX5_TXQ_OBJ_TYPE_IBV, /* mlx5_txq_obj with ibv_wq. */
+ MLX5_TXQ_OBJ_TYPE_DEVX_SQ, /* mlx5_txq_obj with mlx5_devx_sq. */
MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN,
/* mlx5_txq_obj with mlx5_devx_tq and hairpin support. */
};
/* DevX object for Sx queue. */
struct mlx5_devx_obj *tis; /* The TIS object. */
};
+ struct {
+ struct rte_eth_dev *dev;
+ struct mlx5_devx_obj *cq_devx;
+ struct mlx5dv_devx_umem *cq_umem;
+ void *cq_buf;
+ int64_t cq_dbrec_offset;
+ struct mlx5_devx_dbr_page *cq_dbrec_page;
+ struct mlx5_devx_obj *sq_devx;
+ struct mlx5dv_devx_umem *sq_umem;
+ void *sq_buf;
+ int64_t sq_dbrec_offset;
+ struct mlx5_devx_dbr_page *sq_dbrec_page;
+ };
};
};
int mlx5_mprq_enabled(struct rte_eth_dev *dev);
int mlx5_mprq_free_mp(struct rte_eth_dev *dev);
int mlx5_mprq_alloc_mp(struct rte_eth_dev *dev);
+int mlx5_rx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id);
+int mlx5_rx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id);
+int mlx5_rx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id);
+int mlx5_rx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
unsigned int socket, const struct rte_eth_rxconf *conf,
struct rte_mempool *mp);
int mlx5_rxq_verify(struct rte_eth_dev *dev);
int rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl);
int mlx5_ind_table_obj_verify(struct rte_eth_dev *dev);
-struct mlx5_hrxq *mlx5_hrxq_new(struct rte_eth_dev *dev,
- const uint8_t *rss_key, uint32_t rss_key_len,
- uint64_t hash_fields,
- const uint16_t *queues, uint32_t queues_n,
- int tunnel __rte_unused);
-struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev,
- const uint8_t *rss_key, uint32_t rss_key_len,
- uint64_t hash_fields,
- const uint16_t *queues, uint32_t queues_n);
-int mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hxrq);
+uint32_t mlx5_hrxq_new(struct rte_eth_dev *dev,
+ const uint8_t *rss_key, uint32_t rss_key_len,
+ uint64_t hash_fields,
+ const uint16_t *queues, uint32_t queues_n,
+ int tunnel __rte_unused);
+uint32_t mlx5_hrxq_get(struct rte_eth_dev *dev,
+ const uint8_t *rss_key, uint32_t rss_key_len,
+ uint64_t hash_fields,
+ const uint16_t *queues, uint32_t queues_n);
+int mlx5_hrxq_release(struct rte_eth_dev *dev, uint32_t hxrq_idx);
int mlx5_hrxq_verify(struct rte_eth_dev *dev);
enum mlx5_rxq_type mlx5_rxq_get_type(struct rte_eth_dev *dev, uint16_t idx);
struct mlx5_hrxq *mlx5_hrxq_drop_new(struct rte_eth_dev *dev);
void mlx5_hrxq_drop_release(struct rte_eth_dev *dev);
uint64_t mlx5_get_rx_port_offloads(void);
uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev);
+void mlx5_rxq_timestamp_set(struct rte_eth_dev *dev);
+
/* mlx5_txq.c */
+int mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id);
+int mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id);
+int mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id);
+int mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
unsigned int socket, const struct rte_eth_txconf *conf);
int mlx5_tx_hairpin_queue_setup
const struct rte_eth_hairpin_conf *hairpin_conf);
void mlx5_tx_queue_release(void *dpdk_txq);
int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
+void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev);
struct mlx5_txq_obj *mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx,
enum mlx5_txq_obj_type type);
struct mlx5_txq_obj *mlx5_txq_obj_get(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_verify(struct rte_eth_dev *dev);
void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
+void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl);
uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev);
+void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev);
/* mlx5_rxtx.c */
void mlx5_set_ptype_table(void);
void mlx5_set_cksum_table(void);
void mlx5_set_swp_types_table(void);
-__rte_noinline int mlx5_tx_error_cqe_handle
- (struct mlx5_txq_data *restrict txq,
- volatile struct mlx5_err_cqe *err_cqe);
uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq);
__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec);
const void *buf, unsigned int len);
int mlx5_queue_state_modify_primary(struct rte_eth_dev *dev,
const struct mlx5_mp_arg_queue_state_modify *sm);
+void mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+ struct rte_eth_rxq_info *qinfo);
+void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+ struct rte_eth_txq_info *qinfo);
+int mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+ struct rte_eth_burst_mode *mode);
+int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+ struct rte_eth_burst_mode *mode);
/* Vectorized version of mlx5_rxtx.c */
int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq_data);
#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, lock)
#endif
-/* CQE status. */
-enum mlx5_cqe_status {
- MLX5_CQE_STATUS_SW_OWN = -1,
- MLX5_CQE_STATUS_HW_OWN = -2,
- MLX5_CQE_STATUS_ERR = -3,
-};
-
-/**
- * Check whether CQE is valid.
- *
- * @param cqe
- * Pointer to CQE.
- * @param cqes_n
- * Size of completion queue.
- * @param ci
- * Consumer index.
- *
- * @return
- * The CQE status.
- */
-static __rte_always_inline enum mlx5_cqe_status
-check_cqe(volatile struct mlx5_cqe *cqe, const uint16_t cqes_n,
- const uint16_t ci)
-{
- const uint16_t idx = ci & cqes_n;
- const uint8_t op_own = cqe->op_own;
- const uint8_t op_owner = MLX5_CQE_OWNER(op_own);
- const uint8_t op_code = MLX5_CQE_OPCODE(op_own);
-
- if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
- return MLX5_CQE_STATUS_HW_OWN;
- rte_cio_rmb();
- if (unlikely(op_code == MLX5_CQE_RESP_ERR ||
- op_code == MLX5_CQE_REQ_ERR))
- return MLX5_CQE_STATUS_ERR;
- return MLX5_CQE_STATUS_SW_OWN;
-}
-
/**
* Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which the
* cloned mbuf is allocated is returned instead.
uint32_t lkey;
/* Linear search on MR cache array. */
- lkey = mlx5_mr_lookup_cache(mr_ctrl->cache, &mr_ctrl->mru,
- MLX5_MR_CACHE_N, addr);
+ lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
+ MLX5_MR_CACHE_N, addr);
if (likely(lkey != UINT32_MAX))
return lkey;
/* Take slower bottom-half (Binary Search) on miss. */
if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
mlx5_mr_flush_local_cache(mr_ctrl);
/* Linear search on MR cache array. */
- lkey = mlx5_mr_lookup_cache(mr_ctrl->cache, &mr_ctrl->mru,
- MLX5_MR_CACHE_N, addr);
+ lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
+ MLX5_MR_CACHE_N, addr);
if (likely(lkey != UINT32_MAX))
return lkey;
/* Take slower bottom-half on miss. */
mlx5_tx_dbrec_cond_wmb(txq, wqe, 1);
}
+/**
+ * Turn a Packet Pacing Clock Queue CQE timestamp (HW format) into a
+ * linear counter.
+ *
+ * The upper 32 bits of the HW value are multiplied by NS_PER_S and the
+ * lower 32 bits are added to the product.
+ *
+ * @param sh
+ *   Pointer to the device shared context. Not used by the current
+ *   implementation; might be needed to convert according to the
+ *   current device configuration.
+ * @param ts
+ *   Timestamp from CQE to convert.
+ * @return
+ *   UTC in nanoseconds.
+ */
+static __rte_always_inline uint64_t
+mlx5_txpp_convert_rx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t ts)
+{
+	const uint64_t low_part = ts & UINT32_MAX;
+	const uint64_t high_part = ts >> 32;
+
+	RTE_SET_USED(sh);
+	return low_part + high_part * NS_PER_S;
+}
+
+/**
+ * Convert timestamp from mbuf format to linear counter
+ * of Clock Queue completions (MLX5_CQ_INDEX_WIDTH bits).
+ *
+ * The function samples the shared txpp.ts pair (ci_ts, ts), applies
+ * the txpp.skew correction, converts the remaining delta to a number of
+ * txpp.tick units (rounded up), shifts that count into the upper
+ * MLX5_CQ_INDEX_WIDTH bits, adds it to the sampled ci_ts and returns
+ * the upper MLX5_CQ_INDEX_WIDTH bits of the sum.
+ *
+ * @param sh
+ * Pointer to the device shared context to fetch Tx
+ * packet pacing timestamp and parameters.
+ * @param mts
+ * Timestamp from mbuf to convert.
+ * @return
+ * positive or zero value - completion ID to wait
+ * -1 - conversion error: the timestamp is in the past (txpp.err_ts_past
+ * is incremented) or too far in the future (txpp.err_ts_future is
+ * incremented).
+ */
+static __rte_always_inline int32_t
+mlx5_txpp_convert_tx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t mts)
+{
+ uint64_t ts, ci;
+ uint32_t tick;
+
+ do {
+ /*
+ * Read atomically two uint64_t fields and compare lsb bits.
+ * If the lower MLX5_CQ_INDEX_WIDTH bits do not match - the
+ * timestamp was updated in the service thread, data should
+ * be re-read.
+ */
+ rte_compiler_barrier();
+ ci = rte_atomic64_read(&sh->txpp.ts.ci_ts);
+ ts = rte_atomic64_read(&sh->txpp.ts.ts);
+ rte_compiler_barrier();
+ if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH)))
+ break;
+ } while (true);
+ /*
+ * Perform the skew correction (positive value to send earlier),
+ * then make mts relative to the sampled timestamp.
+ */
+ mts -= sh->txpp.skew;
+ mts -= ts;
+ if (unlikely(mts >= UINT64_MAX / 2)) {
+ /*
+ * The unsigned difference wrapped around, i.e. it is a
+ * negative integer: mts is in the past.
+ */
+ rte_atomic32_inc(&sh->txpp.err_ts_past);
+ return -1;
+ }
+ tick = sh->txpp.tick;
+ MLX5_ASSERT(tick);
+ /* Convert delta to completions, round up. */
+ mts = (mts + tick - 1) / tick;
+ if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) {
+ /* mts is too far in the future. */
+ rte_atomic32_inc(&sh->txpp.err_ts_future);
+ return -1;
+ }
+ mts <<= 64 - MLX5_CQ_INDEX_WIDTH;
+ ci += mts;
+ ci >>= 64 - MLX5_CQ_INDEX_WIDTH;
+ return ci;
+}
+
#endif /* RTE_PMD_MLX5_RXTX_H_ */