1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2019 Mellanox Technologies, Ltd
5 #ifndef RTE_PMD_MLX5_COMMON_H_
6 #define RTE_PMD_MLX5_COMMON_H_
11 #include <rte_bus_pci.h>
12 #include <rte_debug.h>
13 #include <rte_atomic.h>
14 #include <rte_rwlock.h>
16 #include <rte_kvargs.h>
17 #include <rte_devargs.h>
18 #include <rte_bitops.h>
19 #include <rte_lcore.h>
20 #include <rte_spinlock.h>
21 #include <rte_os_shim.h>
24 #include "mlx5_devx_cmds.h"
25 #include "mlx5_common_os.h"
26 #include "mlx5_common_mr.h"
28 /* Reported driver name. */
/* Name string for the PCI variant of the driver. */
29 #define MLX5_PCI_DRIVER_NAME "mlx5_pci"
/* Name string for the auxiliary variant of the driver. */
30 #define MLX5_AUXILIARY_DRIVER_NAME "mlx5_auxiliary"
32 /* Bit-field manipulation. */
/*
 * Bit-field manipulation.
 *
 * A bit-field is an array of `type` words. Bit `b` is stored in word
 * b / (bits per word), at bit position b % (bits per word).
 *
 * BITFIELD_DECLARE(bf, type, size)
 *   Declare array `bf` with enough `type` words to hold `size` bits
 *   (rounded up to a whole word).
 * BITFIELD_DEFINE(bf, type, size)
 *   Same as BITFIELD_DECLARE(), with all bits initialized to zero.
 * BITFIELD_SET(bf, b) / BITFIELD_RESET(bf, b)
 *   Set / clear bit `b`; the expression has type void.
 * BITFIELD_ISSET(bf, b)
 *   Evaluate to 1 if bit `b` is set, 0 otherwise.
 *
 * The single-bit mask is built in uintmax_t rather than size_t so that the
 * shift stays well defined when the word type is wider than size_t (for
 * example uint64_t words on a 32-bit target).
 *
 * `bf` and `b` are evaluated more than once: do not pass expressions with
 * side effects.
 */
#define BITFIELD_DECLARE(bf, type, size) \
	type bf[(((size_t)(size) / (sizeof(type) * CHAR_BIT)) + \
		!!((size_t)(size) % (sizeof(type) * CHAR_BIT)))]
#define BITFIELD_DEFINE(bf, type, size) \
	BITFIELD_DECLARE((bf), type, (size)) = { 0 }
#define BITFIELD_SET(bf, b) \
	(void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] |= \
		((uintmax_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT))))
#define BITFIELD_RESET(bf, b) \
	(void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] &= \
		~((uintmax_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT))))
#define BITFIELD_ISSET(bf, b) \
	(!!(((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] & \
		((uintmax_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT))))))
49 * Helper macros to work around __VA_ARGS__ limitations in a C99 compliant
/*
 * PMD_DRV_LOG_STRIP(a, b) expands to its first argument and drops the second.
 * PMD_DRV_LOG_OPAREN, PMD_DRV_LOG_CPAREN and PMD_DRV_LOG_COMMA expand to the
 * bare tokens "(", ")" and "," so that those tokens can be emitted from
 * inside another macro's argument list.
 */
52 #define PMD_DRV_LOG_STRIP(a, b) a
53 #define PMD_DRV_LOG_OPAREN (
54 #define PMD_DRV_LOG_CPAREN )
55 #define PMD_DRV_LOG_COMMA ,
57 /* Return the file name part of a path. */
58 static inline const char *
59 pmd_drv_log_basename(const char *s)
/*
 * Innermost logging macro: expands to an rte_log() call at level
 * RTE_LOG_<level>, with the variadic arguments passed through
 * RTE_FMT_HEAD() and RTE_FMT_TAIL().
 * NOTE(review): some argument lines of the rte_log() call are not visible in
 * this view, so the use of `type` and `name` is not described here.
 */
69 #define PMD_DRV_LOG___(level, type, name, ...) \
70 rte_log(RTE_LOG_ ## level, \
73 RTE_FMT_HEAD(__VA_ARGS__,), \
74 RTE_FMT_TAIL(__VA_ARGS__,)))
76 #ifdef RTE_LIBRTE_MLX5_DEBUG
/* Debug build: prepend the "%s:%u: %s(): " prefix to the format string. */
78 #define PMD_DRV_LOG__(level, type, name, ...) \
79 PMD_DRV_LOG___(level, type, name, "%s:%u: %s(): " __VA_ARGS__)
/*
 * Debug build: append "\n" to the format `s`, then supply
 * pmd_drv_log_basename(__FILE__) and __LINE__ as the first arguments after
 * the format, using PMD_DRV_LOG_COMMA to emit the separating commas.
 * NOTE(review): the tail of this macro is not visible in this view.
 */
80 #define PMD_DRV_LOG_(level, type, name, s, ...) \
81 PMD_DRV_LOG__(level, type, name,\
82 s "\n" PMD_DRV_LOG_COMMA \
83 pmd_drv_log_basename(__FILE__) PMD_DRV_LOG_COMMA \
84 __LINE__ PMD_DRV_LOG_COMMA \
88 #else /* RTE_LIBRTE_MLX5_DEBUG */
/* Non-debug build: forward the arguments unchanged (no prefix). */
89 #define PMD_DRV_LOG__(level, type, name, ...) \
90 PMD_DRV_LOG___(level, type, name, __VA_ARGS__)
/* Non-debug build: only append "\n" to the format string `s`. */
91 #define PMD_DRV_LOG_(level, type, name, s, ...) \
92 PMD_DRV_LOG__(level, type, name, s "\n", __VA_ARGS__)
94 #endif /* RTE_LIBRTE_MLX5_DEBUG */
96 /* claim_zero() does not perform any check when debugging is disabled. */
97 #ifdef RTE_LIBRTE_MLX5_DEBUG
/*
 * Debug build: MLX5_ASSERT() maps to RTE_VERIFY(); claim_zero() and
 * claim_nonzero() assert that the expression compares == 0 / != 0.
 */
99 #define MLX5_ASSERT(exp) RTE_VERIFY(exp)
100 #define claim_zero(...) MLX5_ASSERT((__VA_ARGS__) == 0)
101 #define claim_nonzero(...) MLX5_ASSERT((__VA_ARGS__) != 0)
103 #else /* RTE_LIBRTE_MLX5_DEBUG */
/*
 * Non-debug build: MLX5_ASSERT() maps to RTE_ASSERT(); claim_zero() and
 * claim_nonzero() still expand to the expression itself, so it is evaluated,
 * but its value is not checked.
 */
105 #define MLX5_ASSERT(exp) RTE_ASSERT(exp)
106 #define claim_zero(...) (__VA_ARGS__)
107 #define claim_nonzero(...) (__VA_ARGS__)
109 #endif /* RTE_LIBRTE_MLX5_DEBUG */
111 /* Allocate a buffer on the stack and fill it with a printf format string. */
/*
 * Allocate a buffer on the stack and fill it with a printf format string.
 *
 * Declares two objects in the enclosing scope:
 *   - int mkstr_size_<name>: the raw return value of the sizing snprintf()
 *     call (formatted length without the terminating NUL, or a negative
 *     value on failure);
 *   - char <name>[]: a variable-length array holding the formatted,
 *     NUL-terminated string.
 *
 * If the sizing snprintf() call fails (negative return), the array is given
 * a length of 1 instead of a zero or negative length, which would be
 * undefined behavior for a variable-length array.
 *
 * The macro expands to several declarations/statements, so it must be used
 * as a statement of its own inside a block. The format arguments are
 * evaluated twice.
 */
#define MKSTR(name, ...) \
	int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \
	char name[(mkstr_size_##name > 0 ? mkstr_size_##name : 0) + 1]; \
	\
	memset(name, 0, sizeof(name)); \
	snprintf(name, sizeof(name), "" __VA_ARGS__)
/* NOTE(review): the enclosing enum header/closing lines are not visible in this view. */
/* PCI vendor ID. */
120 PCI_VENDOR_ID_MELLANOX = 0x15b3,
/* PCI device IDs, one enumerator per supported device variant. */
124 PCI_DEVICE_ID_MELLANOX_CONNECTX4 = 0x1013,
125 PCI_DEVICE_ID_MELLANOX_CONNECTX4VF = 0x1014,
126 PCI_DEVICE_ID_MELLANOX_CONNECTX4LX = 0x1015,
127 PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF = 0x1016,
128 PCI_DEVICE_ID_MELLANOX_CONNECTX5 = 0x1017,
129 PCI_DEVICE_ID_MELLANOX_CONNECTX5VF = 0x1018,
130 PCI_DEVICE_ID_MELLANOX_CONNECTX5EX = 0x1019,
131 PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a,
132 PCI_DEVICE_ID_MELLANOX_CONNECTX5BF = 0xa2d2,
133 PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF = 0xa2d3,
134 PCI_DEVICE_ID_MELLANOX_CONNECTX6 = 0x101b,
135 PCI_DEVICE_ID_MELLANOX_CONNECTX6VF = 0x101c,
136 PCI_DEVICE_ID_MELLANOX_CONNECTX6DX = 0x101d,
/* NOTE(review): unlike its neighbors, this name carries no generation number - confirm it is intended. */
137 PCI_DEVICE_ID_MELLANOX_CONNECTXVF = 0x101e,
138 PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF = 0xa2d6,
139 PCI_DEVICE_ID_MELLANOX_CONNECTX6LX = 0x101f,
140 PCI_DEVICE_ID_MELLANOX_CONNECTX7 = 0x1021,
/* The "0X" prefix is a valid hex prefix; the value equals 0xa2dc. */
141 PCI_DEVICE_ID_MELLANOX_CONNECTX7BF = 0Xa2dc,
144 /* Maximum number of simultaneous unicast MAC addresses. */
145 #define MLX5_MAX_UC_MAC_ADDRESSES 128
146 /* Maximum number of simultaneous Multicast MAC addresses. */
147 #define MLX5_MAX_MC_MAC_ADDRESSES 128
148 /* Maximum number of simultaneous MAC addresses. */
/* Sum of the unicast and multicast limits: 128 + 128 = 256. */
149 #define MLX5_MAX_MAC_ADDRESSES \
150 (MLX5_MAX_UC_MAC_ADDRESSES + MLX5_MAX_MC_MAC_ADDRESSES)
152 /* Recognized Infiniband device physical port name types. */
/*
 * Classification of a physical port name. The per-value comments give an
 * example name format and the kernel version they refer to.
 */
153 enum mlx5_nl_phys_port_name_type {
154 MLX5_PHYS_PORT_NAME_TYPE_NOTSET = 0, /* Not set. */
155 MLX5_PHYS_PORT_NAME_TYPE_LEGACY, /* Legacy naming, kernel ver < 5.0 */
156 MLX5_PHYS_PORT_NAME_TYPE_UPLINK, /* p0, kernel ver >= 5.0 */
157 MLX5_PHYS_PORT_NAME_TYPE_PFVF, /* pf0vf0, kernel ver >= 5.0 */
158 MLX5_PHYS_PORT_NAME_TYPE_PFHPF, /* pf0, kernel ver >= 5.7, HPF rep */
159 MLX5_PHYS_PORT_NAME_TYPE_PFSF, /* pf0sf0, kernel ver >= 5.0 */
160 MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN, /* Unrecognized. */
163 /** Switch information returned by mlx5_nl_switch_info(). */
/*
 * Holds the role flags (master / representor), the port name type and the
 * numeric components of a port name, plus the switch identifier.
 */
164 struct mlx5_switch_info {
165 uint32_t master:1; /**< Master device. */
166 uint32_t representor:1; /**< Representor device. */
167 enum mlx5_nl_phys_port_name_type name_type; /**< Port name type. */
168 int32_t ctrl_num; /**< Controller number (valid for c#pf#vf# format). */
169 int32_t pf_num; /**< PF number (valid for pfxvfx format only). */
170 int32_t port_name; /**< Representor port name. */
171 uint64_t switch_id; /**< Switch identifier. */
/* Result codes returned by check_cqe(); all values are negative. */
175 enum mlx5_cqe_status {
176 MLX5_CQE_STATUS_SW_OWN = -1, /* CQE passed the owner and opcode checks. */
177 MLX5_CQE_STATUS_HW_OWN = -2, /* Owner bit mismatch or invalid opcode. */
178 MLX5_CQE_STATUS_ERR = -3, /* Opcode is a response/request error. */
182 * Check whether CQE is valid.
187 * Size of completion queue.
/*
 * Classify a CQE as hardware-owned, errored or software-owned from its
 * op_own byte.
 * NOTE(review): parts of this definition (the remaining parameter line, the
 * braces and possibly other statements) are not visible in this view.
 */
194 static __rte_always_inline enum mlx5_cqe_status
195 check_cqe(volatile struct mlx5_cqe *cqe, const uint16_t cqes_n,
/* Non-zero when `ci` has the `cqes_n` bit(s) set; compared with the owner bit below. */
198 const uint16_t idx = ci & cqes_n;
/* Read op_own once from the volatile CQE, then split it into owner and opcode. */
199 const uint8_t op_own = cqe->op_own;
200 const uint8_t op_owner = MLX5_CQE_OWNER(op_own);
201 const uint8_t op_code = MLX5_CQE_OPCODE(op_own);
/* Owner bit differs from the expected value, or the opcode is invalid. */
203 if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
204 return MLX5_CQE_STATUS_HW_OWN;
/* Responder or requester error opcode. */
206 if (unlikely(op_code == MLX5_CQE_RESP_ERR ||
207 op_code == MLX5_CQE_REQ_ERR))
208 return MLX5_CQE_STATUS_ERR;
209 return MLX5_CQE_STATUS_SW_OWN;
213 * Get PCI address <DBDF> string from EAL device.
216 * The output address buffer string
218 * The output buffer size
221 * - Negative value and rte_errno is set otherwise.
223 int mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size);
226 * Get PCI address from sysfs of a PCI-related device.
228 * @param[in] dev_path
229 * The sysfs path should not point to the direct plain PCI device.
230 * Instead, the node "/device/" is used to access the real device.
231 * @param[out] pci_addr
232 * Parsed PCI address.
236 * - Negative value and rte_errno is set otherwise.
239 int mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr);
242 * Get kernel network interface name from sysfs IB device path.
244 * @param[in] ibdev_path
245 * The sysfs path to IB device.
247 * Interface name output of size IF_NAMESIZE.
251 * - Negative value and rte_errno is set otherwise.
254 int mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname);
257 int mlx5_auxiliary_get_child_name(const char *dev, const char *node,
258 char *child, size_t size);
/*
 * Each class is assigned its own bit (bits 0 to 4) through RTE_BIT64().
 * NOTE(review): the enclosing enum header is not visible in this view.
 */
262 MLX5_CLASS_ETH = RTE_BIT64(0),
263 MLX5_CLASS_VDPA = RTE_BIT64(1),
264 MLX5_CLASS_REGEX = RTE_BIT64(2),
265 MLX5_CLASS_COMPRESS = RTE_BIT64(3),
266 MLX5_CLASS_CRYPTO = RTE_BIT64(4),
/* Defined as one cache line (RTE_CACHE_LINE_SIZE); presumably the doorbell record size - confirm. */
269 #define MLX5_DBR_SIZE RTE_CACHE_LINE_SIZE
271 /* devX creation object */
/* Pairs an opaque DV object pointer with its integer object ID. */
272 struct mlx5_devx_obj {
273 void *obj; /* The DV object. */
274 int id; /* The object ID. */
277 /* UMR memory buffer used to define 1 entry in indirect mkey. */
284 /** Control for key/values list. */
/* Wraps an rte_kvargs list together with a per-entry "used" flag array. */
285 struct mlx5_kvargs_ctrl {
286 struct rte_kvargs *kvlist; /* Structure containing list of key/values.*/
/* One flag per possible entry (RTE_KVARGS_MAX of them). */
287 bool is_used[RTE_KVARGS_MAX]; /* Indicator which devargs were used. */
291 * Call a handler function for each key/value in the list of keys.
293 * For each key/value association that matches one of the given keys, calls
294 * the handler function, passing it the value stored in the dictionary for
295 * that key and the given extra argument.
298 * The mlx5_kvargs structure.
300 * A list of keys to process (table of const char *, the last must be NULL).
302 * The function to call for each matching key.
304 * A pointer passed unchanged to the handler.
308 * - Negative on error
312 mlx5_kvargs_process(struct mlx5_kvargs_ctrl *mkvlist, const char *const keys[],
313 arg_handler_t handler, void *opaque_arg);
315 /* All UAR arguments using doorbell register in datapath. */
/* NOTE(review): some member and preprocessor lines of this struct are not visible in this view. */
316 struct mlx5_uar_data {
318 /* The doorbell's virtual address mapped to the relevant HW UAR space.*/
320 rte_spinlock_t *sl_p;
321 /* Pointer to UAR access lock required for 32bit implementations. */
/* mlx5_doorbell_ring() holds this lock around its two 32-bit stores in the !RTE_ARCH_64 path. */
322 #endif /* RTE_ARCH_64 */
325 /* DevX UAR control structure. */
/* NOTE(review): the struct header and the opening preprocessor conditional are not visible in this view. */
327 struct mlx5_uar_data bf_db; /* UAR data for Blueflame register. */
328 struct mlx5_uar_data cq_db; /* UAR data for CQ arm db register. */
329 void *obj; /* DevX UAR object. */
330 bool dbnc; /* Doorbell mapped to non-cached region. */
/* One lock per doorbell above; presumably the targets of bf_db.sl_p / cq_db.sl_p - confirm. */
332 rte_spinlock_t bf_sl;
333 rte_spinlock_t cq_sl;
334 /* UAR access locks required for 32bit implementations. */
335 #endif /* RTE_ARCH_64 */
339 * Ring a doorbell and flush the update if requested.
342 * Pointer to UAR data structure.
344 * value to write in big endian format.
346 * Index of doorbell record.
348 * Address of doorbell record.
350 * Decide whether to flush the DB writing using a memory barrier.
/*
 * Update the doorbell record, then write `val` to the UAR doorbell register.
 * NOTE(review): several lines (braces, barriers, the 64-bit path and the
 * handling of the last parameter) are not visible in this view. The last
 * parameter is spelled `flash` although its description talks about
 * flushing - confirm the spelling is intentional.
 */
352 static __rte_always_inline void
353 mlx5_doorbell_ring(struct mlx5_uar_data *uar, uint64_t val, uint32_t index,
354 volatile uint32_t *db_rec, bool flash)
/* Store the record index converted with rte_cpu_to_be_32(). */
357 *db_rec = rte_cpu_to_be_32(index);
358 /* Ensure ordering between DB record actual update and UAR access. */
362 #else /* !RTE_ARCH_64 */
/* Without RTE_ARCH_64: write the 64-bit value as two 32-bit stores under the UAR lock. */
363 rte_spinlock_lock(uar->sl_p);
/* First store: the assignment truncates `val` to its low 32 bits. */
364 *(volatile uint32_t *)uar->db = val;
/* Second store: the high 32 bits go to the next 32-bit word. */
366 *((volatile uint32_t *)uar->db + 1) = val >> 32;
367 rte_spinlock_unlock(uar->sl_p);
374 * Get the doorbell register mapping type.
376 * @param uar_mmap_offset
377 * Mmap offset of Verbs/DevX UAR.
382 * 1 for non-cached, 0 otherwise.
/*
 * Extract the command field encoded in a UAR mmap offset and compare it with
 * MLX5_MMAP_GET_NC_PAGES_CMD.
 * NOTE(review): the braces and the return statements are not visible in this
 * view.
 */
384 static inline uint16_t
385 mlx5_db_map_type_get(off_t uar_mmap_offset, size_t page_size)
/* Convert the byte offset into units of pages. */
387 off_t cmd = uar_mmap_offset / page_size;
/* Isolate the command field: shift right, then mask. */
389 cmd >>= MLX5_UAR_MMAP_CMD_SHIFT;
390 cmd &= MLX5_UAR_MMAP_CMD_MASK;
391 if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD)
397 void mlx5_translate_port_name(const char *port_name_in,
398 struct mlx5_switch_info *port_info_out);
399 void mlx5_glue_constructor(void);
400 extern uint8_t haswell_broadwell_cpu;
403 void mlx5_common_init(void);
406 * Common Driver Interface
408 * ConnectX common driver supports multiple classes: net, vDPA, regex, crypto
409 * and compress devices. This layer enables creating such multiple classes
410 * on a single device by allowing to bind multiple class-specific device
411 * drivers to attach to the common driver.
413 * ------------ ------------- -------------- ----------------- ------------
414 * | mlx5 net | | mlx5 vdpa | | mlx5 regex | | mlx5 compress | | mlx5 ... |
415 * | driver | | driver | | driver | | driver | | drivers |
416 * ------------ ------------- -------------- ----------------- ------------
423 * ----------- -----------------
425 * | pci dev | | auxiliary dev |
426 * ----------- -----------------
428 * - mlx5 PCI bus driver binds to mlx5 PCI devices defined by PCI ID table
429 * of all related devices.
430 * - mlx5 class driver such as net, vDPA, regex defines its specific
431 * PCI ID table and mlx5 bus driver probes matching class drivers.
432 * - mlx5 common driver is central place that validates supported
433 * class combinations.
434 * - mlx5 common driver hides bus difference by resolving device address
435 * from devargs, locating target RDMA device and probing with it.
439 * Device configuration structure.
441 * Merged configuration from:
443 * - Device capabilities,
444 * - User device parameters disabled features.
/*
 * Per-device configuration. device_fd and pd_handle are compared against
 * MLX5_ARG_UNSET by mlx5_imported_pd_and_ctx().
 */
446 struct mlx5_common_dev_config {
447 struct mlx5_hca_attr hca_attr; /* HCA attributes. */
448 int dbnc; /* Skip doorbell register write barrier. */
449 int device_fd; /* Device file descriptor for importation. */
450 int pd_handle; /* Protection Domain handle for importation. */
451 unsigned int devx:1; /* Whether devx interface is available or not. */
452 unsigned int sys_mem_en:1; /* The default memory allocator. */
453 unsigned int mr_mempool_reg_en:1;
454 /* Allow/prevent implicit mempool memory registration. */
455 unsigned int mr_ext_memseg_en:1;
456 /* Whether memseg should be extended for MR creation. */
/* State kept per common device; instances are linked through `next` (TAILQ entry). */
459 struct mlx5_common_device {
/* Presumably the underlying EAL device - confirm. */
460 struct rte_device *dev;
461 TAILQ_ENTRY(mlx5_common_device) next;
/*
 * Presumably a bit mask of the mlx5 class values already loaded - confirm.
 * NOTE(review): this field is 32 bits wide while the class values are built
 * with RTE_BIT64().
 */
462 uint32_t classes_loaded;
463 void *ctx; /* Verbs/DV/DevX context. */
464 void *pd; /* Protection Domain. */
465 uint32_t pdn; /* Protection Domain Number. */
466 struct mlx5_mr_share_cache mr_scache; /* Global shared MR cache. */
467 struct mlx5_common_dev_config config; /* Device configuration. */
471 * Indicates whether PD and CTX are imported from another process,
472 * or created by this process.
475 * Pointer to common device.
478 * True if PD and CTX are imported from another process, False otherwise.
/*
 * Evaluates to true only when both config.device_fd and config.pd_handle
 * differ from MLX5_ARG_UNSET.
 */
481 mlx5_imported_pd_and_ctx(struct mlx5_common_device *cdev)
483 return cdev->config.device_fd != MLX5_ARG_UNSET &&
484 cdev->config.pd_handle != MLX5_ARG_UNSET;
488 * Initialization function for the driver called during device probing.
/*
 * Function type of a class driver probe callback: receives the common device
 * and the key/values control structure, and returns an int.
 */
490 typedef int (mlx5_class_driver_probe_t)(struct mlx5_common_device *cdev,
491 struct mlx5_kvargs_ctrl *mkvlist);
494 * Uninitialization function for the driver called during hot-unplugging.
/* Function type of a class driver remove callback: receives the common device and returns an int. */
496 typedef int (mlx5_class_driver_remove_t)(struct mlx5_common_device *cdev);
498 /** Device already probed can be probed again to check for new ports. */
/*
 * NOTE(review): struct mlx5_class_driver below carries its own probe_again
 * bit; how this flag value relates to it cannot be determined from this view.
 */
499 #define MLX5_DRV_PROBE_AGAIN 0x0004
502 * A structure describing a mlx5 common class driver.
/*
 * Describes one class driver: its class, name, probe/remove callbacks, PCI
 * ID table and capability bits. Instances are linked through `next`.
 */
504 struct mlx5_class_driver {
505 TAILQ_ENTRY(mlx5_class_driver) next;
506 enum mlx5_class drv_class; /**< Class of this driver. */
507 const char *name; /**< Driver name. */
508 mlx5_class_driver_probe_t *probe; /**< Device probe function. */
509 mlx5_class_driver_remove_t *remove; /**< Device remove function. */
510 const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */
511 uint32_t probe_again:1;
512 /**< Device already probed can be probed again to check new device. */
513 uint32_t intr_lsc:1; /**< Supports link state interrupt. */
514 uint32_t intr_rmv:1; /**< Supports device remove interrupt. */
518 * Register a mlx5 device driver.
521 * A pointer to a mlx5_class_driver structure describing the driver
526 mlx5_class_driver_register(struct mlx5_class_driver *driver);
529 * Test device is a PCI bus device.
535 * - True if the device devargs refer to a PCI bus device.
540 mlx5_dev_is_pci(const struct rte_device *dev);
543 * Test PCI device is a VF device.
546 * Pointer to PCI device.
549 * - True if the PCI device is a VF device.
554 mlx5_dev_is_vf_pci(struct rte_pci_device *pci_dev);
558 mlx5_dev_mempool_subscribe(struct mlx5_common_device *cdev);
562 mlx5_dev_mempool_unregister(struct mlx5_common_device *cdev,
563 struct rte_mempool *mp);
567 mlx5_devx_uar_prepare(struct mlx5_common_device *cdev, struct mlx5_uar *uar);
571 mlx5_devx_uar_release(struct mlx5_uar *uar);
573 /* mlx5_common_os.c */
575 int mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes);
576 int mlx5_os_pd_prepare(struct mlx5_common_device *cdev);
577 int mlx5_os_pd_release(struct mlx5_common_device *cdev);
578 int mlx5_os_remote_pd_and_ctx_validate(struct mlx5_common_dev_config *config);
580 /* mlx5 PMD wrapped MR struct. */
/* NOTE(review): the leading members and the closing brace of this struct are not visible in this view. */
581 struct mlx5_pmd_wrapped_mr {
585 void *obj; /* verbs mr object or devx umem object. */
586 void *imkey; /* DevX indirect mkey object. */
591 mlx5_os_wrapped_mkey_create(void *ctx, void *pd, uint32_t pdn, void *addr,
592 size_t length, struct mlx5_pmd_wrapped_mr *pmd_mr);
596 mlx5_os_wrapped_mkey_destroy(struct mlx5_pmd_wrapped_mr *pmd_mr);
598 #endif /* RTE_PMD_MLX5_COMMON_H_ */