net/mlx5: workaround ASO memory region creation
[dpdk.git] / drivers / common / mlx5 / linux / mlx5_common_os.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  */
4
5 #include <unistd.h>
6 #include <string.h>
7 #include <stdio.h>
8 #ifdef RTE_IBVERBS_LINK_DLOPEN
9 #include <dlfcn.h>
10 #endif
11 #include <dirent.h>
12 #include <net/if.h>
13
14 #include <rte_errno.h>
15 #include <rte_string_fns.h>
16
17 #include "mlx5_common.h"
18 #include "mlx5_common_log.h"
19 #include "mlx5_common_os.h"
20 #include "mlx5_glue.h"
21
22 #ifdef MLX5_GLUE
23 const struct mlx5_glue *mlx5_glue; /* Assigned by mlx5_glue_dlopen(); reset to NULL by mlx5_glue_constructor() on failure. */
24 #endif
25
26 /**
27  * Get PCI information by sysfs device path.
28  *
29  * @param dev_path
30  *   Pointer to device sysfs folder name.
31  * @param[out] pci_addr
32  *   PCI bus address output buffer.
33  *
34  * @return
35  *   0 on success, a negative errno value otherwise and rte_errno is set.
36  */
37 int
38 mlx5_dev_to_pci_addr(const char *dev_path,
39                      struct rte_pci_addr *pci_addr)
40 {
41         FILE *file;
42         char line[32];
43         int rc = -ENOENT;
44         MKSTR(path, "%s/device/uevent", dev_path);
45
46         file = fopen(path, "rb");
47         if (file == NULL) {
48                 rte_errno = errno;
49                 return -rte_errno;
50         }
51         while (fgets(line, sizeof(line), file) == line) {
52                 size_t len = strlen(line);
53
54                 /* Truncate long lines. */
55                 if (len == (sizeof(line) - 1)) {
56                         while (line[(len - 1)] != '\n') {
57                                 int ret = fgetc(file);
58
59                                 if (ret == EOF)
60                                         goto exit;
61                                 line[(len - 1)] = ret;
62                         }
63                         /* No match for long lines. */
64                         continue;
65                 }
66                 /* Extract information. */
67                 if (sscanf(line,
68                            "PCI_SLOT_NAME="
69                            "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
70                            &pci_addr->domain,
71                            &pci_addr->bus,
72                            &pci_addr->devid,
73                            &pci_addr->function) == 4) {
74                         rc = 0;
75                         break;
76                 }
77         }
78 exit:
79         fclose(file);
80         if (rc)
81                 rte_errno = -rc;
82         return rc;
83 }
84
85 /**
86  * Extract port name, as a number, from sysfs or netlink information.
87  *
88  * @param[in] port_name_in
89  *   String representing the port name.
90  * @param[out] port_info_out
91  *   Port information, including port name as a number and port name
92  *   type if recognized
93  *
94  * @return
95  *   port_name field set according to recognized name format.
96  */
97 void
98 mlx5_translate_port_name(const char *port_name_in,
99                          struct mlx5_switch_info *port_info_out)
100 {
101         char ctrl = 0, pf_c1, pf_c2, vf_c1, vf_c2, eol;
102         char *end;
103         int sc_items;
104
105         sc_items = sscanf(port_name_in, "%c%d",
106                           &ctrl, &port_info_out->ctrl_num);
107         if (sc_items == 2 && ctrl == 'c') {
108                 port_name_in++; /* 'c' */
109                 port_name_in += snprintf(NULL, 0, "%d",
110                                           port_info_out->ctrl_num);
111         }
112         /* Check for port-name as a string of the form pf0vf0 or pf0sf0 */
113         sc_items = sscanf(port_name_in, "%c%c%d%c%c%d%c",
114                           &pf_c1, &pf_c2, &port_info_out->pf_num,
115                           &vf_c1, &vf_c2, &port_info_out->port_name, &eol);
116         if (sc_items == 6 && pf_c1 == 'p' && pf_c2 == 'f') {
117                 if (vf_c1 == 'v' && vf_c2 == 'f') {
118                         /* Kernel ver >= 5.0 or OFED ver >= 4.6 */
119                         port_info_out->name_type =
120                                         MLX5_PHYS_PORT_NAME_TYPE_PFVF;
121                         return;
122                 }
123                 if (vf_c1 == 's' && vf_c2 == 'f') {
124                         /* Kernel ver >= 5.11 or OFED ver >= 5.1 */
125                         port_info_out->name_type =
126                                         MLX5_PHYS_PORT_NAME_TYPE_PFSF;
127                         return;
128                 }
129         }
130         /*
131          * Check for port-name as a string of the form p0
132          * (support kernel ver >= 5.0, or OFED ver >= 4.6).
133          */
134         sc_items = sscanf(port_name_in, "%c%d%c",
135                           &pf_c1, &port_info_out->port_name, &eol);
136         if (sc_items == 2 && pf_c1 == 'p') {
137                 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
138                 return;
139         }
140         /*
141          * Check for port-name as a string of the form pf0
142          * (support kernel ver >= 5.7 for HPF representor on BF).
143          */
144         sc_items = sscanf(port_name_in, "%c%c%d%c",
145                           &pf_c1, &pf_c2, &port_info_out->pf_num, &eol);
146         if (sc_items == 3 && pf_c1 == 'p' && pf_c2 == 'f') {
147                 port_info_out->port_name = -1;
148                 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFHPF;
149                 return;
150         }
151         /* Check for port-name as a number (support kernel ver < 5.0 */
152         errno = 0;
153         port_info_out->port_name = strtol(port_name_in, &end, 0);
154         if (!errno &&
155             (size_t)(end - port_name_in) == strlen(port_name_in)) {
156                 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY;
157                 return;
158         }
159         port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
160 }
161
162 /**
163  * Get kernel interface name from IB device path.
164  *
165  * @param[in] ibdev_path
166  *   Pointer to IB device path.
167  * @param[out] ifname
168  *   Interface name output buffer.
169  *
170  * @return
171  *   0 on success, a negative errno value otherwise and rte_errno is set.
172  */
173 int
174 mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname)
175 {
176         DIR *dir;
177         struct dirent *dent;
178         unsigned int dev_type = 0;
179         unsigned int dev_port_prev = ~0u;
180         char match[IF_NAMESIZE] = "";
181
182         MLX5_ASSERT(ibdev_path);
183         {
184                 MKSTR(path, "%s/device/net", ibdev_path);
185
186                 dir = opendir(path);
187                 if (dir == NULL) {
188                         rte_errno = errno;
189                         return -rte_errno;
190                 }
191         }
192         while ((dent = readdir(dir)) != NULL) {
193                 char *name = dent->d_name;
194                 FILE *file;
195                 unsigned int dev_port;
196                 int r;
197
198                 if ((name[0] == '.') &&
199                     ((name[1] == '\0') ||
200                      ((name[1] == '.') && (name[2] == '\0'))))
201                         continue;
202
203                 MKSTR(path, "%s/device/net/%s/%s",
204                       ibdev_path, name,
205                       (dev_type ? "dev_id" : "dev_port"));
206
207                 file = fopen(path, "rb");
208                 if (file == NULL) {
209                         if (errno != ENOENT)
210                                 continue;
211                         /*
212                          * Switch to dev_id when dev_port does not exist as
213                          * is the case with Linux kernel versions < 3.15.
214                          */
215 try_dev_id:
216                         match[0] = '\0';
217                         if (dev_type)
218                                 break;
219                         dev_type = 1;
220                         dev_port_prev = ~0u;
221                         rewinddir(dir);
222                         continue;
223                 }
224                 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
225                 fclose(file);
226                 if (r != 1)
227                         continue;
228                 /*
229                  * Switch to dev_id when dev_port returns the same value for
230                  * all ports. May happen when using a MOFED release older than
231                  * 3.0 with a Linux kernel >= 3.15.
232                  */
233                 if (dev_port == dev_port_prev)
234                         goto try_dev_id;
235                 dev_port_prev = dev_port;
236                 if (dev_port == 0)
237                         strlcpy(match, name, IF_NAMESIZE);
238         }
239         closedir(dir);
240         if (match[0] == '\0') {
241                 rte_errno = ENOENT;
242                 return -rte_errno;
243         }
244         strncpy(ifname, match, IF_NAMESIZE);
245         return 0;
246 }
247
248 #ifdef MLX5_GLUE
249
250 /**
251  * Suffix RTE_EAL_PMD_PATH with "-glue".
252  *
253  * This function performs a sanity check on RTE_EAL_PMD_PATH before
254  * suffixing its last component.
255  *
256  * @param buf[out]
257  *   Output buffer, should be large enough otherwise NULL is returned.
258  * @param size
259  *   Size of @p out.
260  *
261  * @return
262  *   Pointer to @p buf or @p NULL in case suffix cannot be appended.
263  */
264 static char *
265 mlx5_glue_path(char *buf, size_t size)
266 {
267         static const char *const bad[] = { "/", ".", "..", NULL };
268         const char *path = RTE_EAL_PMD_PATH;
269         size_t len = strlen(path);
270         size_t off;
271         int i;
272
273         while (len && path[len - 1] == '/')
274                 --len;
275         for (off = len; off && path[off - 1] != '/'; --off)
276                 ;
277         for (i = 0; bad[i]; ++i)
278                 if (!strncmp(path + off, bad[i], (int)(len - off)))
279                         goto error;
280         i = snprintf(buf, size, "%.*s-glue", (int)len, path);
281         if (i == -1 || (size_t)i >= size)
282                 goto error;
283         return buf;
284 error:
285         RTE_LOG(ERR, PMD, "unable to append \"-glue\" to last component of"
286                 " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"), please"
287                 " re-configure DPDK");
288         return NULL;
289 }
290
291 static int
292 mlx5_glue_dlopen(void)
293 {
294         char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")];
295         void *handle = NULL;
296
297         char const *path[] = {
298                 /*
299                  * A basic security check is necessary before trusting
300                  * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH.
301                  */
302                 (geteuid() == getuid() && getegid() == getgid() ?
303                  getenv("MLX5_GLUE_PATH") : NULL),
304                 /*
305                  * When RTE_EAL_PMD_PATH is set, use its glue-suffixed
306                  * variant, otherwise let dlopen() look up libraries on its
307                  * own.
308                  */
309                 (*RTE_EAL_PMD_PATH ?
310                  mlx5_glue_path(glue_path, sizeof(glue_path)) : ""),
311         };
312         unsigned int i = 0;
313         void **sym;
314         const char *dlmsg;
315
316         while (!handle && i != RTE_DIM(path)) {
317                 const char *end;
318                 size_t len;
319                 int ret;
320
321                 if (!path[i]) {
322                         ++i;
323                         continue;
324                 }
325                 end = strpbrk(path[i], ":;");
326                 if (!end)
327                         end = path[i] + strlen(path[i]);
328                 len = end - path[i];
329                 ret = 0;
330                 do {
331                         char name[ret + 1];
332
333                         ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE,
334                                        (int)len, path[i],
335                                        (!len || *(end - 1) == '/') ? "" : "/");
336                         if (ret == -1)
337                                 break;
338                         if (sizeof(name) != (size_t)ret + 1)
339                                 continue;
340                         DRV_LOG(DEBUG, "Looking for rdma-core glue as "
341                                 "\"%s\"", name);
342                         handle = dlopen(name, RTLD_LAZY);
343                         break;
344                 } while (1);
345                 path[i] = end + 1;
346                 if (!*end)
347                         ++i;
348         }
349         if (!handle) {
350                 rte_errno = EINVAL;
351                 dlmsg = dlerror();
352                 if (dlmsg)
353                         DRV_LOG(WARNING, "Cannot load glue library: %s", dlmsg);
354                 goto glue_error;
355         }
356         sym = dlsym(handle, "mlx5_glue");
357         if (!sym || !*sym) {
358                 rte_errno = EINVAL;
359                 dlmsg = dlerror();
360                 if (dlmsg)
361                         DRV_LOG(ERR, "Cannot resolve glue symbol: %s", dlmsg);
362                 goto glue_error;
363         }
364         mlx5_glue = *sym;
365         return 0;
366
367 glue_error:
368         if (handle)
369                 dlclose(handle);
370         return -1;
371 }
372
373 #endif
374
375 /**
376  * Initialization routine for run-time dependency on rdma-core.
377  */
378 void
379 mlx5_glue_constructor(void)
380 {
381         /*
382          * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
383          * huge pages. Calling ibv_fork_init() during init allows
384          * applications to use fork() safely for purposes other than
385          * using this PMD, which is not supported in forked processes.
386          */
387         setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
388         /* Match the size of Rx completion entry to the size of a cacheline. */
389         if (RTE_CACHE_LINE_SIZE == 128)
390                 setenv("MLX5_CQE_SIZE", "128", 0);
391         /*
392          * MLX5_DEVICE_FATAL_CLEANUP tells ibv_destroy functions to
393          * cleanup all the Verbs resources even when the device was removed.
394          */
395         setenv("MLX5_DEVICE_FATAL_CLEANUP", "1", 1);
396
397 #ifdef MLX5_GLUE
398         if (mlx5_glue_dlopen() != 0)
399                 goto glue_error;
400 #endif
401
402 #ifdef RTE_LIBRTE_MLX5_DEBUG
403         /* Glue structure must not contain any NULL pointers. */
404         {
405                 unsigned int i;
406
407                 for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i)
408                         MLX5_ASSERT(((const void *const *)mlx5_glue)[i]);
409         }
410 #endif
411         if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) {
412                 rte_errno = EINVAL;
413                 DRV_LOG(ERR, "rdma-core glue \"%s\" mismatch: \"%s\" is "
414                         "required", mlx5_glue->version, MLX5_GLUE_VERSION);
415                 goto glue_error;
416         }
417         mlx5_glue->fork_init();
418         return;
419
420 glue_error:
421         DRV_LOG(WARNING, "Cannot initialize MLX5 common due to missing"
422                 " run-time dependency on rdma-core libraries (libibverbs,"
423                 " libmlx5)");
424         mlx5_glue = NULL;
425 }
426
427 struct ibv_device *
428 mlx5_os_get_ibv_device(struct rte_pci_addr *addr)
429 {
430         int n;
431         struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
432         struct ibv_device *ibv_match = NULL;
433
434         if (ibv_list == NULL) {
435                 rte_errno = ENOSYS;
436                 return NULL;
437         }
438         while (n-- > 0) {
439                 struct rte_pci_addr paddr;
440
441                 DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name);
442                 if (mlx5_dev_to_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0)
443                         continue;
444                 if (rte_pci_addr_cmp(addr, &paddr) != 0)
445                         continue;
446                 ibv_match = ibv_list[n];
447                 break;
448         }
449         if (ibv_match == NULL)
450                 rte_errno = ENOENT;
451         mlx5_glue->free_device_list(ibv_list);
452         return ibv_match;
453 }