9e0c823c9755c5590f26eaa374d0603b4a01c8cb
[dpdk.git] / drivers / common / mlx5 / linux / mlx5_common_os.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  */
4
5 #include <unistd.h>
6 #include <string.h>
7 #include <stdio.h>
8 #ifdef RTE_IBVERBS_LINK_DLOPEN
9 #include <dlfcn.h>
10 #endif
11 #include <dirent.h>
12 #include <net/if.h>
13
14 #include <rte_errno.h>
15 #include <rte_string_fns.h>
16
17 #include "mlx5_common.h"
18 #include "mlx5_common_log.h"
19 #include "mlx5_common_os.h"
20 #include "mlx5_glue.h"
21
#if defined(MLX5_GLUE)
/*
 * Glue function table pointer. It is defined in this file only for
 * MLX5_GLUE builds; mlx5_glue_dlopen() assigns it from the loaded library
 * and mlx5_glue_constructor() resets it to NULL on failure.
 */
const struct mlx5_glue *mlx5_glue;
#endif /* MLX5_GLUE */
25
26 int
27 mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr)
28 {
29         FILE *file;
30         char line[32];
31         int rc = -ENOENT;
32         MKSTR(path, "%s/device/uevent", dev_path);
33
34         file = fopen(path, "rb");
35         if (file == NULL) {
36                 rte_errno = errno;
37                 return -rte_errno;
38         }
39         while (fgets(line, sizeof(line), file) == line) {
40                 size_t len = strlen(line);
41
42                 /* Truncate long lines. */
43                 if (len == (sizeof(line) - 1)) {
44                         while (line[(len - 1)] != '\n') {
45                                 int ret = fgetc(file);
46
47                                 if (ret == EOF)
48                                         goto exit;
49                                 line[(len - 1)] = ret;
50                         }
51                         /* No match for long lines. */
52                         continue;
53                 }
54                 /* Extract information. */
55                 if (sscanf(line,
56                            "PCI_SLOT_NAME="
57                            "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
58                            &pci_addr->domain,
59                            &pci_addr->bus,
60                            &pci_addr->devid,
61                            &pci_addr->function) == 4) {
62                         rc = 0;
63                         break;
64                 }
65         }
66 exit:
67         fclose(file);
68         if (rc)
69                 rte_errno = -rc;
70         return rc;
71 }
72
73 /**
74  * Extract port name, as a number, from sysfs or netlink information.
75  *
76  * @param[in] port_name_in
77  *   String representing the port name.
78  * @param[out] port_info_out
79  *   Port information, including port name as a number and port name
80  *   type if recognized
81  *
82  * @return
83  *   port_name field set according to recognized name format.
84  */
85 void
86 mlx5_translate_port_name(const char *port_name_in,
87                          struct mlx5_switch_info *port_info_out)
88 {
89         char ctrl = 0, pf_c1, pf_c2, vf_c1, vf_c2, eol;
90         char *end;
91         int sc_items;
92
93         sc_items = sscanf(port_name_in, "%c%d",
94                           &ctrl, &port_info_out->ctrl_num);
95         if (sc_items == 2 && ctrl == 'c') {
96                 port_name_in++; /* 'c' */
97                 port_name_in += snprintf(NULL, 0, "%d",
98                                           port_info_out->ctrl_num);
99         }
100         /* Check for port-name as a string of the form pf0vf0 or pf0sf0 */
101         sc_items = sscanf(port_name_in, "%c%c%d%c%c%d%c",
102                           &pf_c1, &pf_c2, &port_info_out->pf_num,
103                           &vf_c1, &vf_c2, &port_info_out->port_name, &eol);
104         if (sc_items == 6 && pf_c1 == 'p' && pf_c2 == 'f') {
105                 if (vf_c1 == 'v' && vf_c2 == 'f') {
106                         /* Kernel ver >= 5.0 or OFED ver >= 4.6 */
107                         port_info_out->name_type =
108                                         MLX5_PHYS_PORT_NAME_TYPE_PFVF;
109                         return;
110                 }
111                 if (vf_c1 == 's' && vf_c2 == 'f') {
112                         /* Kernel ver >= 5.11 or OFED ver >= 5.1 */
113                         port_info_out->name_type =
114                                         MLX5_PHYS_PORT_NAME_TYPE_PFSF;
115                         return;
116                 }
117         }
118         /*
119          * Check for port-name as a string of the form p0
120          * (support kernel ver >= 5.0, or OFED ver >= 4.6).
121          */
122         sc_items = sscanf(port_name_in, "%c%d%c",
123                           &pf_c1, &port_info_out->port_name, &eol);
124         if (sc_items == 2 && pf_c1 == 'p') {
125                 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
126                 return;
127         }
128         /*
129          * Check for port-name as a string of the form pf0
130          * (support kernel ver >= 5.7 for HPF representor on BF).
131          */
132         sc_items = sscanf(port_name_in, "%c%c%d%c",
133                           &pf_c1, &pf_c2, &port_info_out->pf_num, &eol);
134         if (sc_items == 3 && pf_c1 == 'p' && pf_c2 == 'f') {
135                 port_info_out->port_name = -1;
136                 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFHPF;
137                 return;
138         }
139         /* Check for port-name as a number (support kernel ver < 5.0 */
140         errno = 0;
141         port_info_out->port_name = strtol(port_name_in, &end, 0);
142         if (!errno &&
143             (size_t)(end - port_name_in) == strlen(port_name_in)) {
144                 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY;
145                 return;
146         }
147         port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
148 }
149
150 int
151 mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname)
152 {
153         DIR *dir;
154         struct dirent *dent;
155         unsigned int dev_type = 0;
156         unsigned int dev_port_prev = ~0u;
157         char match[IF_NAMESIZE] = "";
158
159         MLX5_ASSERT(ibdev_path);
160         {
161                 MKSTR(path, "%s/device/net", ibdev_path);
162
163                 dir = opendir(path);
164                 if (dir == NULL) {
165                         rte_errno = errno;
166                         return -rte_errno;
167                 }
168         }
169         while ((dent = readdir(dir)) != NULL) {
170                 char *name = dent->d_name;
171                 FILE *file;
172                 unsigned int dev_port;
173                 int r;
174
175                 if ((name[0] == '.') &&
176                     ((name[1] == '\0') ||
177                      ((name[1] == '.') && (name[2] == '\0'))))
178                         continue;
179
180                 MKSTR(path, "%s/device/net/%s/%s",
181                       ibdev_path, name,
182                       (dev_type ? "dev_id" : "dev_port"));
183
184                 file = fopen(path, "rb");
185                 if (file == NULL) {
186                         if (errno != ENOENT)
187                                 continue;
188                         /*
189                          * Switch to dev_id when dev_port does not exist as
190                          * is the case with Linux kernel versions < 3.15.
191                          */
192 try_dev_id:
193                         match[0] = '\0';
194                         if (dev_type)
195                                 break;
196                         dev_type = 1;
197                         dev_port_prev = ~0u;
198                         rewinddir(dir);
199                         continue;
200                 }
201                 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
202                 fclose(file);
203                 if (r != 1)
204                         continue;
205                 /*
206                  * Switch to dev_id when dev_port returns the same value for
207                  * all ports. May happen when using a MOFED release older than
208                  * 3.0 with a Linux kernel >= 3.15.
209                  */
210                 if (dev_port == dev_port_prev)
211                         goto try_dev_id;
212                 dev_port_prev = dev_port;
213                 if (dev_port == 0)
214                         strlcpy(match, name, IF_NAMESIZE);
215         }
216         closedir(dir);
217         if (match[0] == '\0') {
218                 rte_errno = ENOENT;
219                 return -rte_errno;
220         }
221         strncpy(ifname, match, IF_NAMESIZE);
222         return 0;
223 }
224
225 #ifdef MLX5_GLUE
226
227 /**
228  * Suffix RTE_EAL_PMD_PATH with "-glue".
229  *
230  * This function performs a sanity check on RTE_EAL_PMD_PATH before
231  * suffixing its last component.
232  *
233  * @param buf[out]
234  *   Output buffer, should be large enough otherwise NULL is returned.
235  * @param size
236  *   Size of @p out.
237  *
238  * @return
239  *   Pointer to @p buf or @p NULL in case suffix cannot be appended.
240  */
241 static char *
242 mlx5_glue_path(char *buf, size_t size)
243 {
244         static const char *const bad[] = { "/", ".", "..", NULL };
245         const char *path = RTE_EAL_PMD_PATH;
246         size_t len = strlen(path);
247         size_t off;
248         int i;
249
250         while (len && path[len - 1] == '/')
251                 --len;
252         for (off = len; off && path[off - 1] != '/'; --off)
253                 ;
254         for (i = 0; bad[i]; ++i)
255                 if (!strncmp(path + off, bad[i], (int)(len - off)))
256                         goto error;
257         i = snprintf(buf, size, "%.*s-glue", (int)len, path);
258         if (i == -1 || (size_t)i >= size)
259                 goto error;
260         return buf;
261 error:
262         RTE_LOG(ERR, PMD, "unable to append \"-glue\" to last component of"
263                 " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"), please"
264                 " re-configure DPDK");
265         return NULL;
266 }
267
268 static int
269 mlx5_glue_dlopen(void)
270 {
271         char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")];
272         void *handle = NULL;
273
274         char const *path[] = {
275                 /*
276                  * A basic security check is necessary before trusting
277                  * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH.
278                  */
279                 (geteuid() == getuid() && getegid() == getgid() ?
280                  getenv("MLX5_GLUE_PATH") : NULL),
281                 /*
282                  * When RTE_EAL_PMD_PATH is set, use its glue-suffixed
283                  * variant, otherwise let dlopen() look up libraries on its
284                  * own.
285                  */
286                 (*RTE_EAL_PMD_PATH ?
287                  mlx5_glue_path(glue_path, sizeof(glue_path)) : ""),
288         };
289         unsigned int i = 0;
290         void **sym;
291         const char *dlmsg;
292
293         while (!handle && i != RTE_DIM(path)) {
294                 const char *end;
295                 size_t len;
296                 int ret;
297
298                 if (!path[i]) {
299                         ++i;
300                         continue;
301                 }
302                 end = strpbrk(path[i], ":;");
303                 if (!end)
304                         end = path[i] + strlen(path[i]);
305                 len = end - path[i];
306                 ret = 0;
307                 do {
308                         char name[ret + 1];
309
310                         ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE,
311                                        (int)len, path[i],
312                                        (!len || *(end - 1) == '/') ? "" : "/");
313                         if (ret == -1)
314                                 break;
315                         if (sizeof(name) != (size_t)ret + 1)
316                                 continue;
317                         DRV_LOG(DEBUG, "Looking for rdma-core glue as "
318                                 "\"%s\"", name);
319                         handle = dlopen(name, RTLD_LAZY);
320                         break;
321                 } while (1);
322                 path[i] = end + 1;
323                 if (!*end)
324                         ++i;
325         }
326         if (!handle) {
327                 rte_errno = EINVAL;
328                 dlmsg = dlerror();
329                 if (dlmsg)
330                         DRV_LOG(WARNING, "Cannot load glue library: %s", dlmsg);
331                 goto glue_error;
332         }
333         sym = dlsym(handle, "mlx5_glue");
334         if (!sym || !*sym) {
335                 rte_errno = EINVAL;
336                 dlmsg = dlerror();
337                 if (dlmsg)
338                         DRV_LOG(ERR, "Cannot resolve glue symbol: %s", dlmsg);
339                 goto glue_error;
340         }
341         mlx5_glue = *sym;
342         return 0;
343
344 glue_error:
345         if (handle)
346                 dlclose(handle);
347         return -1;
348 }
349
350 #endif
351
352 /**
353  * Initialization routine for run-time dependency on rdma-core.
354  */
355 void
356 mlx5_glue_constructor(void)
357 {
358         /*
359          * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
360          * huge pages. Calling ibv_fork_init() during init allows
361          * applications to use fork() safely for purposes other than
362          * using this PMD, which is not supported in forked processes.
363          */
364         setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
365         /* Match the size of Rx completion entry to the size of a cacheline. */
366         if (RTE_CACHE_LINE_SIZE == 128)
367                 setenv("MLX5_CQE_SIZE", "128", 0);
368         /*
369          * MLX5_DEVICE_FATAL_CLEANUP tells ibv_destroy functions to
370          * cleanup all the Verbs resources even when the device was removed.
371          */
372         setenv("MLX5_DEVICE_FATAL_CLEANUP", "1", 1);
373
374 #ifdef MLX5_GLUE
375         if (mlx5_glue_dlopen() != 0)
376                 goto glue_error;
377 #endif
378
379 #ifdef RTE_LIBRTE_MLX5_DEBUG
380         /* Glue structure must not contain any NULL pointers. */
381         {
382                 unsigned int i;
383
384                 for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i)
385                         MLX5_ASSERT(((const void *const *)mlx5_glue)[i]);
386         }
387 #endif
388         if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) {
389                 rte_errno = EINVAL;
390                 DRV_LOG(ERR, "rdma-core glue \"%s\" mismatch: \"%s\" is "
391                         "required", mlx5_glue->version, MLX5_GLUE_VERSION);
392                 goto glue_error;
393         }
394         mlx5_glue->fork_init();
395         return;
396
397 glue_error:
398         DRV_LOG(WARNING, "Cannot initialize MLX5 common due to missing"
399                 " run-time dependency on rdma-core libraries (libibverbs,"
400                 " libmlx5)");
401         mlx5_glue = NULL;
402 }
403
404 struct ibv_device *
405 mlx5_os_get_ibv_device(const struct rte_pci_addr *addr)
406 {
407         int n;
408         struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
409         struct ibv_device *ibv_match = NULL;
410
411         if (ibv_list == NULL) {
412                 rte_errno = ENOSYS;
413                 return NULL;
414         }
415         while (n-- > 0) {
416                 struct rte_pci_addr paddr;
417
418                 DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name);
419                 if (mlx5_get_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0)
420                         continue;
421                 if (rte_pci_addr_cmp(addr, &paddr) != 0)
422                         continue;
423                 ibv_match = ibv_list[n];
424                 break;
425         }
426         if (ibv_match == NULL)
427                 rte_errno = ENOENT;
428         mlx5_glue->free_device_list(ibv_list);
429         return ibv_match;
430 }