9d118311818141b8706620e624dd9d086241dce6
[dpdk.git] / drivers / net / mlx5 / mlx5_ethdev.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5
6 #include <stddef.h>
7 #include <assert.h>
8 #include <inttypes.h>
9 #include <unistd.h>
10 #include <stdbool.h>
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <string.h>
14 #include <stdlib.h>
15 #include <errno.h>
16 #include <dirent.h>
17 #include <net/if.h>
18 #include <sys/ioctl.h>
19 #include <sys/socket.h>
20 #include <netinet/in.h>
21 #include <linux/ethtool.h>
22 #include <linux/sockios.h>
23 #include <fcntl.h>
24 #include <stdalign.h>
25 #include <sys/un.h>
26 #include <time.h>
27
28 #include <rte_atomic.h>
29 #include <rte_ethdev_driver.h>
30 #include <rte_bus_pci.h>
31 #include <rte_mbuf.h>
32 #include <rte_common.h>
33 #include <rte_interrupts.h>
34 #include <rte_malloc.h>
35 #include <rte_string_fns.h>
36 #include <rte_rwlock.h>
37 #include <rte_cycles.h>
38
39 #include "mlx5.h"
40 #include "mlx5_glue.h"
41 #include "mlx5_rxtx.h"
42 #include "mlx5_utils.h"
43
44 /* Supported speed values found in /usr/include/linux/ethtool.h */
45 #ifndef HAVE_SUPPORTED_40000baseKR4_Full
46 #define SUPPORTED_40000baseKR4_Full (1 << 23)
47 #endif
48 #ifndef HAVE_SUPPORTED_40000baseCR4_Full
49 #define SUPPORTED_40000baseCR4_Full (1 << 24)
50 #endif
51 #ifndef HAVE_SUPPORTED_40000baseSR4_Full
52 #define SUPPORTED_40000baseSR4_Full (1 << 25)
53 #endif
54 #ifndef HAVE_SUPPORTED_40000baseLR4_Full
55 #define SUPPORTED_40000baseLR4_Full (1 << 26)
56 #endif
57 #ifndef HAVE_SUPPORTED_56000baseKR4_Full
58 #define SUPPORTED_56000baseKR4_Full (1 << 27)
59 #endif
60 #ifndef HAVE_SUPPORTED_56000baseCR4_Full
61 #define SUPPORTED_56000baseCR4_Full (1 << 28)
62 #endif
63 #ifndef HAVE_SUPPORTED_56000baseSR4_Full
64 #define SUPPORTED_56000baseSR4_Full (1 << 29)
65 #endif
66 #ifndef HAVE_SUPPORTED_56000baseLR4_Full
67 #define SUPPORTED_56000baseLR4_Full (1 << 30)
68 #endif
69
70 /* Add defines in case the running kernel is not the same as user headers. */
71 #ifndef ETHTOOL_GLINKSETTINGS
72 struct ethtool_link_settings {
73         uint32_t cmd;
74         uint32_t speed;
75         uint8_t duplex;
76         uint8_t port;
77         uint8_t phy_address;
78         uint8_t autoneg;
79         uint8_t mdio_support;
80         uint8_t eth_to_mdix;
81         uint8_t eth_tp_mdix_ctrl;
82         int8_t link_mode_masks_nwords;
83         uint32_t reserved[8];
84         uint32_t link_mode_masks[];
85 };
86
87 #define ETHTOOL_GLINKSETTINGS 0x0000004c
88 #define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
89 #define ETHTOOL_LINK_MODE_Autoneg_BIT 6
90 #define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
91 #define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
92 #define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
93 #define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
94 #define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
95 #define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
96 #define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
97 #define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
98 #define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
99 #define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
100 #define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
101 #define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
102 #define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
103 #define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
104 #endif
105 #ifndef HAVE_ETHTOOL_LINK_MODE_25G
106 #define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
107 #define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
108 #define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
109 #endif
110 #ifndef HAVE_ETHTOOL_LINK_MODE_50G
111 #define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
112 #define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
113 #endif
114 #ifndef HAVE_ETHTOOL_LINK_MODE_100G
115 #define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
116 #define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
117 #define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
118 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
119 #endif
120
121 /**
122  * Get master interface name from private structure.
123  *
124  * @param[in] dev
125  *   Pointer to Ethernet device.
126  * @param[out] ifname
127  *   Interface name output buffer.
128  *
129  * @return
130  *   0 on success, a negative errno value otherwise and rte_errno is set.
131  */
132 int
133 mlx5_get_master_ifname(const char *ibdev_path, char (*ifname)[IF_NAMESIZE])
134 {
135         DIR *dir;
136         struct dirent *dent;
137         unsigned int dev_type = 0;
138         unsigned int dev_port_prev = ~0u;
139         char match[IF_NAMESIZE] = "";
140
141         assert(ibdev_path);
142         {
143                 MKSTR(path, "%s/device/net", ibdev_path);
144
145                 dir = opendir(path);
146                 if (dir == NULL) {
147                         rte_errno = errno;
148                         return -rte_errno;
149                 }
150         }
151         while ((dent = readdir(dir)) != NULL) {
152                 char *name = dent->d_name;
153                 FILE *file;
154                 unsigned int dev_port;
155                 int r;
156
157                 if ((name[0] == '.') &&
158                     ((name[1] == '\0') ||
159                      ((name[1] == '.') && (name[2] == '\0'))))
160                         continue;
161
162                 MKSTR(path, "%s/device/net/%s/%s",
163                       ibdev_path, name,
164                       (dev_type ? "dev_id" : "dev_port"));
165
166                 file = fopen(path, "rb");
167                 if (file == NULL) {
168                         if (errno != ENOENT)
169                                 continue;
170                         /*
171                          * Switch to dev_id when dev_port does not exist as
172                          * is the case with Linux kernel versions < 3.15.
173                          */
174 try_dev_id:
175                         match[0] = '\0';
176                         if (dev_type)
177                                 break;
178                         dev_type = 1;
179                         dev_port_prev = ~0u;
180                         rewinddir(dir);
181                         continue;
182                 }
183                 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
184                 fclose(file);
185                 if (r != 1)
186                         continue;
187                 /*
188                  * Switch to dev_id when dev_port returns the same value for
189                  * all ports. May happen when using a MOFED release older than
190                  * 3.0 with a Linux kernel >= 3.15.
191                  */
192                 if (dev_port == dev_port_prev)
193                         goto try_dev_id;
194                 dev_port_prev = dev_port;
195                 if (dev_port == 0)
196                         strlcpy(match, name, sizeof(match));
197         }
198         closedir(dir);
199         if (match[0] == '\0') {
200                 rte_errno = ENOENT;
201                 return -rte_errno;
202         }
203         strncpy(*ifname, match, sizeof(*ifname));
204         return 0;
205 }
206
207 /**
208  * Get interface name from private structure.
209  *
210  * This is a port representor-aware version of mlx5_get_master_ifname().
211  *
212  * @param[in] dev
213  *   Pointer to Ethernet device.
214  * @param[out] ifname
215  *   Interface name output buffer.
216  *
217  * @return
218  *   0 on success, a negative errno value otherwise and rte_errno is set.
219  */
220 int
221 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE])
222 {
223         struct mlx5_priv *priv = dev->data->dev_private;
224         unsigned int ifindex;
225
226         assert(priv);
227         assert(priv->sh);
228         ifindex = mlx5_ifindex(dev);
229         if (!ifindex) {
230                 if (!priv->representor)
231                         return mlx5_get_master_ifname(priv->sh->ibdev_path,
232                                                       ifname);
233                 rte_errno = ENXIO;
234                 return -rte_errno;
235         }
236         if (if_indextoname(ifindex, &(*ifname)[0]))
237                 return 0;
238         rte_errno = errno;
239         return -rte_errno;
240 }
241
242 /**
243  * Get the interface index from device name.
244  *
245  * @param[in] dev
246  *   Pointer to Ethernet device.
247  *
248  * @return
249  *   Nonzero interface index on success, zero otherwise and rte_errno is set.
250  */
251 unsigned int
252 mlx5_ifindex(const struct rte_eth_dev *dev)
253 {
254         struct mlx5_priv *priv = dev->data->dev_private;
255         unsigned int ifindex;
256
257         assert(priv);
258         assert(priv->if_index);
259         ifindex = priv->if_index;
260         if (!ifindex)
261                 rte_errno = ENXIO;
262         return ifindex;
263 }
264
265 /**
266  * Perform ifreq ioctl() on associated Ethernet device.
267  *
268  * @param[in] dev
269  *   Pointer to Ethernet device.
270  * @param req
271  *   Request number to pass to ioctl().
272  * @param[out] ifr
273  *   Interface request structure output buffer.
274  *
275  * @return
276  *   0 on success, a negative errno value otherwise and rte_errno is set.
277  */
278 int
279 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)
280 {
281         int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
282         int ret = 0;
283
284         if (sock == -1) {
285                 rte_errno = errno;
286                 return -rte_errno;
287         }
288         ret = mlx5_get_ifname(dev, &ifr->ifr_name);
289         if (ret)
290                 goto error;
291         ret = ioctl(sock, req, ifr);
292         if (ret == -1) {
293                 rte_errno = errno;
294                 goto error;
295         }
296         close(sock);
297         return 0;
298 error:
299         close(sock);
300         return -rte_errno;
301 }
302
303 /**
304  * Get device MTU.
305  *
306  * @param dev
307  *   Pointer to Ethernet device.
308  * @param[out] mtu
309  *   MTU value output buffer.
310  *
311  * @return
312  *   0 on success, a negative errno value otherwise and rte_errno is set.
313  */
314 int
315 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)
316 {
317         struct ifreq request;
318         int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request);
319
320         if (ret)
321                 return ret;
322         *mtu = request.ifr_mtu;
323         return 0;
324 }
325
326 /**
327  * Set device MTU.
328  *
329  * @param dev
330  *   Pointer to Ethernet device.
331  * @param mtu
332  *   MTU value to set.
333  *
334  * @return
335  *   0 on success, a negative errno value otherwise and rte_errno is set.
336  */
337 static int
338 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
339 {
340         struct ifreq request = { .ifr_mtu = mtu, };
341
342         return mlx5_ifreq(dev, SIOCSIFMTU, &request);
343 }
344
345 /**
346  * Set device flags.
347  *
348  * @param dev
349  *   Pointer to Ethernet device.
350  * @param keep
351  *   Bitmask for flags that must remain untouched.
352  * @param flags
353  *   Bitmask for flags to modify.
354  *
355  * @return
356  *   0 on success, a negative errno value otherwise and rte_errno is set.
357  */
358 int
359 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags)
360 {
361         struct ifreq request;
362         int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request);
363
364         if (ret)
365                 return ret;
366         request.ifr_flags &= keep;
367         request.ifr_flags |= flags & ~keep;
368         return mlx5_ifreq(dev, SIOCSIFFLAGS, &request);
369 }
370
371 /**
372  * DPDK callback for Ethernet device configuration.
373  *
374  * @param dev
375  *   Pointer to Ethernet device structure.
376  *
377  * @return
378  *   0 on success, a negative errno value otherwise and rte_errno is set.
379  */
380 int
381 mlx5_dev_configure(struct rte_eth_dev *dev)
382 {
383         struct mlx5_priv *priv = dev->data->dev_private;
384         unsigned int rxqs_n = dev->data->nb_rx_queues;
385         unsigned int txqs_n = dev->data->nb_tx_queues;
386         unsigned int i;
387         unsigned int j;
388         unsigned int reta_idx_n;
389         const uint8_t use_app_rss_key =
390                 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
391         int ret = 0;
392         unsigned int lro_on = mlx5_lro_on(dev);
393
394         if (use_app_rss_key &&
395             (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len !=
396              MLX5_RSS_HASH_KEY_LEN)) {
397                 DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long",
398                         dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN));
399                 rte_errno = EINVAL;
400                 return -rte_errno;
401         }
402         priv->rss_conf.rss_key =
403                 rte_realloc(priv->rss_conf.rss_key,
404                             MLX5_RSS_HASH_KEY_LEN, 0);
405         if (!priv->rss_conf.rss_key) {
406                 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)",
407                         dev->data->port_id, rxqs_n);
408                 rte_errno = ENOMEM;
409                 return -rte_errno;
410         }
411         memcpy(priv->rss_conf.rss_key,
412                use_app_rss_key ?
413                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key :
414                rss_hash_default_key,
415                MLX5_RSS_HASH_KEY_LEN);
416         priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN;
417         priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
418         priv->rxqs = (void *)dev->data->rx_queues;
419         priv->txqs = (void *)dev->data->tx_queues;
420         if (txqs_n != priv->txqs_n) {
421                 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u",
422                         dev->data->port_id, priv->txqs_n, txqs_n);
423                 priv->txqs_n = txqs_n;
424         }
425         if (rxqs_n > priv->config.ind_table_max_size) {
426                 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)",
427                         dev->data->port_id, rxqs_n);
428                 rte_errno = EINVAL;
429                 return -rte_errno;
430         }
431         if (rxqs_n != priv->rxqs_n) {
432                 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u",
433                         dev->data->port_id, priv->rxqs_n, rxqs_n);
434                 priv->rxqs_n = rxqs_n;
435                 /*
436                  * If the requested number of RX queues is not a power of two,
437                  * use the maximum indirection table size for better balancing.
438                  * The result is always rounded to the next power of two.
439                  */
440                 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ?
441                                              priv->config.ind_table_max_size :
442                                              rxqs_n));
443                 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n);
444                 if (ret)
445                         return ret;
446                 /*
447                  * When the number of RX queues is not a power of two,
448                  * the remaining table entries are padded with reused WQs
449                  * and hashes are not spread uniformly.
450                  */
451                 for (i = 0, j = 0; (i != reta_idx_n); ++i) {
452                         (*priv->reta_idx)[i] = j;
453                         if (++j == rxqs_n)
454                                 j = 0;
455                 }
456         }
457         if (lro_on && priv->config.cqe_comp) {
458                 /* CQE compressing is not supported for LRO CQEs. */
459                 DRV_LOG(WARNING, "Rx CQE compression isn't supported with LRO");
460                 priv->config.cqe_comp = 0;
461         }
462         ret = mlx5_proc_priv_init(dev);
463         if (ret)
464                 return ret;
465         return 0;
466 }
467
468 /**
469  * Sets default tuning parameters.
470  *
471  * @param dev
472  *   Pointer to Ethernet device.
473  * @param[out] info
474  *   Info structure output buffer.
475  */
476 static void
477 mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
478 {
479         struct mlx5_priv *priv = dev->data->dev_private;
480
481         /* Minimum CPU utilization. */
482         info->default_rxportconf.ring_size = 256;
483         info->default_txportconf.ring_size = 256;
484         info->default_rxportconf.burst_size = 64;
485         info->default_txportconf.burst_size = 64;
486         if (priv->link_speed_capa & ETH_LINK_SPEED_100G) {
487                 info->default_rxportconf.nb_queues = 16;
488                 info->default_txportconf.nb_queues = 16;
489                 if (dev->data->nb_rx_queues > 2 ||
490                     dev->data->nb_tx_queues > 2) {
491                         /* Max Throughput. */
492                         info->default_rxportconf.ring_size = 2048;
493                         info->default_txportconf.ring_size = 2048;
494                 }
495         } else {
496                 info->default_rxportconf.nb_queues = 8;
497                 info->default_txportconf.nb_queues = 8;
498                 if (dev->data->nb_rx_queues > 2 ||
499                     dev->data->nb_tx_queues > 2) {
500                         /* Max Throughput. */
501                         info->default_rxportconf.ring_size = 4096;
502                         info->default_txportconf.ring_size = 4096;
503                 }
504         }
505 }
506
507 /**
508  * Sets tx mbuf limiting parameters.
509  *
510  * @param dev
511  *   Pointer to Ethernet device.
512  * @param[out] info
513  *   Info structure output buffer.
514  */
515 static void
516 mlx5_set_txlimit_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
517 {
518         struct mlx5_priv *priv = dev->data->dev_private;
519         struct mlx5_dev_config *config = &priv->config;
520         unsigned int inlen;
521         uint16_t nb_max;
522
523         inlen = (config->txq_inline_max == MLX5_ARG_UNSET) ?
524                 MLX5_SEND_DEF_INLINE_LEN :
525                 (unsigned int)config->txq_inline_max;
526         assert(config->txq_inline_min >= 0);
527         inlen = RTE_MAX(inlen, (unsigned int)config->txq_inline_min);
528         inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX +
529                                MLX5_ESEG_MIN_INLINE_SIZE -
530                                MLX5_WQE_CSEG_SIZE -
531                                MLX5_WQE_ESEG_SIZE -
532                                MLX5_WQE_DSEG_SIZE * 2);
533         nb_max = (MLX5_WQE_SIZE_MAX +
534                   MLX5_ESEG_MIN_INLINE_SIZE -
535                   MLX5_WQE_CSEG_SIZE -
536                   MLX5_WQE_ESEG_SIZE -
537                   MLX5_WQE_DSEG_SIZE -
538                   inlen) / MLX5_WSEG_SIZE;
539         info->tx_desc_lim.nb_seg_max = nb_max;
540         info->tx_desc_lim.nb_mtu_seg_max = nb_max;
541 }
542
543 /**
544  * DPDK callback to get information about the device.
545  *
546  * @param dev
547  *   Pointer to Ethernet device structure.
548  * @param[out] info
549  *   Info structure output buffer.
550  */
551 void
552 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
553 {
554         struct mlx5_priv *priv = dev->data->dev_private;
555         struct mlx5_dev_config *config = &priv->config;
556         unsigned int max;
557
558         /* FIXME: we should ask the device for these values. */
559         info->min_rx_bufsize = 32;
560         info->max_rx_pktlen = 65536;
561         /*
562          * Since we need one CQ per QP, the limit is the minimum number
563          * between the two values.
564          */
565         max = RTE_MIN(priv->sh->device_attr.orig_attr.max_cq,
566                       priv->sh->device_attr.orig_attr.max_qp);
567         /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */
568         if (max >= 65535)
569                 max = 65535;
570         info->max_rx_queues = max;
571         info->max_tx_queues = max;
572         info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES;
573         info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev);
574         info->rx_offload_capa = (mlx5_get_rx_port_offloads(dev) |
575                                  info->rx_queue_offload_capa);
576         info->tx_offload_capa = mlx5_get_tx_port_offloads(dev);
577         info->if_index = mlx5_ifindex(dev);
578         info->reta_size = priv->reta_idx_n ?
579                 priv->reta_idx_n : config->ind_table_max_size;
580         info->hash_key_size = MLX5_RSS_HASH_KEY_LEN;
581         info->speed_capa = priv->link_speed_capa;
582         info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK;
583         mlx5_set_default_params(dev, info);
584         mlx5_set_txlimit_params(dev, info);
585         info->switch_info.name = dev->data->name;
586         info->switch_info.domain_id = priv->domain_id;
587         info->switch_info.port_id = priv->representor_id;
588         if (priv->representor) {
589                 unsigned int i = mlx5_dev_to_port_id(dev->device, NULL, 0);
590                 uint16_t port_id[i];
591
592                 i = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, i), i);
593                 while (i--) {
594                         struct mlx5_priv *opriv =
595                                 rte_eth_devices[port_id[i]].data->dev_private;
596
597                         if (!opriv ||
598                             opriv->representor ||
599                             opriv->domain_id != priv->domain_id)
600                                 continue;
601                         /*
602                          * Override switch name with that of the master
603                          * device.
604                          */
605                         info->switch_info.name = opriv->dev_data->name;
606                         break;
607                 }
608         }
609 }
610
611 /**
612  * Get device current raw clock counter
613  *
614  * @param dev
615  *   Pointer to Ethernet device structure.
616  * @param[out] time
617  *   Current raw clock counter of the device.
618  *
619  * @return
620  *   0 if the clock has correctly been read
621  *   The value of errno in case of error
622  */
623 int
624 mlx5_read_clock(struct rte_eth_dev *dev, uint64_t *clock)
625 {
626         struct mlx5_priv *priv = dev->data->dev_private;
627         struct ibv_context *ctx = priv->sh->ctx;
628         struct ibv_values_ex values;
629         int err = 0;
630
631         values.comp_mask = IBV_VALUES_MASK_RAW_CLOCK;
632         err = mlx5_glue->query_rt_values_ex(ctx, &values);
633         if (err != 0) {
634                 DRV_LOG(WARNING, "Could not query the clock !");
635                 return err;
636         }
637         *clock = values.raw_clock.tv_nsec;
638         return 0;
639 }
640
641 /**
642  * Get firmware version of a device.
643  *
644  * @param dev
645  *   Ethernet device port.
646  * @param fw_ver
647  *   String output allocated by caller.
648  * @param fw_size
649  *   Size of the output string, including terminating null byte.
650  *
651  * @return
652  *   0 on success, or the size of the non truncated string if too big.
653  */
654 int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size)
655 {
656         struct mlx5_priv *priv = dev->data->dev_private;
657         struct ibv_device_attr *attr = &priv->sh->device_attr.orig_attr;
658         size_t size = strnlen(attr->fw_ver, sizeof(attr->fw_ver)) + 1;
659
660         if (fw_size < size)
661                 return size;
662         if (fw_ver != NULL)
663                 strlcpy(fw_ver, attr->fw_ver, fw_size);
664         return 0;
665 }
666
667 /**
668  * Get supported packet types.
669  *
670  * @param dev
671  *   Pointer to Ethernet device structure.
672  *
673  * @return
674  *   A pointer to the supported Packet types array.
675  */
676 const uint32_t *
677 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
678 {
679         static const uint32_t ptypes[] = {
680                 /* refers to rxq_cq_to_pkt_type() */
681                 RTE_PTYPE_L2_ETHER,
682                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
683                 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
684                 RTE_PTYPE_L4_NONFRAG,
685                 RTE_PTYPE_L4_FRAG,
686                 RTE_PTYPE_L4_TCP,
687                 RTE_PTYPE_L4_UDP,
688                 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
689                 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
690                 RTE_PTYPE_INNER_L4_NONFRAG,
691                 RTE_PTYPE_INNER_L4_FRAG,
692                 RTE_PTYPE_INNER_L4_TCP,
693                 RTE_PTYPE_INNER_L4_UDP,
694                 RTE_PTYPE_UNKNOWN
695         };
696
697         if (dev->rx_pkt_burst == mlx5_rx_burst ||
698             dev->rx_pkt_burst == mlx5_rx_burst_mprq ||
699             dev->rx_pkt_burst == mlx5_rx_burst_vec)
700                 return ptypes;
701         return NULL;
702 }
703
704 /**
705  * Retrieve the master device for representor in the same switch domain.
706  *
707  * @param dev
708  *   Pointer to representor Ethernet device structure.
709  *
710  * @return
711  *   Master device structure  on success, NULL otherwise.
712  */
713
714 static struct rte_eth_dev *
715 mlx5_find_master_dev(struct rte_eth_dev *dev)
716 {
717         struct mlx5_priv *priv;
718         uint16_t port_id;
719         uint16_t domain_id;
720
721         priv = dev->data->dev_private;
722         domain_id = priv->domain_id;
723         assert(priv->representor);
724         RTE_ETH_FOREACH_DEV_OF(port_id, dev->device) {
725                 priv = rte_eth_devices[port_id].data->dev_private;
726                 if (priv &&
727                     priv->master &&
728                     priv->domain_id == domain_id)
729                         return &rte_eth_devices[port_id];
730         }
731         return NULL;
732 }
733
734 /**
735  * DPDK callback to retrieve physical link information.
736  *
737  * @param dev
738  *   Pointer to Ethernet device structure.
739  * @param[out] link
740  *   Storage for current link status.
741  *
742  * @return
743  *   0 on success, a negative errno value otherwise and rte_errno is set.
744  */
745 static int
746 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
747                                struct rte_eth_link *link)
748 {
749         struct mlx5_priv *priv = dev->data->dev_private;
750         struct ethtool_cmd edata = {
751                 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */
752         };
753         struct ifreq ifr;
754         struct rte_eth_link dev_link;
755         int link_speed = 0;
756         int ret;
757
758         ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
759         if (ret) {
760                 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
761                         dev->data->port_id, strerror(rte_errno));
762                 return ret;
763         }
764         dev_link = (struct rte_eth_link) {
765                 .link_status = ((ifr.ifr_flags & IFF_UP) &&
766                                 (ifr.ifr_flags & IFF_RUNNING)),
767         };
768         ifr = (struct ifreq) {
769                 .ifr_data = (void *)&edata,
770         };
771         ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
772         if (ret) {
773                 if (ret == -ENOTSUP && priv->representor) {
774                         struct rte_eth_dev *master;
775
776                         /*
777                          * For representors we can try to inherit link
778                          * settings from the master device. Actually
779                          * link settings do not make a lot of sense
780                          * for representors due to missing physical
781                          * link. The old kernel drivers supported
782                          * emulated settings query for representors,
783                          * the new ones do not, so we have to add
784                          * this code for compatibility issues.
785                          */
786                         master = mlx5_find_master_dev(dev);
787                         if (master) {
788                                 ifr = (struct ifreq) {
789                                         .ifr_data = (void *)&edata,
790                                 };
791                                 ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);
792                         }
793                 }
794                 if (ret) {
795                         DRV_LOG(WARNING,
796                                 "port %u ioctl(SIOCETHTOOL,"
797                                 " ETHTOOL_GSET) failed: %s",
798                                 dev->data->port_id, strerror(rte_errno));
799                         return ret;
800                 }
801         }
802         link_speed = ethtool_cmd_speed(&edata);
803         if (link_speed == -1)
804                 dev_link.link_speed = ETH_SPEED_NUM_NONE;
805         else
806                 dev_link.link_speed = link_speed;
807         priv->link_speed_capa = 0;
808         if (edata.supported & SUPPORTED_Autoneg)
809                 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
810         if (edata.supported & (SUPPORTED_1000baseT_Full |
811                                SUPPORTED_1000baseKX_Full))
812                 priv->link_speed_capa |= ETH_LINK_SPEED_1G;
813         if (edata.supported & SUPPORTED_10000baseKR_Full)
814                 priv->link_speed_capa |= ETH_LINK_SPEED_10G;
815         if (edata.supported & (SUPPORTED_40000baseKR4_Full |
816                                SUPPORTED_40000baseCR4_Full |
817                                SUPPORTED_40000baseSR4_Full |
818                                SUPPORTED_40000baseLR4_Full))
819                 priv->link_speed_capa |= ETH_LINK_SPEED_40G;
820         dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
821                                 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
822         dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
823                         ETH_LINK_SPEED_FIXED);
824         if (((dev_link.link_speed && !dev_link.link_status) ||
825              (!dev_link.link_speed && dev_link.link_status))) {
826                 rte_errno = EAGAIN;
827                 return -rte_errno;
828         }
829         *link = dev_link;
830         return 0;
831 }
832
833 /**
834  * Retrieve physical link information (unlocked version using new ioctl).
835  *
836  * @param dev
837  *   Pointer to Ethernet device structure.
838  * @param[out] link
839  *   Storage for current link status.
840  *
841  * @return
842  *   0 on success, a negative errno value otherwise and rte_errno is set.
843  */
844 static int
845 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
846                              struct rte_eth_link *link)
847
848 {
849         struct mlx5_priv *priv = dev->data->dev_private;
850         struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
851         struct ifreq ifr;
852         struct rte_eth_link dev_link;
853         struct rte_eth_dev *master = NULL;
854         uint64_t sc;
855         int ret;
856
857         ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
858         if (ret) {
859                 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
860                         dev->data->port_id, strerror(rte_errno));
861                 return ret;
862         }
863         dev_link = (struct rte_eth_link) {
864                 .link_status = ((ifr.ifr_flags & IFF_UP) &&
865                                 (ifr.ifr_flags & IFF_RUNNING)),
866         };
867         ifr = (struct ifreq) {
868                 .ifr_data = (void *)&gcmd,
869         };
870         ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
871         if (ret) {
872                 if (ret == -ENOTSUP && priv->representor) {
873                         /*
874                          * For representors we can try to inherit link
875                          * settings from the master device. Actually
876                          * link settings do not make a lot of sense
877                          * for representors due to missing physical
878                          * link. The old kernel drivers supported
879                          * emulated settings query for representors,
880                          * the new ones do not, so we have to add
881                          * this code for compatibility issues.
882                          */
883                         master = mlx5_find_master_dev(dev);
884                         if (master) {
885                                 ifr = (struct ifreq) {
886                                         .ifr_data = (void *)&gcmd,
887                                 };
888                                 ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);
889                         }
890                 }
891                 if (ret) {
892                         DRV_LOG(DEBUG,
893                                 "port %u ioctl(SIOCETHTOOL,"
894                                 " ETHTOOL_GLINKSETTINGS) failed: %s",
895                                 dev->data->port_id, strerror(rte_errno));
896                         return ret;
897                 }
898
899         }
900         gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;
901
902         alignas(struct ethtool_link_settings)
903         uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +
904                      sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];
905         struct ethtool_link_settings *ecmd = (void *)data;
906
907         *ecmd = gcmd;
908         ifr.ifr_data = (void *)ecmd;
909         ret = mlx5_ifreq(master ? master : dev, SIOCETHTOOL, &ifr);
910         if (ret) {
911                 DRV_LOG(DEBUG,
912                         "port %u ioctl(SIOCETHTOOL,"
913                         "ETHTOOL_GLINKSETTINGS) failed: %s",
914                         dev->data->port_id, strerror(rte_errno));
915                 return ret;
916         }
917         dev_link.link_speed = ecmd->speed;
918         sc = ecmd->link_mode_masks[0] |
919                 ((uint64_t)ecmd->link_mode_masks[1] << 32);
920         priv->link_speed_capa = 0;
921         if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT))
922                 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
923         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) |
924                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)))
925                 priv->link_speed_capa |= ETH_LINK_SPEED_1G;
926         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) |
927                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) |
928                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)))
929                 priv->link_speed_capa |= ETH_LINK_SPEED_10G;
930         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) |
931                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)))
932                 priv->link_speed_capa |= ETH_LINK_SPEED_20G;
933         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) |
934                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) |
935                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) |
936                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)))
937                 priv->link_speed_capa |= ETH_LINK_SPEED_40G;
938         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) |
939                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) |
940                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) |
941                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)))
942                 priv->link_speed_capa |= ETH_LINK_SPEED_56G;
943         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) |
944                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) |
945                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)))
946                 priv->link_speed_capa |= ETH_LINK_SPEED_25G;
947         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) |
948                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)))
949                 priv->link_speed_capa |= ETH_LINK_SPEED_50G;
950         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) |
951                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) |
952                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) |
953                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)))
954                 priv->link_speed_capa |= ETH_LINK_SPEED_100G;
955         dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
956                                 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
957         dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
958                                   ETH_LINK_SPEED_FIXED);
959         if (((dev_link.link_speed && !dev_link.link_status) ||
960              (!dev_link.link_speed && dev_link.link_status))) {
961                 rte_errno = EAGAIN;
962                 return -rte_errno;
963         }
964         *link = dev_link;
965         return 0;
966 }
967
968 /**
969  * DPDK callback to retrieve physical link information.
970  *
971  * @param dev
972  *   Pointer to Ethernet device structure.
973  * @param wait_to_complete
974  *   Wait for request completion.
975  *
976  * @return
977  *   0 if link status was not updated, positive if it was, a negative errno
978  *   value otherwise and rte_errno is set.
979  */
980 int
981 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
982 {
983         int ret;
984         struct rte_eth_link dev_link;
985         time_t start_time = time(NULL);
986
987         do {
988                 ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
989                 if (ret == -ENOTSUP)
990                         ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
991                 if (ret == 0)
992                         break;
993                 /* Handle wait to complete situation. */
994                 if (wait_to_complete && ret == -EAGAIN) {
995                         if (abs((int)difftime(time(NULL), start_time)) <
996                             MLX5_LINK_STATUS_TIMEOUT) {
997                                 usleep(0);
998                                 continue;
999                         } else {
1000                                 rte_errno = EBUSY;
1001                                 return -rte_errno;
1002                         }
1003                 } else if (ret < 0) {
1004                         return ret;
1005                 }
1006         } while (wait_to_complete);
1007         ret = !!memcmp(&dev->data->dev_link, &dev_link,
1008                        sizeof(struct rte_eth_link));
1009         dev->data->dev_link = dev_link;
1010         return ret;
1011 }
1012
1013 /**
1014  * DPDK callback to change the MTU.
1015  *
1016  * @param dev
1017  *   Pointer to Ethernet device structure.
1018  * @param in_mtu
1019  *   New MTU.
1020  *
1021  * @return
1022  *   0 on success, a negative errno value otherwise and rte_errno is set.
1023  */
1024 int
1025 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
1026 {
1027         struct mlx5_priv *priv = dev->data->dev_private;
1028         uint16_t kern_mtu = 0;
1029         int ret;
1030
1031         ret = mlx5_get_mtu(dev, &kern_mtu);
1032         if (ret)
1033                 return ret;
1034         /* Set kernel interface MTU first. */
1035         ret = mlx5_set_mtu(dev, mtu);
1036         if (ret)
1037                 return ret;
1038         ret = mlx5_get_mtu(dev, &kern_mtu);
1039         if (ret)
1040                 return ret;
1041         if (kern_mtu == mtu) {
1042                 priv->mtu = mtu;
1043                 DRV_LOG(DEBUG, "port %u adapter MTU set to %u",
1044                         dev->data->port_id, mtu);
1045                 return 0;
1046         }
1047         rte_errno = EAGAIN;
1048         return -rte_errno;
1049 }
1050
1051 /**
1052  * DPDK callback to get flow control status.
1053  *
1054  * @param dev
1055  *   Pointer to Ethernet device structure.
1056  * @param[out] fc_conf
1057  *   Flow control output buffer.
1058  *
1059  * @return
1060  *   0 on success, a negative errno value otherwise and rte_errno is set.
1061  */
1062 int
1063 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
1064 {
1065         struct ifreq ifr;
1066         struct ethtool_pauseparam ethpause = {
1067                 .cmd = ETHTOOL_GPAUSEPARAM
1068         };
1069         int ret;
1070
1071         ifr.ifr_data = (void *)&ethpause;
1072         ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
1073         if (ret) {
1074                 DRV_LOG(WARNING,
1075                         "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:"
1076                         " %s",
1077                         dev->data->port_id, strerror(rte_errno));
1078                 return ret;
1079         }
1080         fc_conf->autoneg = ethpause.autoneg;
1081         if (ethpause.rx_pause && ethpause.tx_pause)
1082                 fc_conf->mode = RTE_FC_FULL;
1083         else if (ethpause.rx_pause)
1084                 fc_conf->mode = RTE_FC_RX_PAUSE;
1085         else if (ethpause.tx_pause)
1086                 fc_conf->mode = RTE_FC_TX_PAUSE;
1087         else
1088                 fc_conf->mode = RTE_FC_NONE;
1089         return 0;
1090 }
1091
1092 /**
1093  * DPDK callback to modify flow control parameters.
1094  *
1095  * @param dev
1096  *   Pointer to Ethernet device structure.
1097  * @param[in] fc_conf
1098  *   Flow control parameters.
1099  *
1100  * @return
1101  *   0 on success, a negative errno value otherwise and rte_errno is set.
1102  */
1103 int
1104 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
1105 {
1106         struct ifreq ifr;
1107         struct ethtool_pauseparam ethpause = {
1108                 .cmd = ETHTOOL_SPAUSEPARAM
1109         };
1110         int ret;
1111
1112         ifr.ifr_data = (void *)&ethpause;
1113         ethpause.autoneg = fc_conf->autoneg;
1114         if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
1115             (fc_conf->mode & RTE_FC_RX_PAUSE))
1116                 ethpause.rx_pause = 1;
1117         else
1118                 ethpause.rx_pause = 0;
1119
1120         if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
1121             (fc_conf->mode & RTE_FC_TX_PAUSE))
1122                 ethpause.tx_pause = 1;
1123         else
1124                 ethpause.tx_pause = 0;
1125         ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
1126         if (ret) {
1127                 DRV_LOG(WARNING,
1128                         "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
1129                         " failed: %s",
1130                         dev->data->port_id, strerror(rte_errno));
1131                 return ret;
1132         }
1133         return 0;
1134 }
1135
1136 /**
1137  * Get PCI information from struct ibv_device.
1138  *
1139  * @param device
1140  *   Pointer to Ethernet device structure.
1141  * @param[out] pci_addr
1142  *   PCI bus address output buffer.
1143  *
1144  * @return
1145  *   0 on success, a negative errno value otherwise and rte_errno is set.
1146  */
1147 int
1148 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
1149                             struct rte_pci_addr *pci_addr)
1150 {
1151         FILE *file;
1152         char line[32];
1153         MKSTR(path, "%s/device/uevent", device->ibdev_path);
1154
1155         file = fopen(path, "rb");
1156         if (file == NULL) {
1157                 rte_errno = errno;
1158                 return -rte_errno;
1159         }
1160         while (fgets(line, sizeof(line), file) == line) {
1161                 size_t len = strlen(line);
1162                 int ret;
1163
1164                 /* Truncate long lines. */
1165                 if (len == (sizeof(line) - 1))
1166                         while (line[(len - 1)] != '\n') {
1167                                 ret = fgetc(file);
1168                                 if (ret == EOF)
1169                                         break;
1170                                 line[(len - 1)] = ret;
1171                         }
1172                 /* Extract information. */
1173                 if (sscanf(line,
1174                            "PCI_SLOT_NAME="
1175                            "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
1176                            &pci_addr->domain,
1177                            &pci_addr->bus,
1178                            &pci_addr->devid,
1179                            &pci_addr->function) == 4) {
1180                         ret = 0;
1181                         break;
1182                 }
1183         }
1184         fclose(file);
1185         return 0;
1186 }
1187
1188 /**
1189  * Handle asynchronous removal event for entire multiport device.
1190  *
1191  * @param sh
1192  *   Infiniband device shared context.
1193  */
1194 static void
1195 mlx5_dev_interrupt_device_fatal(struct mlx5_ibv_shared *sh)
1196 {
1197         uint32_t i;
1198
1199         for (i = 0; i < sh->max_port; ++i) {
1200                 struct rte_eth_dev *dev;
1201
1202                 if (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) {
1203                         /*
1204                          * Or not existing port either no
1205                          * handler installed for this port.
1206                          */
1207                         continue;
1208                 }
1209                 dev = &rte_eth_devices[sh->port[i].ih_port_id];
1210                 assert(dev);
1211                 if (dev->data->dev_conf.intr_conf.rmv)
1212                         _rte_eth_dev_callback_process
1213                                 (dev, RTE_ETH_EVENT_INTR_RMV, NULL);
1214         }
1215 }
1216
1217 /**
1218  * Handle shared asynchronous events the NIC (removal event
1219  * and link status change). Supports multiport IB device.
1220  *
1221  * @param cb_arg
1222  *   Callback argument.
1223  */
1224 void
1225 mlx5_dev_interrupt_handler(void *cb_arg)
1226 {
1227         struct mlx5_ibv_shared *sh = cb_arg;
1228         struct ibv_async_event event;
1229
1230         /* Read all message from the IB device and acknowledge them. */
1231         for (;;) {
1232                 struct rte_eth_dev *dev;
1233                 uint32_t tmp;
1234
1235                 if (mlx5_glue->get_async_event(sh->ctx, &event))
1236                         break;
1237                 /* Retrieve and check IB port index. */
1238                 tmp = (uint32_t)event.element.port_num;
1239                 if (!tmp && event.event_type == IBV_EVENT_DEVICE_FATAL) {
1240                         /*
1241                          * The DEVICE_FATAL event is called once for
1242                          * entire device without port specifying.
1243                          * We should notify all existing ports.
1244                          */
1245                         mlx5_glue->ack_async_event(&event);
1246                         mlx5_dev_interrupt_device_fatal(sh);
1247                         continue;
1248                 }
1249                 assert(tmp && (tmp <= sh->max_port));
1250                 if (!tmp) {
1251                         /* Unsupported devive level event. */
1252                         mlx5_glue->ack_async_event(&event);
1253                         DRV_LOG(DEBUG,
1254                                 "unsupported common event (type %d)",
1255                                 event.event_type);
1256                         continue;
1257                 }
1258                 if (tmp > sh->max_port) {
1259                         /* Invalid IB port index. */
1260                         mlx5_glue->ack_async_event(&event);
1261                         DRV_LOG(DEBUG,
1262                                 "cannot handle an event (type %d)"
1263                                 "due to invalid IB port index (%u)",
1264                                 event.event_type, tmp);
1265                         continue;
1266                 }
1267                 if (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) {
1268                         /* No handler installed. */
1269                         mlx5_glue->ack_async_event(&event);
1270                         DRV_LOG(DEBUG,
1271                                 "cannot handle an event (type %d)"
1272                                 "due to no handler installed for port %u",
1273                                 event.event_type, tmp);
1274                         continue;
1275                 }
1276                 /* Retrieve ethernet device descriptor. */
1277                 tmp = sh->port[tmp - 1].ih_port_id;
1278                 dev = &rte_eth_devices[tmp];
1279                 assert(dev);
1280                 if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
1281                      event.event_type == IBV_EVENT_PORT_ERR) &&
1282                         dev->data->dev_conf.intr_conf.lsc) {
1283                         mlx5_glue->ack_async_event(&event);
1284                         if (mlx5_link_update(dev, 0) == -EAGAIN) {
1285                                 usleep(0);
1286                                 continue;
1287                         }
1288                         _rte_eth_dev_callback_process
1289                                 (dev, RTE_ETH_EVENT_INTR_LSC, NULL);
1290                         continue;
1291                 }
1292                 DRV_LOG(DEBUG,
1293                         "port %u cannot handle an unknown event (type %d)",
1294                         dev->data->port_id, event.event_type);
1295                 mlx5_glue->ack_async_event(&event);
1296         }
1297 }
1298
1299 /*
1300  * Unregister callback handler safely. The handler may be active
1301  * while we are trying to unregister it, in this case code -EAGAIN
1302  * is returned by rte_intr_callback_unregister(). This routine checks
1303  * the return code and tries to unregister handler again.
1304  *
1305  * @param handle
1306  *   interrupt handle
1307  * @param cb_fn
1308  *   pointer to callback routine
1309  * @cb_arg
1310  *   opaque callback parameter
1311  */
1312 void
1313 mlx5_intr_callback_unregister(const struct rte_intr_handle *handle,
1314                               rte_intr_callback_fn cb_fn, void *cb_arg)
1315 {
1316         /*
1317          * Try to reduce timeout management overhead by not calling
1318          * the timer related routines on the first iteration. If the
1319          * unregistering succeeds on first call there will be no
1320          * timer calls at all.
1321          */
1322         uint64_t twait = 0;
1323         uint64_t start = 0;
1324
1325         do {
1326                 int ret;
1327
1328                 ret = rte_intr_callback_unregister(handle, cb_fn, cb_arg);
1329                 if (ret >= 0)
1330                         return;
1331                 if (ret != -EAGAIN) {
1332                         DRV_LOG(INFO, "failed to unregister interrupt"
1333                                       " handler (error: %d)", ret);
1334                         assert(false);
1335                         return;
1336                 }
1337                 if (twait) {
1338                         struct timespec onems;
1339
1340                         /* Wait one millisecond and try again. */
1341                         onems.tv_sec = 0;
1342                         onems.tv_nsec = NS_PER_S / MS_PER_S;
1343                         nanosleep(&onems, 0);
1344                         /* Check whether one second elapsed. */
1345                         if ((rte_get_timer_cycles() - start) <= twait)
1346                                 continue;
1347                 } else {
1348                         /*
1349                          * We get the amount of timer ticks for one second.
1350                          * If this amount elapsed it means we spent one
1351                          * second in waiting. This branch is executed once
1352                          * on first iteration.
1353                          */
1354                         twait = rte_get_timer_hz();
1355                         assert(twait);
1356                 }
1357                 /*
1358                  * Timeout elapsed, show message (once a second) and retry.
1359                  * We have no other acceptable option here, if we ignore
1360                  * the unregistering return code the handler will not
1361                  * be unregistered, fd will be closed and we may get the
1362                  * crush. Hanging and messaging in the loop seems not to be
1363                  * the worst choice.
1364                  */
1365                 DRV_LOG(INFO, "Retrying to unregister interrupt handler");
1366                 start = rte_get_timer_cycles();
1367         } while (true);
1368 }
1369
1370 /**
1371  * Handle DEVX interrupts from the NIC.
1372  * This function is probably called from the DPDK host thread.
1373  *
1374  * @param cb_arg
1375  *   Callback argument.
1376  */
1377 void
1378 mlx5_dev_interrupt_handler_devx(void *cb_arg)
1379 {
1380 #ifndef HAVE_IBV_DEVX_ASYNC
1381         (void)cb_arg;
1382         return;
1383 #else
1384         struct mlx5_ibv_shared *sh = cb_arg;
1385         union {
1386                 struct mlx5dv_devx_async_cmd_hdr cmd_resp;
1387                 uint8_t buf[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
1388                             MLX5_ST_SZ_BYTES(traffic_counter) +
1389                             sizeof(struct mlx5dv_devx_async_cmd_hdr)];
1390         } out;
1391         uint8_t *buf = out.buf + sizeof(out.cmd_resp);
1392
1393         while (!mlx5_glue->devx_get_async_cmd_comp(sh->devx_comp,
1394                                                    &out.cmd_resp,
1395                                                    sizeof(out.buf)))
1396                 mlx5_flow_async_pool_query_handle
1397                         (sh, (uint64_t)out.cmd_resp.wr_id,
1398                          mlx5_devx_get_out_command_status(buf));
1399 #endif /* HAVE_IBV_DEVX_ASYNC */
1400 }
1401
1402 /**
1403  * Uninstall shared asynchronous device events handler.
1404  * This function is implemented to support event sharing
1405  * between multiple ports of single IB device.
1406  *
1407  * @param dev
1408  *   Pointer to Ethernet device.
1409  */
1410 static void
1411 mlx5_dev_shared_handler_uninstall(struct rte_eth_dev *dev)
1412 {
1413         struct mlx5_priv *priv = dev->data->dev_private;
1414         struct mlx5_ibv_shared *sh = priv->sh;
1415
1416         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1417                 return;
1418         pthread_mutex_lock(&sh->intr_mutex);
1419         assert(priv->ibv_port);
1420         assert(priv->ibv_port <= sh->max_port);
1421         assert(dev->data->port_id < RTE_MAX_ETHPORTS);
1422         if (sh->port[priv->ibv_port - 1].ih_port_id >= RTE_MAX_ETHPORTS)
1423                 goto exit;
1424         assert(sh->port[priv->ibv_port - 1].ih_port_id ==
1425                                         (uint32_t)dev->data->port_id);
1426         assert(sh->intr_cnt);
1427         sh->port[priv->ibv_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1428         if (!sh->intr_cnt || --sh->intr_cnt)
1429                 goto exit;
1430         mlx5_intr_callback_unregister(&sh->intr_handle,
1431                                      mlx5_dev_interrupt_handler, sh);
1432         sh->intr_handle.fd = 0;
1433         sh->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
1434         if (sh->intr_handle_devx.fd) {
1435                 rte_intr_callback_unregister(&sh->intr_handle_devx,
1436                                              mlx5_dev_interrupt_handler_devx,
1437                                              sh);
1438                 sh->intr_handle_devx.fd = 0;
1439                 sh->intr_handle_devx.type = RTE_INTR_HANDLE_UNKNOWN;
1440         }
1441         if (sh->devx_comp) {
1442                 mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp);
1443                 sh->devx_comp = NULL;
1444         }
1445 exit:
1446         pthread_mutex_unlock(&sh->intr_mutex);
1447 }
1448
1449 /**
1450  * Install shared asynchronous device events handler.
1451  * This function is implemented to support event sharing
1452  * between multiple ports of single IB device.
1453  *
1454  * @param dev
1455  *   Pointer to Ethernet device.
1456  */
1457 static void
1458 mlx5_dev_shared_handler_install(struct rte_eth_dev *dev)
1459 {
1460         struct mlx5_priv *priv = dev->data->dev_private;
1461         struct mlx5_ibv_shared *sh = priv->sh;
1462         int ret;
1463         int flags;
1464
1465         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1466                 return;
1467         pthread_mutex_lock(&sh->intr_mutex);
1468         assert(priv->ibv_port);
1469         assert(priv->ibv_port <= sh->max_port);
1470         assert(dev->data->port_id < RTE_MAX_ETHPORTS);
1471         if (sh->port[priv->ibv_port - 1].ih_port_id < RTE_MAX_ETHPORTS) {
1472                 /* The handler is already installed for this port. */
1473                 assert(sh->intr_cnt);
1474                 goto exit;
1475         }
1476         sh->port[priv->ibv_port - 1].ih_port_id = (uint32_t)dev->data->port_id;
1477         if (sh->intr_cnt) {
1478                 sh->intr_cnt++;
1479                 goto exit;
1480         }
1481         /* No shared handler installed. */
1482         assert(sh->ctx->async_fd > 0);
1483         flags = fcntl(sh->ctx->async_fd, F_GETFL);
1484         ret = fcntl(sh->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
1485         if (ret) {
1486                 DRV_LOG(INFO, "failed to change file descriptor"
1487                               " async event queue");
1488                 goto error;
1489         }
1490         sh->intr_handle.fd = sh->ctx->async_fd;
1491         sh->intr_handle.type = RTE_INTR_HANDLE_EXT;
1492         rte_intr_callback_register(&sh->intr_handle,
1493                                    mlx5_dev_interrupt_handler, sh);
1494         if (priv->config.devx) {
1495 #ifndef HAVE_IBV_DEVX_ASYNC
1496                 goto error_unregister;
1497 #else
1498                 sh->devx_comp = mlx5_glue->devx_create_cmd_comp(sh->ctx);
1499                 if (sh->devx_comp) {
1500                         flags = fcntl(sh->devx_comp->fd, F_GETFL);
1501                         ret = fcntl(sh->devx_comp->fd, F_SETFL,
1502                                     flags | O_NONBLOCK);
1503                         if (ret) {
1504                                 DRV_LOG(INFO, "failed to change file descriptor"
1505                                               " devx async event queue");
1506                                 goto error_unregister;
1507                         }
1508                         sh->intr_handle_devx.fd = sh->devx_comp->fd;
1509                         sh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT;
1510                         rte_intr_callback_register
1511                                 (&sh->intr_handle_devx,
1512                                  mlx5_dev_interrupt_handler_devx, sh);
1513                 } else {
1514                         DRV_LOG(INFO, "failed to create devx async command "
1515                                 "completion");
1516                         goto error_unregister;
1517                 }
1518 #endif /* HAVE_IBV_DEVX_ASYNC */
1519         }
1520         sh->intr_cnt++;
1521         goto exit;
1522 error_unregister:
1523         rte_intr_callback_unregister(&sh->intr_handle,
1524                                      mlx5_dev_interrupt_handler, sh);
1525 error:
1526         /* Indicate there will be no interrupts. */
1527         dev->data->dev_conf.intr_conf.lsc = 0;
1528         dev->data->dev_conf.intr_conf.rmv = 0;
1529         sh->intr_handle.fd = 0;
1530         sh->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
1531         sh->port[priv->ibv_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1532 exit:
1533         pthread_mutex_unlock(&sh->intr_mutex);
1534 }
1535
1536 /**
1537  * Uninstall interrupt handler.
1538  *
1539  * @param dev
1540  *   Pointer to Ethernet device.
1541  */
1542 void
1543 mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev)
1544 {
1545         mlx5_dev_shared_handler_uninstall(dev);
1546 }
1547
1548 /**
1549  * Install interrupt handler.
1550  *
1551  * @param dev
1552  *   Pointer to Ethernet device.
1553  */
1554 void
1555 mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev)
1556 {
1557         mlx5_dev_shared_handler_install(dev);
1558 }
1559
1560 /**
1561  * DPDK callback to bring the link DOWN.
1562  *
1563  * @param dev
1564  *   Pointer to Ethernet device structure.
1565  *
1566  * @return
1567  *   0 on success, a negative errno value otherwise and rte_errno is set.
1568  */
1569 int
1570 mlx5_set_link_down(struct rte_eth_dev *dev)
1571 {
1572         return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP);
1573 }
1574
1575 /**
1576  * DPDK callback to bring the link UP.
1577  *
1578  * @param dev
1579  *   Pointer to Ethernet device structure.
1580  *
1581  * @return
1582  *   0 on success, a negative errno value otherwise and rte_errno is set.
1583  */
1584 int
1585 mlx5_set_link_up(struct rte_eth_dev *dev)
1586 {
1587         return mlx5_set_flags(dev, ~IFF_UP, IFF_UP);
1588 }
1589
1590 /**
1591  * Configure the RX function to use.
1592  *
1593  * @param dev
1594  *   Pointer to private data structure.
1595  *
1596  * @return
1597  *   Pointer to selected Rx burst function.
1598  */
1599 eth_rx_burst_t
1600 mlx5_select_rx_function(struct rte_eth_dev *dev)
1601 {
1602         eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst;
1603
1604         assert(dev != NULL);
1605         if (mlx5_check_vec_rx_support(dev) > 0) {
1606                 rx_pkt_burst = mlx5_rx_burst_vec;
1607                 DRV_LOG(DEBUG, "port %u selected Rx vectorized function",
1608                         dev->data->port_id);
1609         } else if (mlx5_mprq_enabled(dev)) {
1610                 rx_pkt_burst = mlx5_rx_burst_mprq;
1611         }
1612         return rx_pkt_burst;
1613 }
1614
1615 /**
1616  * Check if mlx5 device was removed.
1617  *
1618  * @param dev
1619  *   Pointer to Ethernet device structure.
1620  *
1621  * @return
1622  *   1 when device is removed, otherwise 0.
1623  */
1624 int
1625 mlx5_is_removed(struct rte_eth_dev *dev)
1626 {
1627         struct ibv_device_attr device_attr;
1628         struct mlx5_priv *priv = dev->data->dev_private;
1629
1630         if (mlx5_glue->query_device(priv->sh->ctx, &device_attr) == EIO)
1631                 return 1;
1632         return 0;
1633 }
1634
1635 /**
1636  * Get port ID list of mlx5 instances sharing a common device.
1637  *
1638  * @param[in] dev
1639  *   Device to look for.
1640  * @param[out] port_list
1641  *   Result buffer for collected port IDs.
1642  * @param port_list_n
1643  *   Maximum number of entries in result buffer. If 0, @p port_list can be
1644  *   NULL.
1645  *
1646  * @return
1647  *   Number of matching instances regardless of the @p port_list_n
1648  *   parameter, 0 if none were found.
1649  */
1650 unsigned int
1651 mlx5_dev_to_port_id(const struct rte_device *dev, uint16_t *port_list,
1652                     unsigned int port_list_n)
1653 {
1654         uint16_t id;
1655         unsigned int n = 0;
1656
1657         RTE_ETH_FOREACH_DEV_OF(id, dev) {
1658                 if (n < port_list_n)
1659                         port_list[n] = id;
1660                 n++;
1661         }
1662         return n;
1663 }
1664
1665 /**
1666  * Get the E-Switch domain id this port belongs to.
1667  *
1668  * @param[in] port
1669  *   Device port id.
1670  * @param[out] es_domain_id
1671  *   E-Switch domain id.
1672  * @param[out] es_port_id
1673  *   The port id of the port in the E-Switch.
1674  *
1675  * @return
1676  *   0 on success, a negative errno value otherwise and rte_errno is set.
1677  */
1678 int
1679 mlx5_port_to_eswitch_info(uint16_t port,
1680                           uint16_t *es_domain_id, uint16_t *es_port_id)
1681 {
1682         struct rte_eth_dev *dev;
1683         struct mlx5_priv *priv;
1684
1685         if (port >= RTE_MAX_ETHPORTS) {
1686                 rte_errno = EINVAL;
1687                 return -rte_errno;
1688         }
1689         if (!rte_eth_dev_is_valid_port(port)) {
1690                 rte_errno = ENODEV;
1691                 return -rte_errno;
1692         }
1693         dev = &rte_eth_devices[port];
1694         priv = dev->data->dev_private;
1695         if (!(priv->representor || priv->master)) {
1696                 rte_errno = EINVAL;
1697                 return -rte_errno;
1698         }
1699         if (es_domain_id)
1700                 *es_domain_id = priv->domain_id;
1701         if (es_port_id)
1702                 *es_port_id = priv->vport_id;
1703         return 0;
1704 }
1705
1706 /**
1707  * Get switch information associated with network interface.
1708  *
1709  * @param ifindex
1710  *   Network interface index.
1711  * @param[out] info
1712  *   Switch information object, populated in case of success.
1713  *
1714  * @return
1715  *   0 on success, a negative errno value otherwise and rte_errno is set.
1716  */
1717 int
1718 mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info)
1719 {
1720         char ifname[IF_NAMESIZE];
1721         char port_name[IF_NAMESIZE];
1722         FILE *file;
1723         struct mlx5_switch_info data = {
1724                 .master = 0,
1725                 .representor = 0,
1726                 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
1727                 .port_name = 0,
1728                 .switch_id = 0,
1729         };
1730         DIR *dir;
1731         bool port_switch_id_set = false;
1732         bool device_dir = false;
1733         char c;
1734         int ret;
1735
1736         if (!if_indextoname(ifindex, ifname)) {
1737                 rte_errno = errno;
1738                 return -rte_errno;
1739         }
1740
1741         MKSTR(phys_port_name, "/sys/class/net/%s/phys_port_name",
1742               ifname);
1743         MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
1744               ifname);
1745         MKSTR(pci_device, "/sys/class/net/%s/device",
1746               ifname);
1747
1748         file = fopen(phys_port_name, "rb");
1749         if (file != NULL) {
1750                 ret = fscanf(file, "%s", port_name);
1751                 fclose(file);
1752                 if (ret == 1)
1753                         mlx5_translate_port_name(port_name, &data);
1754         }
1755         file = fopen(phys_switch_id, "rb");
1756         if (file == NULL) {
1757                 rte_errno = errno;
1758                 return -rte_errno;
1759         }
1760         port_switch_id_set =
1761                 fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
1762                 c == '\n';
1763         fclose(file);
1764         dir = opendir(pci_device);
1765         if (dir != NULL) {
1766                 closedir(dir);
1767                 device_dir = true;
1768         }
1769         if (port_switch_id_set) {
1770                 /* We have some E-Switch configuration. */
1771                 mlx5_sysfs_check_switch_info(device_dir, &data);
1772         }
1773         *info = data;
1774         assert(!(data.master && data.representor));
1775         if (data.master && data.representor) {
1776                 DRV_LOG(ERR, "ifindex %u device is recognized as master"
1777                              " and as representor", ifindex);
1778                 rte_errno = ENODEV;
1779                 return -rte_errno;
1780         }
1781         return 0;
1782 }
1783
1784 /**
1785  * Analyze gathered port parameters via Netlink to recognize master
1786  * and representor devices for E-Switch configuration.
1787  *
1788  * @param[in] num_vf_set
1789  *   flag of presence of number of VFs port attribute.
1790  * @param[inout] switch_info
1791  *   Port information, including port name as a number and port name
1792  *   type if recognized
1793  *
1794  * @return
1795  *   master and representor flags are set in switch_info according to
1796  *   recognized parameters (if any).
1797  */
1798 void
1799 mlx5_nl_check_switch_info(bool num_vf_set,
1800                           struct mlx5_switch_info *switch_info)
1801 {
1802         switch (switch_info->name_type) {
1803         case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
1804                 /*
1805                  * Name is not recognized, assume the master,
1806                  * check the number of VFs key presence.
1807                  */
1808                 switch_info->master = num_vf_set;
1809                 break;
1810         case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
1811                 /*
1812                  * Name is not set, this assumes the legacy naming
1813                  * schema for master, just check if there is a
1814                  * number of VFs key.
1815                  */
1816                 switch_info->master = num_vf_set;
1817                 break;
1818         case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
1819                 /* New uplink naming schema recognized. */
1820                 switch_info->master = 1;
1821                 break;
1822         case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
1823                 /* Legacy representors naming schema. */
1824                 switch_info->representor = !num_vf_set;
1825                 break;
1826         case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
1827                 /* New representors naming schema. */
1828                 switch_info->representor = 1;
1829                 break;
1830         }
1831 }
1832
1833 /**
1834  * Analyze gathered port parameters via sysfs to recognize master
1835  * and representor devices for E-Switch configuration.
1836  *
1837  * @param[in] device_dir
1838  *   flag of presence of "device" directory under port device key.
1839  * @param[inout] switch_info
1840  *   Port information, including port name as a number and port name
1841  *   type if recognized
1842  *
1843  * @return
1844  *   master and representor flags are set in switch_info according to
1845  *   recognized parameters (if any).
1846  */
1847 void
1848 mlx5_sysfs_check_switch_info(bool device_dir,
1849                              struct mlx5_switch_info *switch_info)
1850 {
1851         switch (switch_info->name_type) {
1852         case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
1853                 /*
1854                  * Name is not recognized, assume the master,
1855                  * check the device directory presence.
1856                  */
1857                 switch_info->master = device_dir;
1858                 break;
1859         case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
1860                 /*
1861                  * Name is not set, this assumes the legacy naming
1862                  * schema for master, just check if there is
1863                  * a device directory.
1864                  */
1865                 switch_info->master = device_dir;
1866                 break;
1867         case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
1868                 /* New uplink naming schema recognized. */
1869                 switch_info->master = 1;
1870                 break;
1871         case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
1872                 /* Legacy representors naming schema. */
1873                 switch_info->representor = !device_dir;
1874                 break;
1875         case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
1876                 /* New representors naming schema. */
1877                 switch_info->representor = 1;
1878                 break;
1879         }
1880 }
1881
1882 /**
1883  * Extract port name, as a number, from sysfs or netlink information.
1884  *
1885  * @param[in] port_name_in
1886  *   String representing the port name.
1887  * @param[out] port_info_out
1888  *   Port information, including port name as a number and port name
1889  *   type if recognized
1890  *
1891  * @return
1892  *   port_name field set according to recognized name format.
1893  */
1894 void
1895 mlx5_translate_port_name(const char *port_name_in,
1896                          struct mlx5_switch_info *port_info_out)
1897 {
1898         char pf_c1, pf_c2, vf_c1, vf_c2;
1899         char *end;
1900         int sc_items;
1901
1902         /*
1903          * Check for port-name as a string of the form pf0vf0
1904          * (support kernel ver >= 5.0 or OFED ver >= 4.6).
1905          */
1906         sc_items = sscanf(port_name_in, "%c%c%d%c%c%d",
1907                           &pf_c1, &pf_c2, &port_info_out->pf_num,
1908                           &vf_c1, &vf_c2, &port_info_out->port_name);
1909         if (sc_items == 6 &&
1910             pf_c1 == 'p' && pf_c2 == 'f' &&
1911             vf_c1 == 'v' && vf_c2 == 'f') {
1912                 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF;
1913                 return;
1914         }
1915         /*
1916          * Check for port-name as a string of the form p0
1917          * (support kernel ver >= 5.0, or OFED ver >= 4.6).
1918          */
1919         sc_items = sscanf(port_name_in, "%c%d",
1920                           &pf_c1, &port_info_out->port_name);
1921         if (sc_items == 2 && pf_c1 == 'p') {
1922                 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
1923                 return;
1924         }
1925         /* Check for port-name as a number (support kernel ver < 5.0 */
1926         errno = 0;
1927         port_info_out->port_name = strtol(port_name_in, &end, 0);
1928         if (!errno &&
1929             (size_t)(end - port_name_in) == strlen(port_name_in)) {
1930                 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY;
1931                 return;
1932         }
1933         port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
1934         return;
1935 }