net/mlx5: move Linux-specific functions
[dpdk.git] / drivers / net / mlx5 / linux / mlx5_ethdev_os.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5
6 #include <stddef.h>
7 #include <inttypes.h>
8 #include <unistd.h>
9 #include <stdbool.h>
10 #include <stdint.h>
11 #include <stdio.h>
12 #include <string.h>
13 #include <stdlib.h>
14 #include <errno.h>
15 #include <dirent.h>
16 #include <net/if.h>
17 #include <sys/ioctl.h>
18 #include <sys/socket.h>
19 #include <netinet/in.h>
20 #include <linux/ethtool.h>
21 #include <linux/sockios.h>
22 #include <fcntl.h>
23 #include <stdalign.h>
24 #include <sys/un.h>
25 #include <time.h>
26
27 #include <rte_atomic.h>
28 #include <rte_ethdev_driver.h>
29 #include <rte_bus_pci.h>
30 #include <rte_mbuf.h>
31 #include <rte_common.h>
32 #include <rte_interrupts.h>
33 #include <rte_malloc.h>
34 #include <rte_string_fns.h>
35 #include <rte_rwlock.h>
36 #include <rte_cycles.h>
37
38 #include <mlx5_glue.h>
39 #include <mlx5_devx_cmds.h>
40 #include <mlx5_common.h>
41
42 #include "mlx5.h"
43 #include "mlx5_rxtx.h"
44 #include "mlx5_utils.h"
45
46 /* Supported speed values found in /usr/include/linux/ethtool.h */
47 #ifndef HAVE_SUPPORTED_40000baseKR4_Full
48 #define SUPPORTED_40000baseKR4_Full (1 << 23)
49 #endif
50 #ifndef HAVE_SUPPORTED_40000baseCR4_Full
51 #define SUPPORTED_40000baseCR4_Full (1 << 24)
52 #endif
53 #ifndef HAVE_SUPPORTED_40000baseSR4_Full
54 #define SUPPORTED_40000baseSR4_Full (1 << 25)
55 #endif
56 #ifndef HAVE_SUPPORTED_40000baseLR4_Full
57 #define SUPPORTED_40000baseLR4_Full (1 << 26)
58 #endif
59 #ifndef HAVE_SUPPORTED_56000baseKR4_Full
60 #define SUPPORTED_56000baseKR4_Full (1 << 27)
61 #endif
62 #ifndef HAVE_SUPPORTED_56000baseCR4_Full
63 #define SUPPORTED_56000baseCR4_Full (1 << 28)
64 #endif
65 #ifndef HAVE_SUPPORTED_56000baseSR4_Full
66 #define SUPPORTED_56000baseSR4_Full (1 << 29)
67 #endif
68 #ifndef HAVE_SUPPORTED_56000baseLR4_Full
69 #define SUPPORTED_56000baseLR4_Full (1 << 30)
70 #endif
71
72 /* Fallback definitions for user headers older than the running kernel. */
73 #ifndef ETHTOOL_GLINKSETTINGS
74 struct ethtool_link_settings {
75         uint32_t cmd;
76         uint32_t speed;
77         uint8_t duplex;
78         uint8_t port;
79         uint8_t phy_address;
80         uint8_t autoneg;
81         uint8_t mdio_support;
82         uint8_t eth_to_mdix;
83         uint8_t eth_tp_mdix_ctrl;
84         int8_t link_mode_masks_nwords;
85         uint32_t reserved[8];
86         uint32_t link_mode_masks[];
87 };
88
89 /* The kernel values can be found in /include/uapi/linux/ethtool.h */
90 #define ETHTOOL_GLINKSETTINGS 0x0000004c
91 #define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
92 #define ETHTOOL_LINK_MODE_Autoneg_BIT 6
93 #define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
94 #define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
95 #define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
96 #define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
97 #define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
98 #define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
99 #define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
100 #define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
101 #define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
102 #define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
103 #define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
104 #define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
105 #define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
106 #define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
107 #endif
108 #ifndef HAVE_ETHTOOL_LINK_MODE_25G
109 #define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
110 #define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
111 #define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
112 #endif
113 #ifndef HAVE_ETHTOOL_LINK_MODE_50G
114 #define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
115 #define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
116 #endif
117 #ifndef HAVE_ETHTOOL_LINK_MODE_100G
118 #define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
119 #define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
120 #define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
121 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
122 #endif
123 #ifndef HAVE_ETHTOOL_LINK_MODE_200G
124 #define ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT 62
125 #define ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT 63
126 #define ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT 0 /* 64 - 64 */
127 #define ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT 1 /* 65 - 64 */
128 #define ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT 2 /* 66 - 64 */
129 #endif
130
131 /**
132  * Get master interface name from private structure.
133  *
134  * @param[in] dev
135  *   Pointer to Ethernet device.
136  * @param[out] ifname
137  *   Interface name output buffer.
138  *
139  * @return
140  *   0 on success, a negative errno value otherwise and rte_errno is set.
141  */
142 int
143 mlx5_get_master_ifname(const char *ibdev_path, char (*ifname)[IF_NAMESIZE])
144 {
145         DIR *dir;
146         struct dirent *dent;
147         unsigned int dev_type = 0;
148         unsigned int dev_port_prev = ~0u;
149         char match[IF_NAMESIZE] = "";
150
151         MLX5_ASSERT(ibdev_path);
152         {
153                 MKSTR(path, "%s/device/net", ibdev_path);
154
155                 dir = opendir(path);
156                 if (dir == NULL) {
157                         rte_errno = errno;
158                         return -rte_errno;
159                 }
160         }
161         while ((dent = readdir(dir)) != NULL) {
162                 char *name = dent->d_name;
163                 FILE *file;
164                 unsigned int dev_port;
165                 int r;
166
167                 if ((name[0] == '.') &&
168                     ((name[1] == '\0') ||
169                      ((name[1] == '.') && (name[2] == '\0'))))
170                         continue;
171
172                 MKSTR(path, "%s/device/net/%s/%s",
173                       ibdev_path, name,
174                       (dev_type ? "dev_id" : "dev_port"));
175
176                 file = fopen(path, "rb");
177                 if (file == NULL) {
178                         if (errno != ENOENT)
179                                 continue;
180                         /*
181                          * Switch to dev_id when dev_port does not exist as
182                          * is the case with Linux kernel versions < 3.15.
183                          */
184 try_dev_id:
185                         match[0] = '\0';
186                         if (dev_type)
187                                 break;
188                         dev_type = 1;
189                         dev_port_prev = ~0u;
190                         rewinddir(dir);
191                         continue;
192                 }
193                 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
194                 fclose(file);
195                 if (r != 1)
196                         continue;
197                 /*
198                  * Switch to dev_id when dev_port returns the same value for
199                  * all ports. May happen when using a MOFED release older than
200                  * 3.0 with a Linux kernel >= 3.15.
201                  */
202                 if (dev_port == dev_port_prev)
203                         goto try_dev_id;
204                 dev_port_prev = dev_port;
205                 if (dev_port == 0)
206                         strlcpy(match, name, sizeof(match));
207         }
208         closedir(dir);
209         if (match[0] == '\0') {
210                 rte_errno = ENOENT;
211                 return -rte_errno;
212         }
213         strncpy(*ifname, match, sizeof(*ifname));
214         return 0;
215 }
216
217 /**
218  * Get interface name from private structure.
219  *
220  * This is a port representor-aware version of mlx5_get_master_ifname().
221  *
222  * @param[in] dev
223  *   Pointer to Ethernet device.
224  * @param[out] ifname
225  *   Interface name output buffer.
226  *
227  * @return
228  *   0 on success, a negative errno value otherwise and rte_errno is set.
229  */
230 int
231 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE])
232 {
233         struct mlx5_priv *priv = dev->data->dev_private;
234         unsigned int ifindex;
235
236         MLX5_ASSERT(priv);
237         MLX5_ASSERT(priv->sh);
238         ifindex = mlx5_ifindex(dev);
239         if (!ifindex) {
240                 if (!priv->representor)
241                         return mlx5_get_master_ifname(priv->sh->ibdev_path,
242                                                       ifname);
243                 rte_errno = ENXIO;
244                 return -rte_errno;
245         }
246         if (if_indextoname(ifindex, &(*ifname)[0]))
247                 return 0;
248         rte_errno = errno;
249         return -rte_errno;
250 }
251
252 /**
253  * Perform ifreq ioctl() on associated Ethernet device.
254  *
255  * @param[in] dev
256  *   Pointer to Ethernet device.
257  * @param req
258  *   Request number to pass to ioctl().
259  * @param[out] ifr
260  *   Interface request structure output buffer.
261  *
262  * @return
263  *   0 on success, a negative errno value otherwise and rte_errno is set.
264  */
265 int
266 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)
267 {
268         int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
269         int ret = 0;
270
271         if (sock == -1) {
272                 rte_errno = errno;
273                 return -rte_errno;
274         }
275         ret = mlx5_get_ifname(dev, &ifr->ifr_name);
276         if (ret)
277                 goto error;
278         ret = ioctl(sock, req, ifr);
279         if (ret == -1) {
280                 rte_errno = errno;
281                 goto error;
282         }
283         close(sock);
284         return 0;
285 error:
286         close(sock);
287         return -rte_errno;
288 }
289
290 /**
291  * Get device MTU.
292  *
293  * @param dev
294  *   Pointer to Ethernet device.
295  * @param[out] mtu
296  *   MTU value output buffer.
297  *
298  * @return
299  *   0 on success, a negative errno value otherwise and rte_errno is set.
300  */
301 int
302 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)
303 {
304         struct ifreq request;
305         int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request);
306
307         if (ret)
308                 return ret;
309         *mtu = request.ifr_mtu;
310         return 0;
311 }
312
313 /**
314  * Set device MTU.
315  *
316  * @param dev
317  *   Pointer to Ethernet device.
318  * @param mtu
319  *   MTU value to set.
320  *
321  * @return
322  *   0 on success, a negative errno value otherwise and rte_errno is set.
323  */
324 int
325 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
326 {
327         struct ifreq request = { .ifr_mtu = mtu, };
328
329         return mlx5_ifreq(dev, SIOCSIFMTU, &request);
330 }
331
332 /**
333  * Set device flags.
334  *
335  * @param dev
336  *   Pointer to Ethernet device.
337  * @param keep
338  *   Bitmask for flags that must remain untouched.
339  * @param flags
340  *   Bitmask for flags to modify.
341  *
342  * @return
343  *   0 on success, a negative errno value otherwise and rte_errno is set.
344  */
345 int
346 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags)
347 {
348         struct ifreq request;
349         int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request);
350
351         if (ret)
352                 return ret;
353         request.ifr_flags &= keep;
354         request.ifr_flags |= flags & ~keep;
355         return mlx5_ifreq(dev, SIOCSIFFLAGS, &request);
356 }
357
358 /**
359  * Get device current raw clock counter
360  *
361  * @param dev
362  *   Pointer to Ethernet device structure.
363  * @param[out] time
364  *   Current raw clock counter of the device.
365  *
366  * @return
367  *   0 if the clock has correctly been read
368  *   The value of errno in case of error
369  */
370 int
371 mlx5_read_clock(struct rte_eth_dev *dev, uint64_t *clock)
372 {
373         struct mlx5_priv *priv = dev->data->dev_private;
374         struct ibv_context *ctx = priv->sh->ctx;
375         struct ibv_values_ex values;
376         int err = 0;
377
378         values.comp_mask = IBV_VALUES_MASK_RAW_CLOCK;
379         err = mlx5_glue->query_rt_values_ex(ctx, &values);
380         if (err != 0) {
381                 DRV_LOG(WARNING, "Could not query the clock !");
382                 return err;
383         }
384         *clock = values.raw_clock.tv_nsec;
385         return 0;
386 }
387
388 /**
389  * Retrieve the master device for representor in the same switch domain.
390  *
391  * @param dev
392  *   Pointer to representor Ethernet device structure.
393  *
394  * @return
395  *   Master device structure  on success, NULL otherwise.
396  */
397 static struct rte_eth_dev *
398 mlx5_find_master_dev(struct rte_eth_dev *dev)
399 {
400         struct mlx5_priv *priv;
401         uint16_t port_id;
402         uint16_t domain_id;
403
404         priv = dev->data->dev_private;
405         domain_id = priv->domain_id;
406         MLX5_ASSERT(priv->representor);
407         MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
408                 struct mlx5_priv *opriv =
409                         rte_eth_devices[port_id].data->dev_private;
410                 if (opriv &&
411                     opriv->master &&
412                     opriv->domain_id == domain_id &&
413                     opriv->sh == priv->sh)
414                         return &rte_eth_devices[port_id];
415         }
416         return NULL;
417 }
418
419 /**
420  * DPDK callback to retrieve physical link information.
421  *
422  * @param dev
423  *   Pointer to Ethernet device structure.
424  * @param[out] link
425  *   Storage for current link status.
426  *
427  * @return
428  *   0 on success, a negative errno value otherwise and rte_errno is set.
429  */
430 static int
431 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
432                                struct rte_eth_link *link)
433 {
434         struct mlx5_priv *priv = dev->data->dev_private;
435         struct ethtool_cmd edata = {
436                 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */
437         };
438         struct ifreq ifr;
439         struct rte_eth_link dev_link;
440         int link_speed = 0;
441         int ret;
442
443         ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
444         if (ret) {
445                 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
446                         dev->data->port_id, strerror(rte_errno));
447                 return ret;
448         }
449         dev_link = (struct rte_eth_link) {
450                 .link_status = ((ifr.ifr_flags & IFF_UP) &&
451                                 (ifr.ifr_flags & IFF_RUNNING)),
452         };
453         ifr = (struct ifreq) {
454                 .ifr_data = (void *)&edata,
455         };
456         ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
457         if (ret) {
458                 if (ret == -ENOTSUP && priv->representor) {
459                         struct rte_eth_dev *master;
460
461                         /*
462                          * For representors we can try to inherit link
463                          * settings from the master device. Actually
464                          * link settings do not make a lot of sense
465                          * for representors due to missing physical
466                          * link. The old kernel drivers supported
467                          * emulated settings query for representors,
468                          * the new ones do not, so we have to add
469                          * this code for compatibility issues.
470                          */
471                         master = mlx5_find_master_dev(dev);
472                         if (master) {
473                                 ifr = (struct ifreq) {
474                                         .ifr_data = (void *)&edata,
475                                 };
476                                 ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);
477                         }
478                 }
479                 if (ret) {
480                         DRV_LOG(WARNING,
481                                 "port %u ioctl(SIOCETHTOOL,"
482                                 " ETHTOOL_GSET) failed: %s",
483                                 dev->data->port_id, strerror(rte_errno));
484                         return ret;
485                 }
486         }
487         link_speed = ethtool_cmd_speed(&edata);
488         if (link_speed == -1)
489                 dev_link.link_speed = ETH_SPEED_NUM_NONE;
490         else
491                 dev_link.link_speed = link_speed;
492         priv->link_speed_capa = 0;
493         if (edata.supported & SUPPORTED_Autoneg)
494                 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
495         if (edata.supported & (SUPPORTED_1000baseT_Full |
496                                SUPPORTED_1000baseKX_Full))
497                 priv->link_speed_capa |= ETH_LINK_SPEED_1G;
498         if (edata.supported & SUPPORTED_10000baseKR_Full)
499                 priv->link_speed_capa |= ETH_LINK_SPEED_10G;
500         if (edata.supported & (SUPPORTED_40000baseKR4_Full |
501                                SUPPORTED_40000baseCR4_Full |
502                                SUPPORTED_40000baseSR4_Full |
503                                SUPPORTED_40000baseLR4_Full))
504                 priv->link_speed_capa |= ETH_LINK_SPEED_40G;
505         dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
506                                 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
507         dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
508                         ETH_LINK_SPEED_FIXED);
509         if (((dev_link.link_speed && !dev_link.link_status) ||
510              (!dev_link.link_speed && dev_link.link_status))) {
511                 rte_errno = EAGAIN;
512                 return -rte_errno;
513         }
514         *link = dev_link;
515         return 0;
516 }
517
518 /**
519  * Retrieve physical link information (unlocked version using new ioctl).
520  *
521  * @param dev
522  *   Pointer to Ethernet device structure.
523  * @param[out] link
524  *   Storage for current link status.
525  *
526  * @return
527  *   0 on success, a negative errno value otherwise and rte_errno is set.
528  */
529 static int
530 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
531                              struct rte_eth_link *link)
532
533 {
534         struct mlx5_priv *priv = dev->data->dev_private;
535         struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
536         struct ifreq ifr;
537         struct rte_eth_link dev_link;
538         struct rte_eth_dev *master = NULL;
539         uint64_t sc;
540         int ret;
541
542         ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
543         if (ret) {
544                 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
545                         dev->data->port_id, strerror(rte_errno));
546                 return ret;
547         }
548         dev_link = (struct rte_eth_link) {
549                 .link_status = ((ifr.ifr_flags & IFF_UP) &&
550                                 (ifr.ifr_flags & IFF_RUNNING)),
551         };
552         ifr = (struct ifreq) {
553                 .ifr_data = (void *)&gcmd,
554         };
555         ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
556         if (ret) {
557                 if (ret == -ENOTSUP && priv->representor) {
558                         /*
559                          * For representors we can try to inherit link
560                          * settings from the master device. Actually
561                          * link settings do not make a lot of sense
562                          * for representors due to missing physical
563                          * link. The old kernel drivers supported
564                          * emulated settings query for representors,
565                          * the new ones do not, so we have to add
566                          * this code for compatibility issues.
567                          */
568                         master = mlx5_find_master_dev(dev);
569                         if (master) {
570                                 ifr = (struct ifreq) {
571                                         .ifr_data = (void *)&gcmd,
572                                 };
573                                 ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);
574                         }
575                 }
576                 if (ret) {
577                         DRV_LOG(DEBUG,
578                                 "port %u ioctl(SIOCETHTOOL,"
579                                 " ETHTOOL_GLINKSETTINGS) failed: %s",
580                                 dev->data->port_id, strerror(rte_errno));
581                         return ret;
582                 }
583         }
584         gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;
585
586         alignas(struct ethtool_link_settings)
587         uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +
588                      sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];
589         struct ethtool_link_settings *ecmd = (void *)data;
590
591         *ecmd = gcmd;
592         ifr.ifr_data = (void *)ecmd;
593         ret = mlx5_ifreq(master ? master : dev, SIOCETHTOOL, &ifr);
594         if (ret) {
595                 DRV_LOG(DEBUG,
596                         "port %u ioctl(SIOCETHTOOL,"
597                         "ETHTOOL_GLINKSETTINGS) failed: %s",
598                         dev->data->port_id, strerror(rte_errno));
599                 return ret;
600         }
601         dev_link.link_speed = (ecmd->speed == UINT32_MAX) ? ETH_SPEED_NUM_NONE :
602                                                             ecmd->speed;
603         sc = ecmd->link_mode_masks[0] |
604                 ((uint64_t)ecmd->link_mode_masks[1] << 32);
605         priv->link_speed_capa = 0;
606         if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT))
607                 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
608         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) |
609                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)))
610                 priv->link_speed_capa |= ETH_LINK_SPEED_1G;
611         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) |
612                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) |
613                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)))
614                 priv->link_speed_capa |= ETH_LINK_SPEED_10G;
615         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) |
616                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)))
617                 priv->link_speed_capa |= ETH_LINK_SPEED_20G;
618         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) |
619                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) |
620                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) |
621                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)))
622                 priv->link_speed_capa |= ETH_LINK_SPEED_40G;
623         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) |
624                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) |
625                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) |
626                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)))
627                 priv->link_speed_capa |= ETH_LINK_SPEED_56G;
628         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) |
629                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) |
630                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)))
631                 priv->link_speed_capa |= ETH_LINK_SPEED_25G;
632         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) |
633                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)))
634                 priv->link_speed_capa |= ETH_LINK_SPEED_50G;
635         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) |
636                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) |
637                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) |
638                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)))
639                 priv->link_speed_capa |= ETH_LINK_SPEED_100G;
640         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT) |
641                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT)))
642                 priv->link_speed_capa |= ETH_LINK_SPEED_200G;
643
644         sc = ecmd->link_mode_masks[2] |
645                 ((uint64_t)ecmd->link_mode_masks[3] << 32);
646         if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT) |
647                   MLX5_BITSHIFT
648                        (ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT) |
649                   MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT)))
650                 priv->link_speed_capa |= ETH_LINK_SPEED_200G;
651         dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
652                                 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
653         dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
654                                   ETH_LINK_SPEED_FIXED);
655         if (((dev_link.link_speed && !dev_link.link_status) ||
656              (!dev_link.link_speed && dev_link.link_status))) {
657                 rte_errno = EAGAIN;
658                 return -rte_errno;
659         }
660         *link = dev_link;
661         return 0;
662 }
663
664 /**
665  * DPDK callback to retrieve physical link information.
666  *
667  * @param dev
668  *   Pointer to Ethernet device structure.
669  * @param wait_to_complete
670  *   Wait for request completion.
671  *
672  * @return
673  *   0 if link status was not updated, positive if it was, a negative errno
674  *   value otherwise and rte_errno is set.
675  */
676 int
677 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
678 {
679         int ret;
680         struct rte_eth_link dev_link;
681         time_t start_time = time(NULL);
682         int retry = MLX5_GET_LINK_STATUS_RETRY_COUNT;
683
684         do {
685                 ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
686                 if (ret == -ENOTSUP)
687                         ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
688                 if (ret == 0)
689                         break;
690                 /* Handle wait to complete situation. */
691                 if ((wait_to_complete || retry) && ret == -EAGAIN) {
692                         if (abs((int)difftime(time(NULL), start_time)) <
693                             MLX5_LINK_STATUS_TIMEOUT) {
694                                 usleep(0);
695                                 continue;
696                         } else {
697                                 rte_errno = EBUSY;
698                                 return -rte_errno;
699                         }
700                 } else if (ret < 0) {
701                         return ret;
702                 }
703         } while (wait_to_complete || retry-- > 0);
704         ret = !!memcmp(&dev->data->dev_link, &dev_link,
705                        sizeof(struct rte_eth_link));
706         dev->data->dev_link = dev_link;
707         return ret;
708 }
709
710 /**
711  * DPDK callback to get flow control status.
712  *
713  * @param dev
714  *   Pointer to Ethernet device structure.
715  * @param[out] fc_conf
716  *   Flow control output buffer.
717  *
718  * @return
719  *   0 on success, a negative errno value otherwise and rte_errno is set.
720  */
721 int
722 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
723 {
724         struct ifreq ifr;
725         struct ethtool_pauseparam ethpause = {
726                 .cmd = ETHTOOL_GPAUSEPARAM
727         };
728         int ret;
729
730         ifr.ifr_data = (void *)&ethpause;
731         ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
732         if (ret) {
733                 DRV_LOG(WARNING,
734                         "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:"
735                         " %s",
736                         dev->data->port_id, strerror(rte_errno));
737                 return ret;
738         }
739         fc_conf->autoneg = ethpause.autoneg;
740         if (ethpause.rx_pause && ethpause.tx_pause)
741                 fc_conf->mode = RTE_FC_FULL;
742         else if (ethpause.rx_pause)
743                 fc_conf->mode = RTE_FC_RX_PAUSE;
744         else if (ethpause.tx_pause)
745                 fc_conf->mode = RTE_FC_TX_PAUSE;
746         else
747                 fc_conf->mode = RTE_FC_NONE;
748         return 0;
749 }
750
751 /**
752  * DPDK callback to modify flow control parameters.
753  *
754  * @param dev
755  *   Pointer to Ethernet device structure.
756  * @param[in] fc_conf
757  *   Flow control parameters.
758  *
759  * @return
760  *   0 on success, a negative errno value otherwise and rte_errno is set.
761  */
762 int
763 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
764 {
765         struct ifreq ifr;
766         struct ethtool_pauseparam ethpause = {
767                 .cmd = ETHTOOL_SPAUSEPARAM
768         };
769         int ret;
770
771         ifr.ifr_data = (void *)&ethpause;
772         ethpause.autoneg = fc_conf->autoneg;
773         if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
774             (fc_conf->mode & RTE_FC_RX_PAUSE))
775                 ethpause.rx_pause = 1;
776         else
777                 ethpause.rx_pause = 0;
778
779         if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
780             (fc_conf->mode & RTE_FC_TX_PAUSE))
781                 ethpause.tx_pause = 1;
782         else
783                 ethpause.tx_pause = 0;
784         ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
785         if (ret) {
786                 DRV_LOG(WARNING,
787                         "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
788                         " failed: %s",
789                         dev->data->port_id, strerror(rte_errno));
790                 return ret;
791         }
792         return 0;
793 }
794
795 /**
796  * Handle asynchronous removal event for entire multiport device.
797  *
798  * @param sh
799  *   Infiniband device shared context.
800  */
801 static void
802 mlx5_dev_interrupt_device_fatal(struct mlx5_dev_ctx_shared *sh)
803 {
804         uint32_t i;
805
806         for (i = 0; i < sh->max_port; ++i) {
807                 struct rte_eth_dev *dev;
808
809                 if (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) {
810                         /*
811                          * Or not existing port either no
812                          * handler installed for this port.
813                          */
814                         continue;
815                 }
816                 dev = &rte_eth_devices[sh->port[i].ih_port_id];
817                 MLX5_ASSERT(dev);
818                 if (dev->data->dev_conf.intr_conf.rmv)
819                         _rte_eth_dev_callback_process
820                                 (dev, RTE_ETH_EVENT_INTR_RMV, NULL);
821         }
822 }
823
824 /**
825  * Handle shared asynchronous events the NIC (removal event
826  * and link status change). Supports multiport IB device.
827  *
828  * @param cb_arg
829  *   Callback argument.
830  */
831 void
832 mlx5_dev_interrupt_handler(void *cb_arg)
833 {
834         struct mlx5_dev_ctx_shared *sh = cb_arg;
835         struct ibv_async_event event;
836
837         /* Read all message from the IB device and acknowledge them. */
838         for (;;) {
839                 struct rte_eth_dev *dev;
840                 uint32_t tmp;
841
842                 if (mlx5_glue->get_async_event(sh->ctx, &event))
843                         break;
844                 /* Retrieve and check IB port index. */
845                 tmp = (uint32_t)event.element.port_num;
846                 if (!tmp && event.event_type == IBV_EVENT_DEVICE_FATAL) {
847                         /*
848                          * The DEVICE_FATAL event is called once for
849                          * entire device without port specifying.
850                          * We should notify all existing ports.
851                          */
852                         mlx5_glue->ack_async_event(&event);
853                         mlx5_dev_interrupt_device_fatal(sh);
854                         continue;
855                 }
856                 MLX5_ASSERT(tmp && (tmp <= sh->max_port));
857                 if (!tmp) {
858                         /* Unsupported device level event. */
859                         mlx5_glue->ack_async_event(&event);
860                         DRV_LOG(DEBUG,
861                                 "unsupported common event (type %d)",
862                                 event.event_type);
863                         continue;
864                 }
865                 if (tmp > sh->max_port) {
866                         /* Invalid IB port index. */
867                         mlx5_glue->ack_async_event(&event);
868                         DRV_LOG(DEBUG,
869                                 "cannot handle an event (type %d)"
870                                 "due to invalid IB port index (%u)",
871                                 event.event_type, tmp);
872                         continue;
873                 }
874                 if (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) {
875                         /* No handler installed. */
876                         mlx5_glue->ack_async_event(&event);
877                         DRV_LOG(DEBUG,
878                                 "cannot handle an event (type %d)"
879                                 "due to no handler installed for port %u",
880                                 event.event_type, tmp);
881                         continue;
882                 }
883                 /* Retrieve ethernet device descriptor. */
884                 tmp = sh->port[tmp - 1].ih_port_id;
885                 dev = &rte_eth_devices[tmp];
886                 MLX5_ASSERT(dev);
887                 if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
888                      event.event_type == IBV_EVENT_PORT_ERR) &&
889                         dev->data->dev_conf.intr_conf.lsc) {
890                         mlx5_glue->ack_async_event(&event);
891                         if (mlx5_link_update(dev, 0) == -EAGAIN) {
892                                 usleep(0);
893                                 continue;
894                         }
895                         _rte_eth_dev_callback_process
896                                 (dev, RTE_ETH_EVENT_INTR_LSC, NULL);
897                         continue;
898                 }
899                 DRV_LOG(DEBUG,
900                         "port %u cannot handle an unknown event (type %d)",
901                         dev->data->port_id, event.event_type);
902                 mlx5_glue->ack_async_event(&event);
903         }
904 }
905
906 /*
907  * Unregister callback handler safely. The handler may be active
908  * while we are trying to unregister it, in this case code -EAGAIN
909  * is returned by rte_intr_callback_unregister(). This routine checks
910  * the return code and tries to unregister handler again.
911  *
912  * @param handle
913  *   interrupt handle
914  * @param cb_fn
915  *   pointer to callback routine
916  * @cb_arg
917  *   opaque callback parameter
918  */
919 void
920 mlx5_intr_callback_unregister(const struct rte_intr_handle *handle,
921                               rte_intr_callback_fn cb_fn, void *cb_arg)
922 {
923         /*
924          * Try to reduce timeout management overhead by not calling
925          * the timer related routines on the first iteration. If the
926          * unregistering succeeds on first call there will be no
927          * timer calls at all.
928          */
929         uint64_t twait = 0;
930         uint64_t start = 0;
931
932         do {
933                 int ret;
934
935                 ret = rte_intr_callback_unregister(handle, cb_fn, cb_arg);
936                 if (ret >= 0)
937                         return;
938                 if (ret != -EAGAIN) {
939                         DRV_LOG(INFO, "failed to unregister interrupt"
940                                       " handler (error: %d)", ret);
941                         MLX5_ASSERT(false);
942                         return;
943                 }
944                 if (twait) {
945                         struct timespec onems;
946
947                         /* Wait one millisecond and try again. */
948                         onems.tv_sec = 0;
949                         onems.tv_nsec = NS_PER_S / MS_PER_S;
950                         nanosleep(&onems, 0);
951                         /* Check whether one second elapsed. */
952                         if ((rte_get_timer_cycles() - start) <= twait)
953                                 continue;
954                 } else {
955                         /*
956                          * We get the amount of timer ticks for one second.
957                          * If this amount elapsed it means we spent one
958                          * second in waiting. This branch is executed once
959                          * on first iteration.
960                          */
961                         twait = rte_get_timer_hz();
962                         MLX5_ASSERT(twait);
963                 }
964                 /*
965                  * Timeout elapsed, show message (once a second) and retry.
966                  * We have no other acceptable option here, if we ignore
967                  * the unregistering return code the handler will not
968                  * be unregistered, fd will be closed and we may get the
969                  * crush. Hanging and messaging in the loop seems not to be
970                  * the worst choice.
971                  */
972                 DRV_LOG(INFO, "Retrying to unregister interrupt handler");
973                 start = rte_get_timer_cycles();
974         } while (true);
975 }
976
/**
 * Handle DEVX interrupts from the NIC.
 * This function is probably called from the DPDK host thread.
 *
 * When built with HAVE_IBV_DEVX_ASYNC, asynchronous command completions
 * are fetched until the glue call reports a non-zero result, and each one
 * is handed to the flow counter pool query handler together with the
 * command status taken from the output area that follows the completion
 * header. Without HAVE_IBV_DEVX_ASYNC the function does nothing.
 *
 * @param cb_arg
 *   Callback argument, the Infiniband device shared context.
 */
void
mlx5_dev_interrupt_handler_devx(void *cb_arg)
{
#ifdef HAVE_IBV_DEVX_ASYNC
	struct mlx5_dev_ctx_shared *sh = cb_arg;
	union {
		struct mlx5dv_devx_async_cmd_hdr cmd_resp;
		uint8_t buf[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
			    MLX5_ST_SZ_BYTES(traffic_counter) +
			    sizeof(struct mlx5dv_devx_async_cmd_hdr)];
	} out;
	/* Command output starts right after the async command header. */
	uint8_t *cmd_out = out.buf + sizeof(out.cmd_resp);

	for (;;) {
		if (mlx5_glue->devx_get_async_cmd_comp(sh->devx_comp,
						       &out.cmd_resp,
						       sizeof(out.buf)))
			break;
		mlx5_flow_async_pool_query_handle
			(sh, (uint64_t)out.cmd_resp.wr_id,
			 mlx5_devx_get_out_command_status(cmd_out));
	}
#else
	(void)cb_arg;
#endif /* HAVE_IBV_DEVX_ASYNC */
}
1008
1009 /**
1010  * DPDK callback to bring the link DOWN.
1011  *
1012  * @param dev
1013  *   Pointer to Ethernet device structure.
1014  *
1015  * @return
1016  *   0 on success, a negative errno value otherwise and rte_errno is set.
1017  */
1018 int
1019 mlx5_set_link_down(struct rte_eth_dev *dev)
1020 {
1021         return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP);
1022 }
1023
1024 /**
1025  * DPDK callback to bring the link UP.
1026  *
1027  * @param dev
1028  *   Pointer to Ethernet device structure.
1029  *
1030  * @return
1031  *   0 on success, a negative errno value otherwise and rte_errno is set.
1032  */
1033 int
1034 mlx5_set_link_up(struct rte_eth_dev *dev)
1035 {
1036         return mlx5_set_flags(dev, ~IFF_UP, IFF_UP);
1037 }
1038
1039 /**
1040  * Check if mlx5 device was removed.
1041  *
1042  * @param dev
1043  *   Pointer to Ethernet device structure.
1044  *
1045  * @return
1046  *   1 when device is removed, otherwise 0.
1047  */
1048 int
1049 mlx5_is_removed(struct rte_eth_dev *dev)
1050 {
1051         struct ibv_device_attr device_attr;
1052         struct mlx5_priv *priv = dev->data->dev_private;
1053
1054         if (mlx5_glue->query_device(priv->sh->ctx, &device_attr) == EIO)
1055                 return 1;
1056         return 0;
1057 }
1058
1059 /**
1060  * Get switch information associated with network interface.
1061  *
1062  * @param ifindex
1063  *   Network interface index.
1064  * @param[out] info
1065  *   Switch information object, populated in case of success.
1066  *
1067  * @return
1068  *   0 on success, a negative errno value otherwise and rte_errno is set.
1069  */
1070 int
1071 mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info)
1072 {
1073         char ifname[IF_NAMESIZE];
1074         char port_name[IF_NAMESIZE];
1075         FILE *file;
1076         struct mlx5_switch_info data = {
1077                 .master = 0,
1078                 .representor = 0,
1079                 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
1080                 .port_name = 0,
1081                 .switch_id = 0,
1082         };
1083         DIR *dir;
1084         bool port_switch_id_set = false;
1085         bool device_dir = false;
1086         char c;
1087         int ret;
1088
1089         if (!if_indextoname(ifindex, ifname)) {
1090                 rte_errno = errno;
1091                 return -rte_errno;
1092         }
1093
1094         MKSTR(phys_port_name, "/sys/class/net/%s/phys_port_name",
1095               ifname);
1096         MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
1097               ifname);
1098         MKSTR(pci_device, "/sys/class/net/%s/device",
1099               ifname);
1100
1101         file = fopen(phys_port_name, "rb");
1102         if (file != NULL) {
1103                 ret = fscanf(file, "%s", port_name);
1104                 fclose(file);
1105                 if (ret == 1)
1106                         mlx5_translate_port_name(port_name, &data);
1107         }
1108         file = fopen(phys_switch_id, "rb");
1109         if (file == NULL) {
1110                 rte_errno = errno;
1111                 return -rte_errno;
1112         }
1113         port_switch_id_set =
1114                 fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
1115                 c == '\n';
1116         fclose(file);
1117         dir = opendir(pci_device);
1118         if (dir != NULL) {
1119                 closedir(dir);
1120                 device_dir = true;
1121         }
1122         if (port_switch_id_set) {
1123                 /* We have some E-Switch configuration. */
1124                 mlx5_sysfs_check_switch_info(device_dir, &data);
1125         }
1126         *info = data;
1127         MLX5_ASSERT(!(data.master && data.representor));
1128         if (data.master && data.representor) {
1129                 DRV_LOG(ERR, "ifindex %u device is recognized as master"
1130                              " and as representor", ifindex);
1131                 rte_errno = ENODEV;
1132                 return -rte_errno;
1133         }
1134         return 0;
1135 }
1136
1137 /**
1138  * Analyze gathered port parameters via sysfs to recognize master
1139  * and representor devices for E-Switch configuration.
1140  *
1141  * @param[in] device_dir
1142  *   flag of presence of "device" directory under port device key.
1143  * @param[inout] switch_info
1144  *   Port information, including port name as a number and port name
1145  *   type if recognized
1146  *
1147  * @return
1148  *   master and representor flags are set in switch_info according to
1149  *   recognized parameters (if any).
1150  */
1151 void
1152 mlx5_sysfs_check_switch_info(bool device_dir,
1153                              struct mlx5_switch_info *switch_info)
1154 {
1155         switch (switch_info->name_type) {
1156         case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
1157                 /*
1158                  * Name is not recognized, assume the master,
1159                  * check the device directory presence.
1160                  */
1161                 switch_info->master = device_dir;
1162                 break;
1163         case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
1164                 /*
1165                  * Name is not set, this assumes the legacy naming
1166                  * schema for master, just check if there is
1167                  * a device directory.
1168                  */
1169                 switch_info->master = device_dir;
1170                 break;
1171         case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
1172                 /* New uplink naming schema recognized. */
1173                 switch_info->master = 1;
1174                 break;
1175         case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
1176                 /* Legacy representors naming schema. */
1177                 switch_info->representor = !device_dir;
1178                 break;
1179         case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
1180                 /* New representors naming schema. */
1181                 switch_info->representor = 1;
1182                 break;
1183         }
1184 }
1185
1186 /**
1187  * DPDK callback to retrieve plug-in module EEPROM information (type and size).
1188  *
1189  * @param dev
1190  *   Pointer to Ethernet device structure.
1191  * @param[out] modinfo
1192  *   Storage for plug-in module EEPROM information.
1193  *
1194  * @return
1195  *   0 on success, a negative errno value otherwise and rte_errno is set.
1196  */
1197 int
1198 mlx5_get_module_info(struct rte_eth_dev *dev,
1199                      struct rte_eth_dev_module_info *modinfo)
1200 {
1201         struct ethtool_modinfo info = {
1202                 .cmd = ETHTOOL_GMODULEINFO,
1203         };
1204         struct ifreq ifr = (struct ifreq) {
1205                 .ifr_data = (void *)&info,
1206         };
1207         int ret = 0;
1208
1209         if (!dev || !modinfo) {
1210                 DRV_LOG(WARNING, "missing argument, cannot get module info");
1211                 rte_errno = EINVAL;
1212                 return -rte_errno;
1213         }
1214         ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
1215         if (ret) {
1216                 DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s",
1217                         dev->data->port_id, strerror(rte_errno));
1218                 return ret;
1219         }
1220         modinfo->type = info.type;
1221         modinfo->eeprom_len = info.eeprom_len;
1222         return ret;
1223 }
1224
1225 /**
1226  * DPDK callback to retrieve plug-in module EEPROM data.
1227  *
1228  * @param dev
1229  *   Pointer to Ethernet device structure.
1230  * @param[out] info
1231  *   Storage for plug-in module EEPROM data.
1232  *
1233  * @return
1234  *   0 on success, a negative errno value otherwise and rte_errno is set.
1235  */
1236 int mlx5_get_module_eeprom(struct rte_eth_dev *dev,
1237                            struct rte_dev_eeprom_info *info)
1238 {
1239         struct ethtool_eeprom *eeprom;
1240         struct ifreq ifr;
1241         int ret = 0;
1242
1243         if (!dev || !info) {
1244                 DRV_LOG(WARNING, "missing argument, cannot get module eeprom");
1245                 rte_errno = EINVAL;
1246                 return -rte_errno;
1247         }
1248         eeprom = rte_calloc(__func__, 1,
1249                             (sizeof(struct ethtool_eeprom) + info->length), 0);
1250         if (!eeprom) {
1251                 DRV_LOG(WARNING, "port %u cannot allocate memory for "
1252                         "eeprom data", dev->data->port_id);
1253                 rte_errno = ENOMEM;
1254                 return -rte_errno;
1255         }
1256         eeprom->cmd = ETHTOOL_GMODULEEEPROM;
1257         eeprom->offset = info->offset;
1258         eeprom->len = info->length;
1259         ifr = (struct ifreq) {
1260                 .ifr_data = (void *)eeprom,
1261         };
1262         ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
1263         if (ret)
1264                 DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s",
1265                         dev->data->port_id, strerror(rte_errno));
1266         else
1267                 rte_memcpy(info->data, eeprom->data, info->length);
1268         rte_free(eeprom);
1269         return ret;
1270 }