1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2015 6WIND S.A.
3 * Copyright 2015 Mellanox.
19 #include <sys/ioctl.h>
20 #include <sys/socket.h>
21 #include <netinet/in.h>
22 #include <linux/ethtool.h>
23 #include <linux/sockios.h>
29 #include <rte_atomic.h>
30 #include <rte_ethdev_driver.h>
31 #include <rte_bus_pci.h>
33 #include <rte_common.h>
34 #include <rte_interrupts.h>
35 #include <rte_malloc.h>
38 #include "mlx5_glue.h"
39 #include "mlx5_rxtx.h"
40 #include "mlx5_utils.h"
42 /* Add defines in case the running kernel is not the same as user headers. */
43 #ifndef ETHTOOL_GLINKSETTINGS
44 struct ethtool_link_settings {
53 uint8_t eth_tp_mdix_ctrl;
/* Signed on purpose: mlx5_link_update_unlocked_gs() negates this field after its first query. */
54 int8_t link_mode_masks_nwords;
/* Flexible array of 32-bit mask words; the link update code reads words 0 and 1. */
56 uint32_t link_mode_masks[];
59 #define ETHTOOL_GLINKSETTINGS 0x0000004c
/* Bit positions below are tested against a mask built from link_mode_masks[0] and link_mode_masks[1]. */
60 #define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
61 #define ETHTOOL_LINK_MODE_Autoneg_BIT 6
62 #define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
63 #define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
64 #define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
65 #define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
66 #define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
67 #define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
68 #define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
69 #define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
70 #define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
71 #define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
72 #define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
73 #define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
74 #define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
75 #define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
/* Positions 32 and above land in link_mode_masks[1], which the link update code shifts into the upper half of its combined mask. */
77 #ifndef HAVE_ETHTOOL_LINK_MODE_25G
78 #define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
79 #define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
80 #define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
82 #ifndef HAVE_ETHTOOL_LINK_MODE_50G
83 #define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
84 #define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
86 #ifndef HAVE_ETHTOOL_LINK_MODE_100G
87 #define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
88 #define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
89 #define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
90 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
94 * Get interface name from private structure.
97 * Pointer to Ethernet device.
99 * Interface name output buffer.
102 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Scans the entries of "<ibdev_path>/device/net" for the one whose port attribute matches this port. */
105 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE])
107 struct priv *priv = dev->data->dev_private;
/* dev_type selects the sysfs attribute: zero reads "dev_port", non-zero reads "dev_id". */
110 unsigned int dev_type = 0;
111 unsigned int dev_port_prev = ~0u;
/* Empty string means "no match yet"; it is tested after the directory walk. */
112 char match[IF_NAMESIZE] = "";
115 MKSTR(path, "%s/device/net", priv->ibdev_path);
123 while ((dent = readdir(dir)) != NULL) {
124 char *name = dent->d_name;
126 unsigned int dev_port;
/* Matches the "." and ".." directory entries. */
129 if ((name[0] == '.') &&
130 ((name[1] == '\0') ||
131 ((name[1] == '.') && (name[2] == '\0'))))
134 MKSTR(path, "%s/device/net/%s/%s",
135 priv->ibdev_path, name,
136 (dev_type ? "dev_id" : "dev_port"));
138 file = fopen(path, "rb");
143 * Switch to dev_id when dev_port does not exist as
144 * is the case with Linux kernel versions < 3.15.
/* dev_id is parsed as hexadecimal, dev_port as unsigned decimal. */
155 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
160 * Switch to dev_id when dev_port returns the same value for
161 * all ports. May happen when using a MOFED release older than
162 * 3.0 with a Linux kernel >= 3.15.
164 if (dev_port == dev_port_prev)
166 dev_port_prev = dev_port;
/* The sysfs value is compared with priv->port minus one. */
167 if (dev_port == (priv->port - 1u))
168 snprintf(match, sizeof(match), "%s", name);
171 if (match[0] == '\0') {
/* match was written by snprintf() into an IF_NAMESIZE buffer, so this copy of IF_NAMESIZE bytes includes its terminator. */
175 strncpy(*ifname, match, sizeof(*ifname));
180 * Perform ifreq ioctl() on associated Ethernet device.
183 * Pointer to Ethernet device.
185 * Request number to pass to ioctl().
187 * Interface request structure output buffer.
190 * 0 on success, a negative errno value otherwise and rte_errno is set.
193 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)
/* A PF_INET datagram socket is opened here to carry the ioctl. */
195 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
/* The interface name is written into the request before it is issued, so callers need not set ifr_name. */
202 ret = mlx5_get_ifname(dev, &ifr->ifr_name);
205 ret = ioctl(sock, req, ifr);
221 * Pointer to Ethernet device.
223 * MTU value output buffer.
226 * 0 on success, a negative errno value otherwise and rte_errno is set.
229 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)
/* request is left uninitialised; mlx5_ifreq() fills in the interface name before the ioctl. */
231 struct ifreq request;
232 int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request);
/* The kernel-reported MTU is stored into the caller's 16-bit buffer. */
236 *mtu = request.ifr_mtu;
244 * Pointer to Ethernet device.
249 * 0 on success, a negative errno value otherwise and rte_errno is set.
252 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
/* Only ifr_mtu is initialised here; mlx5_ifreq() supplies the interface name. */
254 struct ifreq request = { .ifr_mtu = mtu, };
256 return mlx5_ifreq(dev, SIOCSIFMTU, &request);
263 * Pointer to Ethernet device.
265 * Bitmask for flags that must remain untouched.
267 * Bitmask for flags to modify.
270 * 0 on success, a negative errno value otherwise and rte_errno is set.
273 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags)
275 struct ifreq request;
/* Read the current interface flags first so the bits selected by keep can be carried over. */
276 int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request);
/* Bits set in keep retain their current value; all other bits are cleared ... */
280 request.ifr_flags &= keep;
/* ... and then taken from flags (bits of flags that overlap keep are ignored). */
281 request.ifr_flags |= flags & ~keep;
282 return mlx5_ifreq(dev, SIOCSIFFLAGS, &request);
286 * DPDK callback for Ethernet device configuration.
289 * Pointer to Ethernet device structure.
292 * 0 on success, a negative errno value otherwise and rte_errno is set.
295 mlx5_dev_configure(struct rte_eth_dev *dev)
297 struct priv *priv = dev->data->dev_private;
298 unsigned int rxqs_n = dev->data->nb_rx_queues;
299 unsigned int txqs_n = dev->data->nb_tx_queues;
302 unsigned int reta_idx_n;
/* Non-zero when the application supplied its own RSS key pointer. */
303 const uint8_t use_app_rss_key =
304 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
305 uint64_t supp_tx_offloads = mlx5_get_tx_port_offloads(dev);
306 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
/* Supported Rx offloads are the union of port-level and queue-level capabilities. */
307 uint64_t supp_rx_offloads =
308 (mlx5_get_rx_port_offloads() |
309 mlx5_get_rx_queue_offloads(dev));
310 uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads;
/* True when at least one requested Tx offload bit is outside the supported set. */
313 if ((tx_offloads & supp_tx_offloads) != tx_offloads) {
315 "port %u some Tx offloads are not supported requested"
316 " 0x%" PRIx64 " supported 0x%" PRIx64,
317 dev->data->port_id, tx_offloads, supp_tx_offloads);
/* Same check for the requested Rx offloads. */
321 if ((rx_offloads & supp_rx_offloads) != rx_offloads) {
323 "port %u some Rx offloads are not supported requested"
324 " 0x%" PRIx64 " supported 0x%" PRIx64,
325 dev->data->port_id, rx_offloads, supp_rx_offloads);
/* An application-provided key must have exactly the default key length. */
329 if (use_app_rss_key &&
330 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len !=
331 rss_hash_default_key_len)) {
332 DRV_LOG(ERR, "port %u RSS key len must be %zu Bytes long",
333 dev->data->port_id, rss_hash_default_key_len);
/* NOTE(review): the realloc result overwrites the only pointer to the previous key buffer; confirm a failed call cannot leak it. */
337 priv->rss_conf.rss_key =
338 rte_realloc(priv->rss_conf.rss_key,
339 rss_hash_default_key_len, 0);
340 if (!priv->rss_conf.rss_key) {
/* NOTE(review): the value printed is rxqs_n although the failure concerns the key buffer; confirm intent. */
341 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)",
342 dev->data->port_id, rxqs_n);
/* The stored key is always rss_hash_default_key_len bytes, copied from the application key or the default key. */
346 memcpy(priv->rss_conf.rss_key,
348 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key :
349 rss_hash_default_key,
350 rss_hash_default_key_len);
351 priv->rss_conf.rss_key_len = rss_hash_default_key_len;
352 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
/* The private structure points at the ethdev queue arrays; nothing is copied. */
353 priv->rxqs = (void *)dev->data->rx_queues;
354 priv->txqs = (void *)dev->data->tx_queues;
355 if (txqs_n != priv->txqs_n) {
356 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u",
357 dev->data->port_id, priv->txqs_n, txqs_n);
358 priv->txqs_n = txqs_n;
/* The Rx queue count is bounded by the maximum indirection table size. */
360 if (rxqs_n > priv->config.ind_table_max_size) {
361 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)",
362 dev->data->port_id, rxqs_n);
366 if (rxqs_n == priv->rxqs_n)
368 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u",
369 dev->data->port_id, priv->rxqs_n, rxqs_n);
370 priv->rxqs_n = rxqs_n;
371 /* If the requested number of RX queues is not a power of two, use the
372 * maximum indirection table size for better balancing.
373 * The result is always rounded to the next power of two. */
374 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ?
375 priv->config.ind_table_max_size :
377 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n);
380 /* When the number of RX queues is not a power of two, the remaining
381 * table entries are padded with reused WQs and hashes are not spread
383 for (i = 0, j = 0; (i != reta_idx_n); ++i) {
384 (*priv->reta_idx)[i] = j;
392 * DPDK callback to get information about the device.
395 * Pointer to Ethernet device structure.
397 * Info structure output buffer.
400 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
402 struct priv *priv = dev->data->dev_private;
403 struct mlx5_dev_config *config = &priv->config;
405 char ifname[IF_NAMESIZE];
407 info->pci_dev = RTE_ETH_DEV_TO_PCI(dev);
408 /* FIXME: we should ask the device for these values. */
409 info->min_rx_bufsize = 32;
410 info->max_rx_pktlen = 65536;
412 * Since we need one CQ per QP, the limit is the minimum number
413 * between the two values.
415 max = RTE_MIN(priv->device_attr.orig_attr.max_cq,
416 priv->device_attr.orig_attr.max_qp);
417 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */
/* The same limit is reported for Rx and Tx queues. */
420 info->max_rx_queues = max;
421 info->max_tx_queues = max;
422 info->max_mac_addrs = RTE_DIM(priv->mac);
423 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev);
/* Port-level Rx capabilities include every per-queue capability. */
424 info->rx_offload_capa = (mlx5_get_rx_port_offloads() |
425 info->rx_queue_offload_capa);
426 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev);
/* if_index is written only when the interface name lookup succeeds. */
427 if (mlx5_get_ifname(dev, &ifname) == 0)
428 info->if_index = if_nametoindex(ifname);
/* Report the current indirection table size, or the configured maximum while reta_idx_n is still zero. */
429 info->reta_size = priv->reta_idx_n ?
430 priv->reta_idx_n : config->ind_table_max_size;
431 info->hash_key_size = rss_hash_default_key_len;
/* link_speed_capa is the value cached by the link update functions. */
432 info->speed_capa = priv->link_speed_capa;
/* Advertise every RSS flow type bit that lies outside MLX5_RSS_HF_MASK. */
433 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK;
437 * Get supported packet types.
440 * Pointer to Ethernet device structure.
443 * A pointer to the supported Packet types array.
446 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
/* Static table, so a pointer to it stays valid after this function returns. */
448 static const uint32_t ptypes[] = {
449 /* refers to rxq_cq_to_pkt_type() */
451 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
452 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
453 RTE_PTYPE_L4_NONFRAG,
457 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
458 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
459 RTE_PTYPE_INNER_L4_NONFRAG,
460 RTE_PTYPE_INNER_L4_FRAG,
461 RTE_PTYPE_INNER_L4_TCP,
462 RTE_PTYPE_INNER_L4_UDP,
/* The installed Rx burst callback is compared with the two mlx5 Rx burst functions. */
466 if (dev->rx_pkt_burst == mlx5_rx_burst ||
467 dev->rx_pkt_burst == mlx5_rx_burst_vec)
473 * DPDK callback to retrieve physical link information.
476 * Pointer to Ethernet device structure.
478 * Storage for current link status.
481 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Link query based on the legacy ETHTOOL_GSET command. */
484 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
485 struct rte_eth_link *link)
487 struct priv *priv = dev->data->dev_private;
488 struct ethtool_cmd edata = {
489 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */
492 struct rte_eth_link dev_link;
496 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
498 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
499 dev->data->port_id, strerror(rte_errno));
502 memset(&dev_link, 0, sizeof(dev_link));
/* The link counts as up only when both IFF_UP and IFF_RUNNING are set. */
503 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
504 (ifr.ifr_flags & IFF_RUNNING));
/* The same ifreq is reused to carry the ethtool command; the flags were consumed above. */
505 ifr.ifr_data = (void *)&edata;
506 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
509 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
510 dev->data->port_id, strerror(rte_errno));
513 link_speed = ethtool_cmd_speed(&edata);
/* A reported speed of -1 is exposed as 0. */
514 if (link_speed == -1)
515 dev_link.link_speed = 0;
517 dev_link.link_speed = link_speed;
/* Rebuild the cached speed capabilities from the "supported" bitmap of the reply. */
518 priv->link_speed_capa = 0;
519 if (edata.supported & SUPPORTED_Autoneg)
520 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
521 if (edata.supported & (SUPPORTED_1000baseT_Full |
522 SUPPORTED_1000baseKX_Full))
523 priv->link_speed_capa |= ETH_LINK_SPEED_1G;
524 if (edata.supported & SUPPORTED_10000baseKR_Full)
525 priv->link_speed_capa |= ETH_LINK_SPEED_10G;
526 if (edata.supported & (SUPPORTED_40000baseKR4_Full |
527 SUPPORTED_40000baseCR4_Full |
528 SUPPORTED_40000baseSR4_Full |
529 SUPPORTED_40000baseLR4_Full))
530 priv->link_speed_capa |= ETH_LINK_SPEED_40G;
/* Anything other than DUPLEX_HALF is reported as full duplex. */
531 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
532 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
/* Autoneg is derived from the configured link_speeds, not from the ethtool reply. */
533 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
534 ETH_LINK_SPEED_FIXED);
/* True when speed and status disagree: exactly one of them is non-zero. */
535 if ((dev_link.link_speed && !dev_link.link_status) ||
536 (!dev_link.link_speed && dev_link.link_status)) {
545 * Retrieve physical link information (unlocked version using new ioctl).
548 * Pointer to Ethernet device structure.
550 * Storage for current link status.
553 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Link query based on ETHTOOL_GLINKSETTINGS; issues the command twice (size probe, then full request). */
556 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
557 struct rte_eth_link *link)
560 struct priv *priv = dev->data->dev_private;
/* First request: only .cmd is set, every other field starts at zero. */
561 struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
563 struct rte_eth_link dev_link;
567 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
569 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
570 dev->data->port_id, strerror(rte_errno));
573 memset(&dev_link, 0, sizeof(dev_link));
/* The link counts as up only when both IFF_UP and IFF_RUNNING are set. */
574 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
575 (ifr.ifr_flags & IFF_RUNNING));
576 ifr.ifr_data = (void *)&gcmd;
577 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
580 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
582 dev->data->port_id, strerror(rte_errno));
/* Flip the sign of the reported word count before using it as a size; presumably the probe reply is negative (see the ethtool UAPI handshake) - TODO confirm. */
585 gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;
/* Buffer layout: the fixed header followed by three mask arrays of link_mode_masks_nwords 32-bit words each. */
587 alignas(struct ethtool_link_settings)
588 uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +
589 sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];
590 struct ethtool_link_settings *ecmd = (void *)data;
/* Second request, now pointing at the full-size buffer. */
593 ifr.ifr_data = (void *)ecmd;
594 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
597 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
599 dev->data->port_id, strerror(rte_errno));
602 dev_link.link_speed = ecmd->speed;
/* Fold the first two 32-bit mask words into one 64-bit value, word 1 in the upper half. */
603 sc = ecmd->link_mode_masks[0] |
604 ((uint64_t)ecmd->link_mode_masks[1] << 32);
/* Rebuild the cached speed capabilities from the combined mask. */
605 priv->link_speed_capa = 0;
606 if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT))
607 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
608 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) |
609 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)))
610 priv->link_speed_capa |= ETH_LINK_SPEED_1G;
611 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) |
612 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) |
613 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)))
614 priv->link_speed_capa |= ETH_LINK_SPEED_10G;
615 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) |
616 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)))
617 priv->link_speed_capa |= ETH_LINK_SPEED_20G;
618 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) |
619 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) |
620 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) |
621 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)))
622 priv->link_speed_capa |= ETH_LINK_SPEED_40G;
623 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) |
624 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) |
625 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) |
626 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)))
627 priv->link_speed_capa |= ETH_LINK_SPEED_56G;
628 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) |
629 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) |
630 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)))
631 priv->link_speed_capa |= ETH_LINK_SPEED_25G;
632 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) |
633 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)))
634 priv->link_speed_capa |= ETH_LINK_SPEED_50G;
635 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) |
636 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) |
637 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) |
638 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)))
639 priv->link_speed_capa |= ETH_LINK_SPEED_100G;
/* Anything other than DUPLEX_HALF is reported as full duplex. */
640 dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
641 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
/* Autoneg is derived from the configured link_speeds, not from the ethtool reply. */
642 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
643 ETH_LINK_SPEED_FIXED);
/* True when speed and status disagree: exactly one of them is non-zero. */
644 if ((dev_link.link_speed && !dev_link.link_status) ||
645 (!dev_link.link_speed && dev_link.link_status)) {
654 * DPDK callback to retrieve physical link information.
657 * Pointer to Ethernet device structure.
658 * @param wait_to_complete
659 * Wait for request completion.
662 * 0 if link status was not updated, positive if it was, a negative errno
663 * value otherwise and rte_errno is set.
666 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
669 struct rte_eth_link dev_link;
/* Reference point for the MLX5_LINK_STATUS_TIMEOUT comparison below. */
670 time_t start_time = time(NULL);
/* Both query variants fill the local dev_link, not dev->data->dev_link. */
673 ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
675 ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
678 /* Handle wait to complete situation. */
679 if (wait_to_complete && ret == -EAGAIN) {
/* Elapsed wall-clock seconds since start_time are compared with MLX5_LINK_STATUS_TIMEOUT. */
680 if (abs((int)difftime(time(NULL), start_time)) <
681 MLX5_LINK_STATUS_TIMEOUT) {
688 } else if (ret < 0) {
691 } while (wait_to_complete);
/* Non-zero when the freshly read link state differs bytewise from the stored one. */
692 ret = !!memcmp(&dev->data->dev_link, &dev_link,
693 sizeof(struct rte_eth_link));
694 dev->data->dev_link = dev_link;
699 * DPDK callback to change the MTU.
702 * Pointer to Ethernet device structure.
707 * 0 on success, a negative errno value otherwise and rte_errno is set.
710 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
712 struct priv *priv = dev->data->dev_private;
713 uint16_t kern_mtu = 0;
/* Read the current kernel MTU before attempting the change. */
716 ret = mlx5_get_mtu(dev, &kern_mtu);
719 /* Set kernel interface MTU first. */
720 ret = mlx5_set_mtu(dev, mtu);
/* Read the MTU back so the value the kernel actually holds can be compared with the request. */
723 ret = mlx5_get_mtu(dev, &kern_mtu);
726 if (kern_mtu == mtu) {
728 DRV_LOG(DEBUG, "port %u adapter MTU set to %u",
729 dev->data->port_id, mtu);
737 * DPDK callback to get flow control status.
740 * Pointer to Ethernet device structure.
741 * @param[out] fc_conf
742 * Flow control output buffer.
745 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Queries the pause parameters with ETHTOOL_GPAUSEPARAM and translates the
 * reply into fc_conf->autoneg and fc_conf->mode.
 */
748 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
751 struct ethtool_pauseparam ethpause = {
752 .cmd = ETHTOOL_GPAUSEPARAM
/* The ethtool request travels through ifr_data, which must hold the address of ethpause. */
756 ifr.ifr_data = (void *)&ethpause;
757 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
760 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:"
762 dev->data->port_id, strerror(rte_errno));
765 fc_conf->autoneg = ethpause.autoneg;
/* Map the two pause booleans onto the four flow control modes. */
766 if (ethpause.rx_pause && ethpause.tx_pause)
767 fc_conf->mode = RTE_FC_FULL;
768 else if (ethpause.rx_pause)
769 fc_conf->mode = RTE_FC_RX_PAUSE;
770 else if (ethpause.tx_pause)
771 fc_conf->mode = RTE_FC_TX_PAUSE;
773 fc_conf->mode = RTE_FC_NONE;
778 * DPDK callback to modify flow control parameters.
781 * Pointer to Ethernet device structure.
783 * Flow control parameters.
786 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Translates fc_conf->autoneg and fc_conf->mode into an ethtool pause
 * request and submits it with ETHTOOL_SPAUSEPARAM.
 */
789 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
792 struct ethtool_pauseparam ethpause = {
793 .cmd = ETHTOOL_SPAUSEPARAM
/* The ethtool request travels through ifr_data, which must hold the address of ethpause. */
797 ifr.ifr_data = (void *)&ethpause;
798 ethpause.autoneg = fc_conf->autoneg;
/* Rx pause is requested for the full mode or whenever the Rx pause bit is present in mode. */
799 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
800 (fc_conf->mode & RTE_FC_RX_PAUSE))
801 ethpause.rx_pause = 1;
803 ethpause.rx_pause = 0;
/* Tx pause follows the same rule with the Tx pause bit. */
805 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
806 (fc_conf->mode & RTE_FC_TX_PAUSE))
807 ethpause.tx_pause = 1;
809 ethpause.tx_pause = 0;
810 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
813 "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
815 dev->data->port_id, strerror(rte_errno));
822 * Get PCI information from struct ibv_device.
825 * Pointer to the Verbs device (struct ibv_device).
826 * @param[out] pci_addr
827 * PCI bus address output buffer.
830 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Reads "<ibdev_path>/device/uevent" line by line, looking for a line that carries the PCI address. */
833 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
834 struct rte_pci_addr *pci_addr)
838 MKSTR(path, "%s/device/uevent", device->ibdev_path);
840 file = fopen(path, "rb");
/* fgets() returns its buffer argument on success, so the loop runs until end of file or a read error. */
845 while (fgets(line, sizeof(line), file) == line) {
846 size_t len = strlen(line);
849 /* Truncate long lines. */
/* A completely filled buffer means the line may have been cut short. */
850 if (len == (sizeof(line) - 1))
851 while (line[(len - 1)] != '\n') {
855 line[(len - 1)] = ret;
857 /* Extract information. */
/* Four hexadecimal fields are expected: one 32-bit value followed by three 8-bit values. */
860 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
864 &pci_addr->function) == 4) {
874 * Device status handler.
877 * Pointer to Ethernet device.
879 * Pointer to event flags holder.
882 * Events bitmap of callback process which can be called immediately.
/* Collects pending Verbs async events and converts them into a bitmap of RTE_ETH_EVENT_* bits. */
885 mlx5_dev_status_handler(struct rte_eth_dev *dev)
887 struct priv *priv = dev->data->dev_private;
888 struct ibv_async_event event;
/* The link state is refreshed without waiting (wait_to_complete = 0). */
891 if (mlx5_link_update(dev, 0) == -EAGAIN) {
895 /* Read all messages and acknowledge them. */
897 if (mlx5_glue->get_async_event(priv->ctx, &event))
/* Port active/error events count as link status changes, but only when LSC interrupts are enabled. */
899 if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
900 event.event_type == IBV_EVENT_PORT_ERR) &&
901 (dev->data->dev_conf.intr_conf.lsc == 1))
902 ret |= (1 << RTE_ETH_EVENT_INTR_LSC);
/* A fatal device event counts as removal, but only when RMV interrupts are enabled. */
903 else if (event.event_type == IBV_EVENT_DEVICE_FATAL &&
904 dev->data->dev_conf.intr_conf.rmv == 1)
905 ret |= (1 << RTE_ETH_EVENT_INTR_RMV);
908 "port %u event type %d on not handled",
909 dev->data->port_id, event.event_type);
910 mlx5_glue->ack_async_event(&event);
916 * Handle interrupts from the NIC.
918 * @param[in] intr_handle
924 mlx5_dev_interrupt_handler(void *cb_arg)
/* cb_arg is the rte_eth_dev pointer that was passed when this callback was registered. */
926 struct rte_eth_dev *dev = cb_arg;
/* events is a bitmap with one bit per RTE_ETH_EVENT_* value; each pending type is dispatched separately. */
929 events = mlx5_dev_status_handler(dev);
930 if (events & (1 << RTE_ETH_EVENT_INTR_LSC))
931 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
932 if (events & (1 << RTE_ETH_EVENT_INTR_RMV))
933 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL);
937 * Handle interrupts from the socket.
943 mlx5_dev_handler_socket(void *cb_arg)
/* cb_arg is the rte_eth_dev pointer registered together with this callback in the install function. */
945 struct rte_eth_dev *dev = cb_arg;
/* All socket processing is delegated to mlx5_socket_handle(). */
947 mlx5_socket_handle(dev);
951 * Uninstall interrupt handler.
954 * Pointer to Ethernet device.
957 mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev)
959 struct priv *priv = dev->data->dev_private;
/* The async-event callback is unregistered under the same lsc/rmv condition used to register it. */
961 if (dev->data->dev_conf.intr_conf.lsc ||
962 dev->data->dev_conf.intr_conf.rmv)
963 rte_intr_callback_unregister(&priv->intr_handle,
964 mlx5_dev_interrupt_handler, dev);
/* The socket callback is unregistered only when a primary socket exists. */
965 if (priv->primary_socket)
966 rte_intr_callback_unregister(&priv->intr_handle_socket,
967 mlx5_dev_handler_socket, dev);
/* Both handles are reset to fd 0 and an unknown type, whether or not they were registered. */
968 priv->intr_handle.fd = 0;
969 priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
970 priv->intr_handle_socket.fd = 0;
971 priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN;
975 * Install interrupt handler.
978 * Pointer to Ethernet device.
981 mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev)
983 struct priv *priv = dev->data->dev_private;
987 assert(priv->ctx->async_fd > 0);
/* Switch the async event descriptor to non-blocking mode. */
/* NOTE(review): the F_GETFL result is passed on without an error check - confirm this is acceptable. */
988 flags = fcntl(priv->ctx->async_fd, F_GETFL);
989 ret = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
992 "port %u failed to change file descriptor async event"
/* Both interrupt kinds are switched off in the device configuration on this path. */
995 dev->data->dev_conf.intr_conf.lsc = 0;
996 dev->data->dev_conf.intr_conf.rmv = 0;
/* Register the async-event callback only when at least one interrupt kind is still requested. */
998 if (dev->data->dev_conf.intr_conf.lsc ||
999 dev->data->dev_conf.intr_conf.rmv) {
1000 priv->intr_handle.fd = priv->ctx->async_fd;
1001 priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
1002 rte_intr_callback_register(&priv->intr_handle,
1003 mlx5_dev_interrupt_handler, dev);
1005 ret = mlx5_socket_init(dev);
1007 DRV_LOG(ERR, "port %u cannot initialise socket: %s",
1008 dev->data->port_id, strerror(rte_errno));
/* The socket callback is registered only when a primary socket exists. */
1009 else if (priv->primary_socket) {
1010 priv->intr_handle_socket.fd = priv->primary_socket;
1011 priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT;
1012 rte_intr_callback_register(&priv->intr_handle_socket,
1013 mlx5_dev_handler_socket, dev);
1018 * DPDK callback to bring the link DOWN.
1021 * Pointer to Ethernet device structure.
1024 * 0 on success, a negative errno value otherwise and rte_errno is set.
1027 mlx5_set_link_down(struct rte_eth_dev *dev)
/* keep = ~IFF_UP preserves every other flag; IFF_UP is then taken from flags, where it is clear. */
1029 return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP);
1033 * DPDK callback to bring the link UP.
1036 * Pointer to Ethernet device structure.
1039 * 0 on success, a negative errno value otherwise and rte_errno is set.
1042 mlx5_set_link_up(struct rte_eth_dev *dev)
/* keep = ~IFF_UP preserves every other flag; IFF_UP is then taken from flags, where it is set. */
1044 return mlx5_set_flags(dev, ~IFF_UP, IFF_UP);
1048 * Configure the TX function to use.
1051 * Pointer to rte_eth_dev structure.
1054 * Pointer to selected Tx burst function.
1057 mlx5_select_tx_function(struct rte_eth_dev *dev)
1059 struct priv *priv = dev->data->dev_private;
/* Default choice, returned unchanged when no specialised variant applies. */
1060 eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst;
1061 struct mlx5_dev_config *config = &priv->config;
1062 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
/* tso is non-zero when any of the three TSO offload bits is requested. */
1063 int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |
1064 DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
1065 DEV_TX_OFFLOAD_GRE_TNL_TSO));
1066 int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT);
/* NOTE(review): priv has already been dereferenced (&priv->config) before this assert runs. */
1068 assert(priv != NULL);
1069 /* Select appropriate TX function. */
/* VLAN insertion or any TSO offload keeps the default burst function. */
1070 if (vlan_insert || tso)
1071 return tx_pkt_burst;
/* Enhanced MPW: choose between the raw vectorized, vectorized and plain enhanced MPW variants. */
1072 if (config->mps == MLX5_MPW_ENHANCED) {
1073 if (mlx5_check_vec_tx_support(dev) > 0) {
1074 if (mlx5_check_raw_vec_tx_support(dev) > 0)
1075 tx_pkt_burst = mlx5_tx_burst_raw_vec;
1077 tx_pkt_burst = mlx5_tx_burst_vec;
1079 "port %u selected enhanced MPW Tx vectorized"
1081 dev->data->port_id);
1083 tx_pkt_burst = mlx5_tx_burst_empw;
1085 "port %u selected enhanced MPW Tx function",
1086 dev->data->port_id);
/* Any other non-zero mps value: the inline variant is used when txq_inline is positive. */
1088 } else if (config->mps && (config->txq_inline > 0)) {
1089 tx_pkt_burst = mlx5_tx_burst_mpw_inline;
1090 DRV_LOG(DEBUG, "port %u selected MPW inline Tx function",
1091 dev->data->port_id);
1092 } else if (config->mps) {
1093 tx_pkt_burst = mlx5_tx_burst_mpw;
1094 DRV_LOG(DEBUG, "port %u selected MPW Tx function",
1095 dev->data->port_id);
1097 return tx_pkt_burst;
1101 * Configure the RX function to use.
1104 * Pointer to rte_eth_dev structure.
1107 * Pointer to selected Rx burst function.
1110 mlx5_select_rx_function(struct rte_eth_dev *dev)
/* Default choice; replaced below when mlx5_check_vec_rx_support() returns a positive value. */
1112 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst;
1114 assert(dev != NULL);
1115 if (mlx5_check_vec_rx_support(dev) > 0) {
1116 rx_pkt_burst = mlx5_rx_burst_vec;
1117 DRV_LOG(DEBUG, "port %u selected Rx vectorized function",
1118 dev->data->port_id);
1120 return rx_pkt_burst;
1124 * Check if mlx5 device was removed.
1127 * Pointer to Ethernet device structure.
1130 * 1 when device is removed, otherwise 0.
1133 mlx5_is_removed(struct rte_eth_dev *dev)
1135 struct ibv_device_attr device_attr;
1136 struct priv *priv = dev->data->dev_private;
1138 if (mlx5_glue->query_device(priv->ctx, &device_attr) == EIO)