#include <sys/un.h>
#include <time.h>
-#include <rte_atomic.h>
-#include <rte_ethdev_driver.h>
+#include <ethdev_driver.h>
#include <rte_bus_pci.h>
#include <rte_mbuf.h>
#include <rte_common.h>
#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_common.h>
+#include <mlx5_malloc.h>
#include "mlx5.h"
#include "mlx5_rxtx.h"
#define ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT 2 /* 66 - 64 */
#endif
-/**
- * Get master interface name from private structure.
- *
- * @param[in] dev
- * Pointer to Ethernet device.
- * @param[out] ifname
- * Interface name output buffer.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_get_master_ifname(const char *ibdev_path, char (*ifname)[IF_NAMESIZE])
-{
- DIR *dir;
- struct dirent *dent;
- unsigned int dev_type = 0;
- unsigned int dev_port_prev = ~0u;
- char match[IF_NAMESIZE] = "";
-
- MLX5_ASSERT(ibdev_path);
- {
- MKSTR(path, "%s/device/net", ibdev_path);
-
- dir = opendir(path);
- if (dir == NULL) {
- rte_errno = errno;
- return -rte_errno;
- }
- }
- while ((dent = readdir(dir)) != NULL) {
- char *name = dent->d_name;
- FILE *file;
- unsigned int dev_port;
- int r;
-
- if ((name[0] == '.') &&
- ((name[1] == '\0') ||
- ((name[1] == '.') && (name[2] == '\0'))))
- continue;
-
- MKSTR(path, "%s/device/net/%s/%s",
- ibdev_path, name,
- (dev_type ? "dev_id" : "dev_port"));
-
- file = fopen(path, "rb");
- if (file == NULL) {
- if (errno != ENOENT)
- continue;
- /*
- * Switch to dev_id when dev_port does not exist as
- * is the case with Linux kernel versions < 3.15.
- */
-try_dev_id:
- match[0] = '\0';
- if (dev_type)
- break;
- dev_type = 1;
- dev_port_prev = ~0u;
- rewinddir(dir);
- continue;
- }
- r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
- fclose(file);
- if (r != 1)
- continue;
- /*
- * Switch to dev_id when dev_port returns the same value for
- * all ports. May happen when using a MOFED release older than
- * 3.0 with a Linux kernel >= 3.15.
- */
- if (dev_port == dev_port_prev)
- goto try_dev_id;
- dev_port_prev = dev_port;
- if (dev_port == 0)
- strlcpy(match, name, sizeof(match));
- }
- closedir(dir);
- if (match[0] == '\0') {
- rte_errno = ENOENT;
- return -rte_errno;
- }
- strncpy(*ifname, match, sizeof(*ifname));
- return 0;
-}
/**
* Get interface name from private structure.
*
- * This is a port representor-aware version of mlx5_get_master_ifname().
+ * This is a port representor-aware version of mlx5_get_ifname_sysfs().
*
* @param[in] dev
* Pointer to Ethernet device.
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
-mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE])
+mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[MLX5_NAMESIZE])
{
struct mlx5_priv *priv = dev->data->dev_private;
unsigned int ifindex;
MLX5_ASSERT(priv);
MLX5_ASSERT(priv->sh);
+ if (priv->bond_ifindex > 0) {
+ memcpy(ifname, priv->bond_name, MLX5_NAMESIZE);
+ return 0;
+ }
ifindex = mlx5_ifindex(dev);
if (!ifindex) {
if (!priv->representor)
- return mlx5_get_master_ifname(priv->sh->ibdev_path,
- ifname);
+ return mlx5_get_ifname_sysfs(priv->sh->ibdev_path,
+ *ifname);
rte_errno = ENXIO;
return -rte_errno;
}
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-int
+static int
mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)
{
int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-int
+static int
mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags)
{
struct ifreq request;
}
link_speed = ethtool_cmd_speed(&edata);
if (link_speed == -1)
- dev_link.link_speed = ETH_SPEED_NUM_NONE;
+ dev_link.link_speed = ETH_SPEED_NUM_UNKNOWN;
else
dev_link.link_speed = link_speed;
priv->link_speed_capa = 0;
ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
ETH_LINK_SPEED_FIXED);
- if (((dev_link.link_speed && !dev_link.link_status) ||
- (!dev_link.link_speed && dev_link.link_status))) {
- rte_errno = EAGAIN;
- return -rte_errno;
- }
*link = dev_link;
return 0;
}
dev->data->port_id, strerror(rte_errno));
return ret;
}
- dev_link.link_speed = (ecmd->speed == UINT32_MAX) ? ETH_SPEED_NUM_NONE :
- ecmd->speed;
+ dev_link.link_speed = (ecmd->speed == UINT32_MAX) ?
+ ETH_SPEED_NUM_UNKNOWN : ecmd->speed;
sc = ecmd->link_mode_masks[0] |
((uint64_t)ecmd->link_mode_masks[1] << 32);
priv->link_speed_capa = 0;
ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
ETH_LINK_SPEED_FIXED);
- if (((dev_link.link_speed && !dev_link.link_status) ||
- (!dev_link.link_speed && dev_link.link_status))) {
- rte_errno = EAGAIN;
- return -rte_errno;
- }
*link = dev_link;
return 0;
}
dev = &rte_eth_devices[sh->port[i].ih_port_id];
MLX5_ASSERT(dev);
if (dev->data->dev_conf.intr_conf.rmv)
- _rte_eth_dev_callback_process
+ rte_eth_dev_callback_process
(dev, RTE_ETH_EVENT_INTR_RMV, NULL);
}
}
usleep(0);
continue;
}
- _rte_eth_dev_callback_process
+ rte_eth_dev_callback_process
(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
continue;
}
return 0;
}
+/**
+ * Derive the E-Switch role of a port from the data gathered via sysfs.
+ *
+ * Exactly one flag of switch_info is written, selected by its name_type
+ * field; all other fields are left as provided by the caller.
+ *
+ * @param[in] device_dir
+ *   Flag of presence of "device" directory under port device key.
+ * @param[inout] switch_info
+ *   Port information, including the port name type if recognized.
+ *   The master flag is written for unknown, not-set and uplink names,
+ *   the representor flag for legacy, PF-HPF and PF-VF names.
+ */
+static void
+mlx5_sysfs_check_switch_info(bool device_dir,
+			     struct mlx5_switch_info *switch_info)
+{
+	switch (switch_info->name_type) {
+	case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
+		/* Unrecognized name is handled like a missing one. */
+		/* Fallthrough */
+	case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
+		/*
+		 * No usable name (legacy naming schema for master):
+		 * the port is the master when a device directory exists.
+		 */
+		switch_info->master = device_dir;
+		return;
+	case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+		/* New uplink naming schema: always the master. */
+		switch_info->master = 1;
+		return;
+	case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
+		/* Legacy representor naming: no device directory expected. */
+		switch_info->representor = !device_dir;
+		return;
+	case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
+		/* Fallthrough */
+	case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+		/* New representor naming schema: always a representor. */
+		switch_info->representor = 1;
+		return;
+	}
+}
+
/**
* Get switch information associated with network interface.
*
file = fopen(phys_port_name, "rb");
if (file != NULL) {
- ret = fscanf(file, "%s", port_name);
+ ret = fscanf(file, "%" RTE_STR(IF_NAMESIZE) "s", port_name);
fclose(file);
if (ret == 1)
mlx5_translate_port_name(port_name, &data);
}
/**
- * Analyze gathered port parameters via sysfs to recognize master
- * and representor devices for E-Switch configuration.
+ * Get bond information associated with network interface.
+ *
+ * The bond (master) ifindex is read from
+ * /sys/class/net/<slave name>/master/ifindex and the bond name is then
+ * resolved from that index with if_indextoname().
*
- * @param[in] device_dir
- * flag of presence of "device" directory under port device key.
- * @param[inout] switch_info
- * Port information, including port name as a number and port name
- * type if recognized
+ * @param pf_ifindex
+ *   Network interface index of bond slave interface.
+ * @param[out] ifindex
+ *   Pointer to bond ifindex, may be NULL when the index is not needed.
+ * @param[out] ifname
+ *   Pointer to bond ifname, may be NULL when the name is not needed.
+ *   It is handed to if_indextoname(), so the buffer must provide at
+ *   least IF_NAMESIZE bytes.
*
* @return
- * master and representor flags are set in switch_info according to
- * recognized parameters (if any).
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ * A failure never yields 0: ENXIO is reported for an empty slave name and
+ * EINVAL for an unparsable ifindex file when errno carries no error code.
*/
-void
-mlx5_sysfs_check_switch_info(bool device_dir,
- struct mlx5_switch_info *switch_info)
+int
+mlx5_sysfs_bond_info(unsigned int pf_ifindex, unsigned int *ifindex,
+		     char *ifname)
{
- switch (switch_info->name_type) {
- case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
- /*
- * Name is not recognized, assume the master,
- * check the device directory presence.
- */
- switch_info->master = device_dir;
- break;
- case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
- /*
- * Name is not set, this assumes the legacy naming
- * schema for master, just check if there is
- * a device directory.
- */
- switch_info->master = device_dir;
- break;
- case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
- /* New uplink naming schema recognized. */
- switch_info->master = 1;
- break;
- case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
- /* Legacy representors naming schema. */
- switch_info->representor = !device_dir;
- break;
- case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
- /* New representors naming schema. */
- switch_info->representor = 1;
- break;
+	char name[IF_NAMESIZE];
+	FILE *file;
+	unsigned int index;
+	int err;
+	int ret;
+
+	/* The empty-name case below does not set errno, start from 0. */
+	errno = 0;
+	if (!if_indextoname(pf_ifindex, name) || !strlen(name)) {
+		rte_errno = errno ? errno : ENXIO;
+		return -rte_errno;
+	}
+	MKSTR(bond_if, "/sys/class/net/%s/master/ifindex", name);
+	/* Read the ifindex of the bond (master) device from sysfs. */
+	file = fopen(bond_if, "rb");
+	if (file == NULL) {
+		rte_errno = errno;
+		return -rte_errno;
+	}
+	/* A matching failure returns 0 from fscanf() and leaves errno as is. */
+	errno = 0;
+	ret = fscanf(file, "%u", &index);
+	/* Save errno before fclose() gets a chance to overwrite it. */
+	err = errno;
+	fclose(file);
+	if (ret != 1) {
+		/* Never report success while index is still uninitialized. */
+		rte_errno = err ? err : EINVAL;
+		return -rte_errno;
+	}
+	if (ifindex)
+		*ifindex = index;
+	/* Resolve the bond device name from its ifindex. */
+	if (ifname) {
+		if (!if_indextoname(index, ifname)) {
+			rte_errno = errno;
+			return -rte_errno;
+		}
}
+	return 0;
}
/**
rte_errno = EINVAL;
return -rte_errno;
}
- eeprom = rte_calloc(__func__, 1,
- (sizeof(struct ethtool_eeprom) + info->length), 0);
+ eeprom = mlx5_malloc(MLX5_MEM_ZERO,
+ (sizeof(struct ethtool_eeprom) + info->length), 0,
+ SOCKET_ID_ANY);
if (!eeprom) {
DRV_LOG(WARNING, "port %u cannot allocate memory for "
"eeprom data", dev->data->port_id);
dev->data->port_id, strerror(rte_errno));
else
rte_memcpy(info->data, eeprom->data, info->length);
- rte_free(eeprom);
+ mlx5_free(eeprom);
return ret;
}
+
+/**
+ * Read device counters table.
+ *
+ * The ethtool statistics are fetched with one ETHTOOL_GSTATS request.
+ * Counters flagged with .dev are read through mlx5_os_read_dev_stat()
+ * instead; when such a read fails, the last value cached in
+ * xstats_ctrl->xstats[] is returned for that counter.
+ *
+ * NOTE(review): the ethtool buffer is a stack array sized from
+ * xstats_ctrl->stats_n - confirm that count is always small enough.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param[out] stats
+ *   Counters table output buffer, one entry is written for each of the
+ *   xstats_ctrl->mlx5_stats_n counters.
+ *
+ * @return
+ *   0 on success and stats is filled, negative errno value otherwise and
+ *   rte_errno is set.
+ */
+int
+mlx5_os_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_xstats_ctrl *ctrl = &priv->xstats_ctrl;
+	unsigned int payload_sz = ctrl->stats_n * sizeof(uint64_t);
+	unsigned char raw[sizeof(struct ethtool_stats) + payload_sz];
+	struct ethtool_stats *et_stats = (struct ethtool_stats *)raw;
+	struct ifreq ifr;
+	unsigned int i;
+	int ret;
+
+	et_stats->cmd = ETHTOOL_GSTATS;
+	et_stats->n_stats = ctrl->stats_n;
+	ifr.ifr_data = (caddr_t)et_stats;
+	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
+	if (ret) {
+		DRV_LOG(WARNING,
+			"port %u unable to read statistic values from device",
+			dev->data->port_id);
+		return ret;
+	}
+	for (i = 0; i != ctrl->mlx5_stats_n; ++i) {
+		if (!ctrl->info[i].dev) {
+			/* Plain ethtool counter: pick it from the reply. */
+			stats[i] = (uint64_t)
+				et_stats->data[ctrl->dev_table_idx[i]];
+			continue;
+		}
+		ret = mlx5_os_read_dev_stat(priv, ctrl->info[i].ctr_name,
+					    &stats[i]);
+		if (ret != 0)
+			/* Read failed: fall back to the last cached value. */
+			stats[i] = ctrl->xstats[i];
+		else
+			ctrl->xstats[i] = stats[i];
+	}
+	return 0;
+}
+
+/**
+ * Query the number of statistics provided by ETHTOOL.
+ *
+ * The count is taken from the n_stats field of the ETHTOOL_GDRVINFO reply.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   Number of statistics on success, negative errno value otherwise and
+ *   rte_errno is set.
+ */
+int
+mlx5_os_get_stats_n(struct rte_eth_dev *dev)
+{
+	struct ethtool_drvinfo info;
+	struct ifreq request;
+	int ret;
+
+	info.cmd = ETHTOOL_GDRVINFO;
+	request.ifr_data = (caddr_t)&info;
+	ret = mlx5_ifreq(dev, SIOCETHTOOL, &request);
+	if (ret == 0)
+		return info.n_stats;
+	DRV_LOG(WARNING, "port %u unable to query number of statistics",
+		dev->data->port_id);
+	return ret;
+}
+
+/*
+ * Extended statistics known to the driver: dpdk_name is the name stored
+ * for the counter, ctr_name is the string matched against the ethtool
+ * statistic names. Entries with .dev set are read through
+ * mlx5_os_read_dev_stat() instead of ethtool.
+ */
+static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
+	{ .dpdk_name = "rx_unicast_bytes",
+	  .ctr_name = "rx_vport_unicast_bytes" },
+	{ .dpdk_name = "rx_multicast_bytes",
+	  .ctr_name = "rx_vport_multicast_bytes" },
+	{ .dpdk_name = "rx_broadcast_bytes",
+	  .ctr_name = "rx_vport_broadcast_bytes" },
+	{ .dpdk_name = "rx_unicast_packets",
+	  .ctr_name = "rx_vport_unicast_packets" },
+	{ .dpdk_name = "rx_multicast_packets",
+	  .ctr_name = "rx_vport_multicast_packets" },
+	{ .dpdk_name = "rx_broadcast_packets",
+	  .ctr_name = "rx_vport_broadcast_packets" },
+	{ .dpdk_name = "tx_unicast_bytes",
+	  .ctr_name = "tx_vport_unicast_bytes" },
+	{ .dpdk_name = "tx_multicast_bytes",
+	  .ctr_name = "tx_vport_multicast_bytes" },
+	{ .dpdk_name = "tx_broadcast_bytes",
+	  .ctr_name = "tx_vport_broadcast_bytes" },
+	{ .dpdk_name = "tx_unicast_packets",
+	  .ctr_name = "tx_vport_unicast_packets" },
+	{ .dpdk_name = "tx_multicast_packets",
+	  .ctr_name = "tx_vport_multicast_packets" },
+	{ .dpdk_name = "tx_broadcast_packets",
+	  .ctr_name = "tx_vport_broadcast_packets" },
+	{ .dpdk_name = "rx_wqe_errors",
+	  .ctr_name = "rx_wqe_err" },
+	{ .dpdk_name = "rx_phy_crc_errors",
+	  .ctr_name = "rx_crc_errors_phy" },
+	{ .dpdk_name = "rx_phy_in_range_len_errors",
+	  .ctr_name = "rx_in_range_len_errors_phy" },
+	{ .dpdk_name = "rx_phy_symbol_errors",
+	  .ctr_name = "rx_symbol_err_phy" },
+	{ .dpdk_name = "tx_phy_errors",
+	  .ctr_name = "tx_errors_phy" },
+	{ .dpdk_name = "rx_out_of_buffer",
+	  .ctr_name = "out_of_buffer",
+	  .dev = 1 },
+	{ .dpdk_name = "tx_phy_packets",
+	  .ctr_name = "tx_packets_phy" },
+	{ .dpdk_name = "rx_phy_packets",
+	  .ctr_name = "rx_packets_phy" },
+	{ .dpdk_name = "tx_phy_discard_packets",
+	  .ctr_name = "tx_discards_phy" },
+	{ .dpdk_name = "rx_phy_discard_packets",
+	  .ctr_name = "rx_discards_phy" },
+	{ .dpdk_name = "tx_phy_bytes",
+	  .ctr_name = "tx_bytes_phy" },
+	{ .dpdk_name = "rx_phy_bytes",
+	  .ctr_name = "rx_bytes_phy" },
+	/* Representor only */
+	{ .dpdk_name = "rx_vport_packets",
+	  .ctr_name = "vport_rx_packets" },
+	{ .dpdk_name = "rx_vport_bytes",
+	  .ctr_name = "vport_rx_bytes" },
+	{ .dpdk_name = "tx_vport_packets",
+	  .ctr_name = "vport_tx_packets" },
+	{ .dpdk_name = "tx_vport_bytes",
+	  .ctr_name = "vport_tx_bytes" },
+};
+
+/* Number of entries in mlx5_counters_init[]. */
+static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init);
+
+/**
+ * Init the structures to read device counters.
+ *
+ * The function queries the number and the names of the ethtool statistics,
+ * records every name matching an entry of mlx5_counters_init[], appends
+ * the counters flagged with .dev and finally snapshots the current values
+ * as the base. The counter count is reset to 0 first, so a failure while
+ * querying or allocating leaves no extended statistics registered.
+ *
+ * At most MLX5_MAX_XSTATS counters are registered; the excess, if any,
+ * is dropped with a warning instead of being written past that bound.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ */
+void
+mlx5_os_stats_init(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+	struct mlx5_stats_ctrl *stats_ctrl = &priv->stats_ctrl;
+	unsigned int i;
+	unsigned int j;
+	struct ifreq ifr;
+	struct ethtool_gstrings *strings = NULL;
+	unsigned int dev_stats_n;
+	unsigned int dropped = 0;
+	size_t str_sz;
+	int ret;
+
+	/* So that it won't aggregate for each init. */
+	xstats_ctrl->mlx5_stats_n = 0;
+	ret = mlx5_os_get_stats_n(dev);
+	if (ret < 0) {
+		DRV_LOG(WARNING, "port %u no extended statistics available",
+			dev->data->port_id);
+		return;
+	}
+	dev_stats_n = ret;
+	/* Allocate memory to grab stat names and values. */
+	str_sz = (size_t)dev_stats_n * ETH_GSTRING_LEN;
+	strings = mlx5_malloc(0, str_sz + sizeof(struct ethtool_gstrings), 0,
+			      SOCKET_ID_ANY);
+	if (!strings) {
+		DRV_LOG(WARNING, "port %u unable to allocate memory for xstats",
+			dev->data->port_id);
+		return;
+	}
+	strings->cmd = ETHTOOL_GSTRINGS;
+	strings->string_set = ETH_SS_STATS;
+	strings->len = dev_stats_n;
+	ifr.ifr_data = (caddr_t)strings;
+	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
+	if (ret) {
+		DRV_LOG(WARNING, "port %u unable to get statistic names",
+			dev->data->port_id);
+		goto free;
+	}
+	/* Register every ethtool statistic known to mlx5_counters_init[]. */
+	for (i = 0; i != dev_stats_n; ++i) {
+		const char *curr_string = (const char *)
+			&strings->data[i * ETH_GSTRING_LEN];
+
+		for (j = 0; j != xstats_n; ++j) {
+			unsigned int idx;
+
+			if (strcmp(mlx5_counters_init[j].ctr_name,
+				   curr_string))
+				continue;
+			/* Check the bound before writing, not after. */
+			if (xstats_ctrl->mlx5_stats_n >= MLX5_MAX_XSTATS) {
+				++dropped;
+				break;
+			}
+			idx = xstats_ctrl->mlx5_stats_n++;
+			xstats_ctrl->dev_table_idx[idx] = i;
+			xstats_ctrl->info[idx] = mlx5_counters_init[j];
+			break;
+		}
+	}
+	/* Add dev counters. */
+	for (i = 0; i != xstats_n; ++i) {
+		unsigned int idx;
+
+		if (!mlx5_counters_init[i].dev)
+			continue;
+		if (xstats_ctrl->mlx5_stats_n >= MLX5_MAX_XSTATS) {
+			++dropped;
+			continue;
+		}
+		idx = xstats_ctrl->mlx5_stats_n++;
+		xstats_ctrl->info[idx] = mlx5_counters_init[i];
+		xstats_ctrl->hw_stats[idx] = 0;
+	}
+	if (dropped)
+		DRV_LOG(WARNING,
+			"port %u %u extended statistics dropped, table is full",
+			dev->data->port_id, dropped);
+	MLX5_ASSERT(xstats_ctrl->mlx5_stats_n <= MLX5_MAX_XSTATS);
+	xstats_ctrl->stats_n = dev_stats_n;
+	/* Copy to base at first time. */
+	ret = mlx5_os_read_dev_counters(dev, xstats_ctrl->base);
+	if (ret)
+		DRV_LOG(ERR, "port %u cannot read device counters: %s",
+			dev->data->port_id, strerror(rte_errno));
+	mlx5_os_read_dev_stat(priv, "out_of_buffer", &stats_ctrl->imissed_base);
+	stats_ctrl->imissed = 0;
+free:
+	mlx5_free(strings);
+}
+
+/**
+ * Get MAC address by querying netdevice.
+ *
+ * The address is obtained with a SIOCGIFHWADDR request; the output buffer
+ * is written only when that request succeeds.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[out] mac
+ *   MAC address output buffer.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_get_mac(struct rte_eth_dev *dev, uint8_t (*mac)[RTE_ETHER_ADDR_LEN])
+{
+	struct ifreq request;
+	int ret = mlx5_ifreq(dev, SIOCGIFHWADDR, &request);
+
+	if (ret == 0)
+		memcpy(*mac, request.ifr_hwaddr.sa_data, sizeof(*mac));
+	return ret;
+}
+