/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2015 6WIND S.A.
- * Copyright 2015 Mellanox.
+ * Copyright 2015 Mellanox Technologies, Ltd
*/
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdalign.h>
#include <sys/un.h>
+#include <time.h>
#include <rte_atomic.h>
#include <rte_ethdev_driver.h>
#include <rte_mbuf.h>
#include <rte_common.h>
#include <rte_interrupts.h>
-#include <rte_alarm.h>
#include <rte_malloc.h>
+#include <rte_string_fns.h>
+#include <rte_rwlock.h>
#include "mlx5.h"
#include "mlx5_glue.h"
goto try_dev_id;
dev_port_prev = dev_port;
if (dev_port == (priv->port - 1u))
- snprintf(match, sizeof(match), "%s", name);
+ strlcpy(match, name, sizeof(match));
}
closedir(dir);
if (match[0] == '\0') {
return 0;
}
+/**
+ * Get the interface index from device name.
+ *
+ * The interface name is obtained through mlx5_get_ifname() and then
+ * translated with if_nametoindex().
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   Interface index (strictly positive) on success, a negative errno value
+ *   otherwise and rte_errno is set.
+ */
+int
+mlx5_ifindex(const struct rte_eth_dev *dev)
+{
+	char ifname[IF_NAMESIZE];
+	unsigned int ifindex;
+	int ret;
+
+	ret = mlx5_get_ifname(dev, &ifname);
+	if (ret)
+		return ret;
+	/*
+	 * if_nametoindex() returns an unsigned index and reports failure
+	 * with 0, never with -1.
+	 */
+	ifindex = if_nametoindex(ifname);
+	if (ifindex == 0) {
+		/* Never hand back 0: it would read as neither index nor error. */
+		rte_errno = errno ? errno : ENODEV;
+		return -rte_errno;
+	}
+	return (int)ifindex;
+}
+
/**
* Perform ifreq ioctl() on associated Ethernet device.
*
unsigned int reta_idx_n;
const uint8_t use_app_rss_key =
!!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
- uint64_t supp_tx_offloads = mlx5_get_tx_port_offloads(dev);
- uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
- uint64_t supp_rx_offloads =
- (mlx5_get_rx_port_offloads() |
- mlx5_get_rx_queue_offloads(dev));
- uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads;
int ret = 0;
- if ((tx_offloads & supp_tx_offloads) != tx_offloads) {
- DRV_LOG(ERR,
- "port %u some Tx offloads are not supported requested"
- " 0x%" PRIx64 " supported 0x%" PRIx64,
- dev->data->port_id, tx_offloads, supp_tx_offloads);
- rte_errno = ENOTSUP;
- return -rte_errno;
- }
- if ((rx_offloads & supp_rx_offloads) != rx_offloads) {
- DRV_LOG(ERR,
- "port %u some Rx offloads are not supported requested"
- " 0x%" PRIx64 " supported 0x%" PRIx64,
- dev->data->port_id, rx_offloads, supp_rx_offloads);
- rte_errno = ENOTSUP;
- return -rte_errno;
- }
if (use_app_rss_key &&
(dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len !=
rss_hash_default_key_len)) {
- /* MLX5 RSS only support 40bytes key. */
+ DRV_LOG(ERR, "port %u RSS key len must be %zu Bytes long",
+ dev->data->port_id, rss_hash_default_key_len);
rte_errno = EINVAL;
return -rte_errno;
}
if (++j == rxqs_n)
j = 0;
}
+ /*
+ * Once the device is added to the list of memory event callback, its
+ * global MR cache table cannot be expanded on the fly because of
+ * deadlock. If it overflows, lookup should be done by searching MR list
+ * linearly, which is slow.
+ */
+ if (mlx5_mr_btree_init(&priv->mr.cache, MLX5_MR_BTREE_CACHE_N * 2,
+ dev->device->numa_node)) {
+ /* rte_errno is already set. */
+ return -rte_errno;
+ }
return 0;
}
+/**
+ * Fill in the preferred (default) port tuning parameters.
+ *
+ * Ring and burst sizes start at values aimed at minimum CPU utilization.
+ * The ring size is raised for maximum throughput once more than two Rx or
+ * more than two Tx queues are configured. The suggested number of queues
+ * and the enlarged ring size both depend on whether the port advertises
+ * ETH_LINK_SPEED_100G in its link speed capabilities.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param[out] info
+ *   Info structure output buffer.
+ */
+static void
+mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
+{
+	struct priv *priv = dev->data->dev_private;
+	const int is_100g = !!(priv->link_speed_capa & ETH_LINK_SPEED_100G);
+	const int many_queues = dev->data->nb_rx_queues > 2 ||
+				dev->data->nb_tx_queues > 2;
+	/* Minimum CPU utilization. */
+	unsigned int ring_size = 256;
+	const unsigned int burst_size = 64;
+	const unsigned int nb_queues = is_100g ? 16 : 8;
+
+	/* Max Throughput. */
+	if (many_queues)
+		ring_size = is_100g ? 2048 : 4096;
+	info->default_rxportconf.burst_size = burst_size;
+	info->default_txportconf.burst_size = burst_size;
+	info->default_rxportconf.nb_queues = nb_queues;
+	info->default_txportconf.nb_queues = nb_queues;
+	info->default_rxportconf.ring_size = ring_size;
+	info->default_txportconf.ring_size = ring_size;
+}
+
/**
* DPDK callback to get information about the device.
*
unsigned int max;
char ifname[IF_NAMESIZE];
- info->pci_dev = RTE_ETH_DEV_TO_PCI(dev);
/* FIXME: we should ask the device for these values. */
info->min_rx_bufsize = 32;
info->max_rx_pktlen = 65536;
max = 65535;
info->max_rx_queues = max;
info->max_tx_queues = max;
- info->max_mac_addrs = RTE_DIM(priv->mac);
+ info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES;
info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev);
info->rx_offload_capa = (mlx5_get_rx_port_offloads() |
info->rx_queue_offload_capa);
info->if_index = if_nametoindex(ifname);
info->reta_size = priv->reta_idx_n ?
priv->reta_idx_n : config->ind_table_max_size;
- info->hash_key_size = priv->rss_conf.rss_key_len;
+ info->hash_key_size = rss_hash_default_key_len;
info->speed_capa = priv->link_speed_capa;
info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK;
+ mlx5_set_default_params(dev, info);
}
/**
};
if (dev->rx_pkt_burst == mlx5_rx_burst ||
+ dev->rx_pkt_burst == mlx5_rx_burst_mprq ||
dev->rx_pkt_burst == mlx5_rx_burst_vec)
return ptypes;
return NULL;
*
* @param dev
* Pointer to Ethernet device structure.
+ * @param[out] link
+ * Storage for current link status.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev)
+mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
+ struct rte_eth_link *link)
{
struct priv *priv = dev->data->dev_private;
struct ethtool_cmd edata = {
}
link_speed = ethtool_cmd_speed(&edata);
if (link_speed == -1)
- dev_link.link_speed = 0;
+ dev_link.link_speed = ETH_SPEED_NUM_NONE;
else
dev_link.link_speed = link_speed;
priv->link_speed_capa = 0;
ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
ETH_LINK_SPEED_FIXED);
- if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) {
- /* Link status changed. */
- dev->data->dev_link = dev_link;
- return 0;
+ if ((dev_link.link_speed && !dev_link.link_status) ||
+ (!dev_link.link_speed && dev_link.link_status)) {
+ rte_errno = EAGAIN;
+ return -rte_errno;
}
- /* Link status is still the same. */
- rte_errno = EAGAIN;
- return -rte_errno;
+ *link = dev_link;
+ return 0;
}
/**
*
* @param dev
* Pointer to Ethernet device structure.
+ * @param[out] link
+ * Storage for current link status.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev)
+mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
+ struct rte_eth_link *link)
+
{
struct priv *priv = dev->data->dev_private;
struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
ETH_LINK_SPEED_FIXED);
- if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) {
- /* Link status changed. */
- dev->data->dev_link = dev_link;
- return 0;
+ if ((dev_link.link_speed && !dev_link.link_status) ||
+ (!dev_link.link_speed && dev_link.link_status)) {
+ rte_errno = EAGAIN;
+ return -rte_errno;
}
- /* Link status is still the same. */
- rte_errno = EAGAIN;
- return -rte_errno;
+ *link = dev_link;
+ return 0;
}
/**
* @param dev
* Pointer to Ethernet device structure.
* @param wait_to_complete
- * Wait for request completion (ignored).
+ * Wait for request completion. When non-zero, a query that fails with
+ * -EAGAIN is retried until MLX5_LINK_STATUS_TIMEOUT has elapsed; when
+ * zero, the link is queried only once.
*
* @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
+ * 0 if the link information stored in dev->data->dev_link is unchanged,
+ * 1 if it differs from the previously stored one (the new information is
+ * stored in both cases), a negative errno value otherwise and rte_errno
+ * is set. -EBUSY is returned when wait_to_complete is set and the query
+ * kept failing with -EAGAIN until the timeout expired.
+ *
+ * The link is queried with mlx5_link_update_unlocked_gs() first;
+ * mlx5_link_update_unlocked_gset() is used as a fallback whenever the
+ * former returns non-zero.
*/
int
-mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused)
+mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
int ret;
+ struct rte_eth_link dev_link;
+ time_t start_time = time(NULL);
- ret = mlx5_link_update_unlocked_gset(dev);
- if (ret)
- ret = mlx5_link_update_unlocked_gs(dev);
- return 0;
+ do {
+ ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
+ if (ret)
+ ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
+ if (ret == 0)
+ break;
+ /*
+ * Handle wait to complete situation. The query helpers visible in
+ * this file return -EAGAIN when link speed and link status
+ * disagree. If the caller asked to wait, poll again until
+ * MLX5_LINK_STATUS_TIMEOUT is reached (the elapsed time comes from
+ * difftime(), i.e. it is counted in seconds), then give up with
+ * EBUSY. Any other failure, or -EAGAIN without waiting, is
+ * returned to the caller as is.
+ */
+ if (wait_to_complete && ret == -EAGAIN) {
+ if (abs((int)difftime(time(NULL), start_time)) <
+ MLX5_LINK_STATUS_TIMEOUT) {
+ usleep(0);
+ continue;
+ } else {
+ rte_errno = EBUSY;
+ return -rte_errno;
+ }
+ } else if (ret < 0) {
+ return ret;
+ }
+ } while (wait_to_complete);
+ ret = !!memcmp(&dev->data->dev_link, &dev_link,
+ sizeof(struct rte_eth_link));
+ dev->data->dev_link = dev_link;
+ return ret;
}
/**
return 0;
}
-/**
- * Update the link status.
- *
- * @param dev
- * Pointer to Ethernet device.
- *
- * @return
- * Zero if the callback process can be called immediately, negative errno
- * value otherwise and rte_errno is set.
- */
-static int
-mlx5_link_status_update(struct rte_eth_dev *dev)
-{
- struct priv *priv = dev->data->dev_private;
- struct rte_eth_link *link = &dev->data->dev_link;
- int ret;
-
- ret = mlx5_link_update(dev, 0);
- if (ret)
- return ret;
- if (((link->link_speed == 0) && link->link_status) ||
- ((link->link_speed != 0) && !link->link_status)) {
- /*
- * Inconsistent status. Event likely occurred before the
- * kernel netdevice exposes the new status.
- */
- if (!priv->pending_alarm) {
- priv->pending_alarm = 1;
- rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
- mlx5_dev_link_status_handler,
- priv->dev);
- }
- return 1;
- } else if (unlikely(priv->pending_alarm)) {
- /* Link interrupt occurred while alarm is already scheduled. */
- priv->pending_alarm = 0;
- rte_eal_alarm_cancel(mlx5_dev_link_status_handler, priv->dev);
- }
- return 0;
-}
-
/**
* Device status handler.
*
struct ibv_async_event event;
uint32_t ret = 0;
+ if (mlx5_link_update(dev, 0) == -EAGAIN) {
+ usleep(0);
+ return 0;
+ }
/* Read all message and acknowledge them. */
for (;;) {
if (mlx5_glue->get_async_event(priv->ctx, &event))
dev->data->port_id, event.event_type);
mlx5_glue->ack_async_event(&event);
}
- if (ret & (1 << RTE_ETH_EVENT_INTR_LSC))
- if (mlx5_link_status_update(dev))
- ret &= ~(1 << RTE_ETH_EVENT_INTR_LSC);
return ret;
}
-/**
- * Handle delayed link status event.
- *
- * @param arg
- * Registered argument.
- */
-void
-mlx5_dev_link_status_handler(void *arg)
-{
- struct rte_eth_dev *dev = arg;
- struct priv *priv = dev->data->dev_private;
- int ret;
-
- priv->pending_alarm = 0;
- ret = mlx5_link_status_update(dev);
- if (!ret)
- _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
-}
-
/**
* Handle interrupts from the NIC.
*
if (priv->primary_socket)
rte_intr_callback_unregister(&priv->intr_handle_socket,
mlx5_dev_handler_socket, dev);
- if (priv->pending_alarm) {
- priv->pending_alarm = 0;
- rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev);
- }
priv->intr_handle.fd = 0;
priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
priv->intr_handle_socket.fd = 0;
uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |
DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
- DEV_TX_OFFLOAD_GRE_TNL_TSO));
+ DEV_TX_OFFLOAD_GRE_TNL_TSO |
+ DEV_TX_OFFLOAD_IP_TNL_TSO |
+ DEV_TX_OFFLOAD_UDP_TNL_TSO));
+ int swp = !!(tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO |
+ DEV_TX_OFFLOAD_UDP_TNL_TSO |
+ DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM));
int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT);
assert(priv != NULL);
/* Select appropriate TX function. */
- if (vlan_insert || tso)
+ if (vlan_insert || tso || swp)
return tx_pkt_burst;
if (config->mps == MLX5_MPW_ENHANCED) {
if (mlx5_check_vec_tx_support(dev) > 0) {
rx_pkt_burst = mlx5_rx_burst_vec;
DRV_LOG(DEBUG, "port %u selected Rx vectorized function",
dev->data->port_id);
+ } else if (mlx5_mprq_enabled(dev)) {
+ rx_pkt_burst = mlx5_rx_burst_mprq;
}
return rx_pkt_burst;
}