* Copyright 2019 Mellanox Technologies, Ltd
*/
#include <unistd.h>
+#include <net/if.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <netinet/in.h>
#include <rte_malloc.h>
#include <rte_log.h>
(1ULL << VIRTIO_NET_F_MQ) | \
(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
(1ULL << VIRTIO_F_ORDER_PLATFORM) | \
- (1ULL << VHOST_F_LOG_ALL))
+ (1ULL << VHOST_F_LOG_ALL) | \
+ (1ULL << VIRTIO_NET_F_MTU))
#define MLX5_VDPA_PROTOCOL_FEATURES \
((1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
- (1ULL << VHOST_USER_PROTOCOL_F_MQ))
+ (1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))
+
+#define MLX5_VDPA_MAX_RETRIES 20
+#define MLX5_VDPA_USEC 1000
+#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S 2LLU
TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
TAILQ_HEAD_INITIALIZER(priv_list);
int mlx5_vdpa_logtype;
static struct mlx5_vdpa_priv *
-mlx5_vdpa_find_priv_resource_by_did(int did)
+mlx5_vdpa_find_priv_resource_by_vdev(struct rte_vdpa_device *vdev)
{
struct mlx5_vdpa_priv *priv;
int found = 0;
pthread_mutex_lock(&priv_list_lock);
TAILQ_FOREACH(priv, &priv_list, next) {
- if (did == priv->id) {
+ if (vdev == priv->vdev) {
found = 1;
break;
}
}
pthread_mutex_unlock(&priv_list_lock);
if (!found) {
- DRV_LOG(ERR, "Invalid device id: %d.", did);
+ DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
rte_errno = EINVAL;
return NULL;
}
}
static int
-mlx5_vdpa_get_queue_num(int did, uint32_t *queue_num)
+mlx5_vdpa_get_queue_num(struct rte_vdpa_device *vdev, uint32_t *queue_num)
{
- struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+ struct mlx5_vdpa_priv *priv =
+ mlx5_vdpa_find_priv_resource_by_vdev(vdev);
if (priv == NULL) {
- DRV_LOG(ERR, "Invalid device id: %d.", did);
+ DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
return -1;
}
*queue_num = priv->caps.max_num_virtio_queues;
}
static int
-mlx5_vdpa_get_vdpa_features(int did, uint64_t *features)
+mlx5_vdpa_get_vdpa_features(struct rte_vdpa_device *vdev, uint64_t *features)
{
- struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+ struct mlx5_vdpa_priv *priv =
+ mlx5_vdpa_find_priv_resource_by_vdev(vdev);
if (priv == NULL) {
- DRV_LOG(ERR, "Invalid device id: %d.", did);
+ DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
return -1;
}
*features = MLX5_VDPA_DEFAULT_FEATURES;
}
static int
-mlx5_vdpa_get_protocol_features(int did, uint64_t *features)
+mlx5_vdpa_get_protocol_features(struct rte_vdpa_device *vdev,
+ uint64_t *features)
{
- struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+ struct mlx5_vdpa_priv *priv =
+ mlx5_vdpa_find_priv_resource_by_vdev(vdev);
if (priv == NULL) {
- DRV_LOG(ERR, "Invalid device id: %d.", did);
+ DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
return -1;
}
*features = MLX5_VDPA_PROTOCOL_FEATURES;
static int
mlx5_vdpa_set_vring_state(int vid, int vring, int state)
{
- int did = rte_vhost_get_vdpa_device_id(vid);
- struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+ struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
+ struct mlx5_vdpa_priv *priv =
+ mlx5_vdpa_find_priv_resource_by_vdev(vdev);
if (priv == NULL) {
- DRV_LOG(ERR, "Invalid device id: %d.", did);
+ DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
return -EINVAL;
}
if (vring >= (int)priv->caps.max_num_virtio_queues * 2) {
static int
mlx5_vdpa_features_set(int vid)
{
- int did = rte_vhost_get_vdpa_device_id(vid);
- struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+ struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
+ struct mlx5_vdpa_priv *priv =
+ mlx5_vdpa_find_priv_resource_by_vdev(vdev);
uint64_t log_base, log_size;
uint64_t features;
int ret;
if (priv == NULL) {
- DRV_LOG(ERR, "Invalid device id: %d.", did);
+ DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
return -EINVAL;
}
ret = rte_vhost_get_negotiated_features(vid, &features);
#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
}
+static int
+mlx5_vdpa_mtu_set(struct mlx5_vdpa_priv *priv)
+{
+ struct ifreq request;
+ uint16_t vhost_mtu = 0;
+ uint16_t kern_mtu = 0;
+ int ret = rte_vhost_get_mtu(priv->vid, &vhost_mtu);
+ int sock;
+ int retries = MLX5_VDPA_MAX_RETRIES;
+
+ if (ret) {
+ DRV_LOG(DEBUG, "Cannot get vhost MTU - %d.", ret);
+ return ret;
+ }
+ if (!vhost_mtu) {
+ DRV_LOG(DEBUG, "Vhost MTU is 0.");
+ return ret;
+ }
+ ret = mlx5_get_ifname_sysfs(priv->ctx->device->ibdev_path,
+ request.ifr_name);
+ if (ret) {
+ DRV_LOG(DEBUG, "Cannot get kernel IF name - %d.", ret);
+ return ret;
+ }
+ sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
+ if (sock == -1) {
+ DRV_LOG(DEBUG, "Cannot open IF socket.");
+ return sock;
+ }
+ while (retries--) {
+ ret = ioctl(sock, SIOCGIFMTU, &request);
+ if (ret == -1)
+ break;
+ kern_mtu = request.ifr_mtu;
+ DRV_LOG(DEBUG, "MTU: current %d requested %d.", (int)kern_mtu,
+ (int)vhost_mtu);
+ if (kern_mtu == vhost_mtu)
+ break;
+ request.ifr_mtu = vhost_mtu;
+ ret = ioctl(sock, SIOCSIFMTU, &request);
+ if (ret == -1)
+ break;
+ request.ifr_mtu = 0;
+ usleep(MLX5_VDPA_USEC);
+ }
+ close(sock);
+ return kern_mtu == vhost_mtu ? 0 : -1;
+}
+
static int
mlx5_vdpa_dev_close(int vid)
{
- int did = rte_vhost_get_vdpa_device_id(vid);
- struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+ struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
+ struct mlx5_vdpa_priv *priv =
+ mlx5_vdpa_find_priv_resource_by_vdev(vdev);
int ret = 0;
if (priv == NULL) {
- DRV_LOG(ERR, "Invalid device id: %d.", did);
+ DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
return -1;
}
if (priv->configured)
static int
mlx5_vdpa_dev_config(int vid)
{
- int did = rte_vhost_get_vdpa_device_id(vid);
- struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+ struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
+ struct mlx5_vdpa_priv *priv =
+ mlx5_vdpa_find_priv_resource_by_vdev(vdev);
if (priv == NULL) {
- DRV_LOG(ERR, "Invalid device id: %d.", did);
+ DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
return -EINVAL;
}
if (priv->configured && mlx5_vdpa_dev_close(vid)) {
return -1;
}
priv->vid = vid;
+ if (mlx5_vdpa_mtu_set(priv))
+ DRV_LOG(WARNING, "MTU cannot be set on device %s.",
+ vdev->device->name);
if (mlx5_vdpa_pd_create(priv) || mlx5_vdpa_mem_register(priv) ||
mlx5_vdpa_direct_db_prepare(priv) ||
mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
static int
mlx5_vdpa_get_device_fd(int vid)
{
- int did = rte_vhost_get_vdpa_device_id(vid);
- struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+ struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
+ struct mlx5_vdpa_priv *priv =
+ mlx5_vdpa_find_priv_resource_by_vdev(vdev);
if (priv == NULL) {
- DRV_LOG(ERR, "Invalid device id: %d.", did);
+ DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
return -EINVAL;
}
return priv->ctx->cmd_fd;
static int
mlx5_vdpa_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
{
- int did = rte_vhost_get_vdpa_device_id(vid);
- struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+ struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
+ struct mlx5_vdpa_priv *priv =
+ mlx5_vdpa_find_priv_resource_by_vdev(vdev);
RTE_SET_USED(qid);
if (priv == NULL) {
- DRV_LOG(ERR, "Invalid device id: %d.", did);
+ DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
return -EINVAL;
}
if (!priv->var) {
- DRV_LOG(ERR, "VAR was not created for device %d, is the device"
- " configured?.", did);
+ DRV_LOG(ERR, "VAR was not created for device %s, is the device"
+ " configured?.", vdev->device->name);
return -EINVAL;
}
*offset = priv->var->mmap_off;
}
static int
-mlx5_vdpa_get_stats_names(int did, struct rte_vdpa_stat_name *stats_names,
- unsigned int size)
+mlx5_vdpa_get_stats_names(struct rte_vdpa_device *vdev,
+ struct rte_vdpa_stat_name *stats_names,
+ unsigned int size)
{
static const char *mlx5_vdpa_stats_names[MLX5_VDPA_STATS_MAX] = {
"received_descriptors",
"invalid buffer",
"completion errors",
};
- struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+ struct mlx5_vdpa_priv *priv =
+ mlx5_vdpa_find_priv_resource_by_vdev(vdev);
unsigned int i;
if (priv == NULL) {
- DRV_LOG(ERR, "Invalid device id: %d.", did);
+ DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
return -ENODEV;
}
if (!stats_names)
}
static int
-mlx5_vdpa_get_stats(int did, int qid, struct rte_vdpa_stat *stats,
- unsigned int n)
+mlx5_vdpa_get_stats(struct rte_vdpa_device *vdev, int qid,
+ struct rte_vdpa_stat *stats, unsigned int n)
{
- struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+ struct mlx5_vdpa_priv *priv =
+ mlx5_vdpa_find_priv_resource_by_vdev(vdev);
if (priv == NULL) {
- DRV_LOG(ERR, "Invalid device id: %d.", did);
+ DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
return -ENODEV;
}
if (!priv->configured) {
- DRV_LOG(ERR, "Device %d was not configured.", did);
+ DRV_LOG(ERR, "Device %s was not configured.",
+ vdev->device->name);
return -ENODATA;
}
if (qid >= (int)priv->nr_virtqs) {
- DRV_LOG(ERR, "Too big vring id: %d.", qid);
+ DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
+ vdev->device->name);
return -E2BIG;
}
if (!priv->caps.queue_counters_valid) {
- DRV_LOG(ERR, "Virtq statistics is not supported for device %d.",
- did);
+ DRV_LOG(ERR, "Virtq statistics is not supported for device %s.",
+ vdev->device->name);
return -ENOTSUP;
}
return mlx5_vdpa_virtq_stats_get(priv, qid, stats, n);
}
static int
-mlx5_vdpa_reset_stats(int did, int qid)
+mlx5_vdpa_reset_stats(struct rte_vdpa_device *vdev, int qid)
{
- struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+ struct mlx5_vdpa_priv *priv =
+ mlx5_vdpa_find_priv_resource_by_vdev(vdev);
if (priv == NULL) {
- DRV_LOG(ERR, "Invalid device id: %d.", did);
+ DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
return -ENODEV;
}
if (!priv->configured) {
- DRV_LOG(ERR, "Device %d was not configured.", did);
+ DRV_LOG(ERR, "Device %s was not configured.",
+ vdev->device->name);
return -ENODATA;
}
if (qid >= (int)priv->nr_virtqs) {
- DRV_LOG(ERR, "Too big vring id: %d.", qid);
+ DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
+ vdev->device->name);
return -E2BIG;
}
if (!priv->caps.queue_counters_valid) {
- DRV_LOG(ERR, "Virtq statistics is not supported for device %d.",
- did);
+ DRV_LOG(ERR, "Virtq statistics is not supported for device %s.",
+ vdev->device->name);
return -ENOTSUP;
}
return mlx5_vdpa_virtq_stats_reset(priv, qid);
return ret;
}
-#define MLX5_VDPA_MAX_RETRIES 20
-#define MLX5_VDPA_USEC 1000
static int
mlx5_vdpa_roce_disable(struct rte_pci_addr *addr, struct ibv_device **ibv)
{
return -rte_errno;
}
+static int
+mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
+{
+ struct mlx5_vdpa_priv *priv = opaque;
+ unsigned long tmp;
+
+ if (strcmp(key, "class") == 0)
+ return 0;
+ errno = 0;
+ tmp = strtoul(val, NULL, 0);
+ if (errno) {
+ DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
+ return -errno;
+ }
+ if (strcmp(key, "event_mode") == 0) {
+ if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
+ priv->event_mode = (int)tmp;
+ else
+ DRV_LOG(WARNING, "Invalid event_mode %s.", val);
+ } else if (strcmp(key, "event_us") == 0) {
+ priv->event_us = (uint32_t)tmp;
+ } else if (strcmp(key, "no_traffic_time") == 0) {
+ priv->no_traffic_time_s = (uint32_t)tmp;
+ } else {
+ DRV_LOG(WARNING, "Invalid key %s.", key);
+ }
+ return 0;
+}
+
+static void
+mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv)
+{
+ struct rte_kvargs *kvlist;
+
+ priv->event_mode = MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER;
+ priv->event_us = 0;
+ priv->no_traffic_time_s = MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S;
+ if (devargs == NULL)
+ return;
+ kvlist = rte_kvargs_parse(devargs->args, NULL);
+ if (kvlist == NULL)
+ return;
+ rte_kvargs_process(kvlist, NULL, mlx5_vdpa_args_check_handler, priv);
+ rte_kvargs_free(kvlist);
+ if (!priv->event_us) {
+ if (priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER)
+ priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
+ else if (priv->event_mode == MLX5_VDPA_EVENT_MODE_FIXED_TIMER)
+ priv->event_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
+ }
+ DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
+ DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
+ DRV_LOG(DEBUG, "no traffic time is %u s.", priv->no_traffic_time_s);
+}
+
/**
* DPDK callback to register a PCI device.
*
priv->caps = attr.vdpa;
priv->log_max_rqt_size = attr.log_max_rqt_size;
priv->ctx = ctx;
- priv->dev_addr.pci_addr = pci_dev->addr;
- priv->dev_addr.type = VDPA_ADDR_PCI;
+ priv->pci_dev = pci_dev;
priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
if (!priv->var) {
DRV_LOG(ERR, "Failed to allocate VAR %u.\n", errno);
goto error;
}
- priv->id = rte_vdpa_register_device(&priv->dev_addr, &mlx5_vdpa_ops);
- if (priv->id < 0) {
+ priv->vdev = rte_vdpa_register_device(&pci_dev->device,
+ &mlx5_vdpa_ops);
+ if (priv->vdev == NULL) {
DRV_LOG(ERR, "Failed to register vDPA device.");
rte_errno = rte_errno ? rte_errno : EINVAL;
goto error;
}
+ mlx5_vdpa_config_get(pci_dev->device.devargs, priv);
SLIST_INIT(&priv->mr_list);
pthread_mutex_lock(&priv_list_lock);
TAILQ_INSERT_TAIL(&priv_list, priv, next);
pthread_mutex_lock(&priv_list_lock);
TAILQ_FOREACH(priv, &priv_list, next) {
- if (memcmp(&priv->dev_addr.pci_addr, &pci_dev->addr,
- sizeof(pci_dev->addr)) == 0) {
+ if (!rte_pci_addr_cmp(&priv->pci_dev->addr, &pci_dev->addr)) {
found = 1;
break;
}