+ int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC);
+ int devlink_id;
+ int enable;
+ int ret;
+
+ if (nlsk_fd < 0)
+ return nlsk_fd;
+ devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd);
+ if (devlink_id < 0) {
+ ret = devlink_id;
+ DRV_LOG(DEBUG, "Failed to get devlink id for ROCE operations by"
+ " Netlink.");
+ goto close;
+ }
+ ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable);
+ if (ret) {
+ DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.",
+ ret);
+ goto close;
+ } else if (!enable) {
+ DRV_LOG(INFO, "ROCE has already disabled(Netlink).");
+ goto close;
+ }
+ ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0);
+ if (ret)
+ DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret);
+ else
+ DRV_LOG(INFO, "ROCE is disabled by Netlink successfully.");
+close:
+ close(nlsk_fd);
+ return ret;
+}
+
+/**
+ * Try to disable ROCE by sysfs.
+ *
+ * Reads /sys/bus/pci/devices/<addr>/roce_enable and, when the value read is
+ * non-zero, writes "0" back to the same file.
+ *
+ * @param[in] addr
+ *   PCI address string used to build the sysfs path.
+ *
+ * @return
+ *   0 on success (including when ROCE is already disabled), a negative errno
+ *   value otherwise and rte_errno is set.
+ */
+static int
+mlx5_vdpa_sys_roce_disable(const char *addr)
+{
+	FILE *file_o;
+	int enable;
+	int ret = 0;
+
+	MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr);
+	file_o = fopen(file_p, "rb");
+	if (!file_o) {
+		rte_errno = ENOTSUP;
+		return -ENOTSUP;
+	}
+	if (fscanf(file_o, "%d", &enable) != 1) {
+		rte_errno = EINVAL;
+		/* Negative, like every other error returned from here. */
+		ret = -EINVAL;
+	}
+	fclose(file_o);
+	if (ret)
+		goto error;
+	if (!enable) {
+		DRV_LOG(INFO, "ROCE has already disabled(sysfs).");
+		return 0;
+	}
+	file_o = fopen(file_p, "wb");
+	if (!file_o) {
+		rte_errno = ENOTSUP;
+		return -ENOTSUP;
+	}
+	if (fprintf(file_o, "0\n") < 0)
+		ret = -EIO;
+	/*
+	 * The stream is buffered: the data may only be handed to the kernel
+	 * when the stream is flushed, so a failing write can surface here.
+	 */
+	if (fclose(file_o) != 0)
+		ret = -EIO;
+	if (ret) {
+		rte_errno = EIO;
+		goto error;
+	}
+	DRV_LOG(INFO, "ROCE is disabled by sysfs successfully.");
+	return 0;
+error:
+	DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret);
+	return ret;
+}
+
+/**
+ * Disable ROCE for a device, trying Netlink first and sysfs second.
+ *
+ * @param[in] dev
+ *   Device passed to mlx5_dev_to_pci_str() to obtain the PCI address string.
+ *
+ * @return
+ *   0 when either method reports success, -rte_errno otherwise.
+ */
+static int
+mlx5_vdpa_roce_disable(struct rte_device *dev)
+{
+	char bdf[PCI_PRI_STR_SIZE] = { 0 };
+
+	if (mlx5_dev_to_pci_str(dev, bdf, sizeof(bdf)) < 0)
+		return -rte_errno;
+	/* Netlink is the preferred method. */
+	if (mlx5_vdpa_nl_roce_disable(bdf) == 0)
+		return 0;
+	/* Netlink did not succeed: fall back to sysfs. */
+	if (mlx5_vdpa_sys_roce_disable(bdf) == 0)
+		return 0;
+	return -rte_errno;
+}
+
+/**
+ * Handler for a single "key=value" device argument.
+ *
+ * The class key is accepted without further processing. Every other value is
+ * parsed as an unsigned integer (base auto-detected by strtoul()) and stored
+ * in the matching field of the private structure. Out-of-range event_mode /
+ * event_core values and unknown keys only produce a warning.
+ *
+ * @param[in] key
+ *   Argument name.
+ * @param[in] val
+ *   Argument value string.
+ * @param[in, out] opaque
+ *   Pointer to the struct mlx5_vdpa_priv to update.
+ *
+ * @return
+ *   0 on success, a negative errno value when the value is missing or is not
+ *   a valid integer.
+ */
+static int
+mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
+{
+	struct mlx5_vdpa_priv *priv = opaque;
+	unsigned long tmp;
+	char *end = NULL;
+	/* sysconf() returns a long and -1 on failure; keep the full value. */
+	long n_cores = sysconf(_SC_NPROCESSORS_ONLN);
+
+	if (strcmp(key, RTE_DEVARGS_KEY_CLASS) == 0)
+		return 0;
+	/* strtoul() must not be handed a NULL string. */
+	if (val == NULL) {
+		DRV_LOG(WARNING, "%s: missing value.", key);
+		return -EINVAL;
+	}
+	errno = 0;
+	tmp = strtoul(val, &end, 0);
+	/*
+	 * strtoul() does not set errno when no digits are consumed or when
+	 * the number is followed by garbage: detect both through the end
+	 * pointer so that such input is not silently taken as a number.
+	 */
+	if (errno == 0 && (end == val || *end != '\0'))
+		errno = EINVAL;
+	if (errno) {
+		DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
+		return -errno;
+	}
+	if (strcmp(key, "event_mode") == 0) {
+		if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
+			priv->event_mode = (int)tmp;
+		else
+			DRV_LOG(WARNING, "Invalid event_mode %s.", val);
+	} else if (strcmp(key, "event_us") == 0) {
+		priv->event_us = (uint32_t)tmp;
+	} else if (strcmp(key, "no_traffic_time") == 0) {
+		priv->no_traffic_max = (uint32_t)tmp;
+	} else if (strcmp(key, "event_core") == 0) {
+		/*
+		 * A failed sysconf() (n_cores < 1) must not turn into a huge
+		 * unsigned bound that lets any core index through.
+		 */
+		if (n_cores < 1 || tmp >= (unsigned long)n_cores)
+			DRV_LOG(WARNING, "Invalid event_core %s.", val);
+		else
+			priv->event_core = tmp;
+	} else if (strcmp(key, "hw_latency_mode") == 0) {
+		priv->hw_latency_mode = (uint32_t)tmp;
+	} else if (strcmp(key, "hw_max_latency_us") == 0) {
+		priv->hw_max_latency_us = (uint32_t)tmp;
+	} else if (strcmp(key, "hw_max_pending_comp") == 0) {
+		priv->hw_max_pending_comp = (uint32_t)tmp;
+	} else {
+		DRV_LOG(WARNING, "Invalid key %s.", key);
+	}
+	return 0;
+}
+
+/**
+ * Fill the event-related configuration of a device.
+ *
+ * Defaults are written first; when device arguments are present and can be
+ * parsed, each pair is passed to mlx5_vdpa_args_check_handler(). The
+ * resulting values are logged at DEBUG level only in that case.
+ *
+ * @param[in] devargs
+ *   Device arguments, may be NULL.
+ * @param[in, out] priv
+ *   Private structure receiving the configuration.
+ */
+static void
+mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv)
+{
+	struct rte_kvargs *args = NULL;
+
+	/* Defaults, used as is when no argument overrides them. */
+	priv->no_traffic_max = MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX;
+	priv->event_core = -1;
+	priv->event_us = 0;
+	priv->event_mode = MLX5_VDPA_EVENT_MODE_FIXED_TIMER;
+	if (devargs != NULL)
+		args = rte_kvargs_parse(devargs->args, NULL);
+	/* Nothing to parse, or parsing failed: keep the defaults. */
+	if (args == NULL)
+		return;
+	rte_kvargs_process(args, NULL, mlx5_vdpa_args_check_handler, priv);
+	rte_kvargs_free(args);
+	/* The dynamic timer gets the default step when none was given. */
+	if (priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER &&
+	    priv->event_us == 0)
+		priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
+	DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
+	DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
+	DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
+}
+
+static int
+mlx5_vdpa_dev_probe(struct rte_device *dev)
+{
+ struct ibv_device *ibv;