#include <rte_rwlock.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>
+#include <rte_alarm.h>
#include "mlx5.h"
#include "mlx5_utils.h"
/* Select port representors to instantiate. */
#define MLX5_REPRESENTOR "representor"
+/* Device parameter to configure the maximum number of dump files per queue. */
+#define MLX5_MAX_DUMP_FILES_NUM "max_dump_files_num"
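+/* Example (hypothetical PCI address): -w 0000:03:00.0,max_dump_files_num=64 */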
+
#ifndef HAVE_IBV_MLX5_MOD_MPW
#define MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED (1 << 2)
#define MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW (1 << 3)
struct mlx5_switch_info info; /**< Switch information. */
struct ibv_device *ibv_dev; /**< Associated IB device. */
struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */
+ struct rte_pci_device *pci_dev; /**< Backend PCI device. */
};
static LIST_HEAD(, mlx5_ibv_shared) mlx5_ibv_list = LIST_HEAD_INITIALIZER();
static pthread_mutex_t mlx5_ibv_list_mutex = PTHREAD_MUTEX_INITIALIZER;
+/**
+ * Initialize the counters management structure.
+ *
+ * @param[in] sh
+ * Pointer to the mlx5_ibv_shared object.
+ */
+static void
+mlx5_flow_counters_mng_init(struct mlx5_ibv_shared *sh)
+{
+ uint8_t i;
+
+ TAILQ_INIT(&sh->cmng.flow_counters);
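+	/* Every counter container keeps its own list of counter pools. */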
+ for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i)
+ TAILQ_INIT(&sh->cmng.ccont[i].pool_list);
+}
+
+/**
+ * Destroy all the resources allocated for counter memory management.
+ *
+ * @param[in] mng
+ * Pointer to the memory management structure.
+ */
+static void
+mlx5_flow_destroy_counter_stat_mem_mng(struct mlx5_counter_stats_mem_mng *mng)
+{
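+	/*
+	 * raws[0].data is assumed to point at the base of the single
+	 * allocation backing all counter raws, so one rte_free() below
+	 * releases them all.
+	 */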
+ uint8_t *mem = (uint8_t *)(uintptr_t)mng->raws[0].data;
+
+ LIST_REMOVE(mng, next);
+ claim_zero(mlx5_devx_cmd_destroy(mng->dm));
+ claim_zero(mlx5_glue->devx_umem_dereg(mng->umem));
+ rte_free(mem);
+}
+
+/**
+ * Close and release all the resources of the counters management.
+ *
+ * @param[in] sh
+ * Pointer to the mlx5_ibv_shared object.
+ */
+static void
+mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
+{
+ struct mlx5_counter_stats_mem_mng *mng;
+ uint8_t i;
+ int j;
+ int retries = 1024;
+
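+	/*
+	 * rte_eal_alarm_cancel() sets rte_errno to EINPROGRESS when the
+	 * alarm callback is executing right now; retry until the in-flight
+	 * counter query alarm completes or the retry budget is exhausted.
+	 */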
+ rte_errno = 0;
+ while (--retries) {
+ rte_eal_alarm_cancel(mlx5_flow_query_alarm, sh);
+ if (rte_errno != EINPROGRESS)
+ break;
+ rte_pause();
+ }
+ for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
+ struct mlx5_flow_counter_pool *pool;
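+		/* Odd-indexed containers hold the batch-allocated pools. */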
+ uint32_t batch = !!(i % 2);
+
+ if (!sh->cmng.ccont[i].pools)
+ continue;
+ pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
+ while (pool) {
+			if (batch && pool->min_dcs)
+				claim_zero(mlx5_devx_cmd_destroy
+					  (pool->min_dcs));
+ for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
+ if (pool->counters_raw[j].action)
+ claim_zero
+ (mlx5_glue->destroy_flow_action
+ (pool->counters_raw[j].action));
+ if (!batch && pool->counters_raw[j].dcs)
+ claim_zero(mlx5_devx_cmd_destroy
+ (pool->counters_raw[j].dcs));
+ }
+ TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool,
+ next);
+ rte_free(pool);
+ pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
+ }
+ rte_free(sh->cmng.ccont[i].pools);
+ }
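+	/* Pop and destroy each counter statistics memory chunk. */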
+ mng = LIST_FIRST(&sh->cmng.mem_mngs);
+ while (mng) {
+ mlx5_flow_destroy_counter_stat_mem_mng(mng);
+ mng = LIST_FIRST(&sh->cmng.mem_mngs);
+ }
+ memset(&sh->cmng, 0, sizeof(sh->cmng));
+}
+
/**
* Allocate shared IB device context. If there is multiport device the
* master and representors will share this context, if there is single
sizeof(sh->ibdev_name));
strncpy(sh->ibdev_path, sh->ctx->device->ibdev_path,
sizeof(sh->ibdev_path));
+ sh->pci_dev = spawn->pci_dev;
pthread_mutex_init(&sh->intr_mutex, NULL);
/*
* Setting port_id to max unallowed value means
err = ENOMEM;
goto error;
}
+ /*
+ * Once the device is added to the list of memory event
+ * callback, its global MR cache table cannot be expanded
+ * on the fly because of deadlock. If it overflows, lookup
+ * should be done by searching MR list linearly, which is slow.
+ *
+ * At this point the device is not added to the memory
+ * event list yet, context is just being created.
+ */
+ err = mlx5_mr_btree_init(&sh->mr.cache,
+ MLX5_MR_BTREE_CACHE_N * 2,
+ sh->pci_dev->device.numa_node);
+ if (err) {
+ err = rte_errno;
+ goto error;
+ }
+ mlx5_flow_counters_mng_init(sh);
LIST_INSERT_HEAD(&mlx5_ibv_list, sh, next);
exit:
pthread_mutex_unlock(&mlx5_ibv_list_mutex);
assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
if (--sh->refcnt)
goto exit;
+ /* Release created Memory Regions. */
+ mlx5_mr_release(sh);
LIST_REMOVE(sh, next);
/*
* Ensure there is no async event handler installed.
* Only primary process handles async device events.
**/
+ mlx5_flow_counters_mng_close(sh);
assert(!sh->intr_cnt);
if (sh->intr_cnt)
- rte_intr_callback_unregister
+ mlx5_intr_callback_unregister
(&sh->intr_handle, mlx5_dev_interrupt_handler, sh);
pthread_mutex_destroy(&sh->intr_mutex);
if (sh->pd)
#ifdef HAVE_MLX5DV_DR
struct mlx5_ibv_shared *sh = priv->sh;
int err = 0;
- void *ns;
+ void *domain;
assert(sh);
if (sh->dv_refcnt) {
return 0;
}
/* Reference counter is zero, we should initialize structures. */
- ns = mlx5_glue->dr_create_ns(sh->ctx,
- MLX5DV_DR_NS_DOMAIN_INGRESS_BYPASS);
- if (!ns) {
- DRV_LOG(ERR, "ingress mlx5dv_dr_create_ns failed");
+ domain = mlx5_glue->dr_create_domain(sh->ctx,
+ MLX5DV_DR_DOMAIN_TYPE_NIC_RX);
+ if (!domain) {
+ DRV_LOG(ERR, "ingress mlx5dv_dr_create_domain failed");
err = errno;
goto error;
}
- sh->rx_ns = ns;
- ns = mlx5_glue->dr_create_ns(sh->ctx,
- MLX5DV_DR_NS_DOMAIN_EGRESS_BYPASS);
- if (!ns) {
- DRV_LOG(ERR, "egress mlx5dv_dr_create_ns failed");
+ sh->rx_domain = domain;
+ domain = mlx5_glue->dr_create_domain(sh->ctx,
+ MLX5DV_DR_DOMAIN_TYPE_NIC_TX);
+ if (!domain) {
+ DRV_LOG(ERR, "egress mlx5dv_dr_create_domain failed");
err = errno;
goto error;
}
pthread_mutex_init(&sh->dv_mutex, NULL);
- sh->tx_ns = ns;
+ sh->tx_domain = domain;
#ifdef HAVE_MLX5DV_DR_ESWITCH
if (priv->config.dv_esw_en) {
- ns = mlx5_glue->dr_create_ns(sh->ctx,
- MLX5DV_DR_NS_DOMAIN_FDB_BYPASS);
- if (!ns) {
- DRV_LOG(ERR, "FDB mlx5dv_dr_create_ns failed");
+ domain = mlx5_glue->dr_create_domain
+ (sh->ctx, MLX5DV_DR_DOMAIN_TYPE_FDB);
+ if (!domain) {
+ DRV_LOG(ERR, "FDB mlx5dv_dr_create_domain failed");
err = errno;
goto error;
}
- sh->fdb_ns = ns;
+ sh->fdb_domain = domain;
sh->esw_drop_action = mlx5_glue->dr_create_flow_action_drop();
}
#endif
error:
/* Rollback the created objects. */
- if (sh->rx_ns) {
- mlx5_glue->dr_destroy_ns(sh->rx_ns);
- sh->rx_ns = NULL;
+ if (sh->rx_domain) {
+ mlx5_glue->dr_destroy_domain(sh->rx_domain);
+ sh->rx_domain = NULL;
}
- if (sh->tx_ns) {
- mlx5_glue->dr_destroy_ns(sh->tx_ns);
- sh->tx_ns = NULL;
+ if (sh->tx_domain) {
+ mlx5_glue->dr_destroy_domain(sh->tx_domain);
+ sh->tx_domain = NULL;
}
- if (sh->fdb_ns) {
- mlx5_glue->dr_destroy_ns(sh->fdb_ns);
- sh->fdb_ns = NULL;
+ if (sh->fdb_domain) {
+ mlx5_glue->dr_destroy_domain(sh->fdb_domain);
+ sh->fdb_domain = NULL;
}
if (sh->esw_drop_action) {
mlx5_glue->destroy_flow_action(sh->esw_drop_action);
assert(sh->dv_refcnt);
if (sh->dv_refcnt && --sh->dv_refcnt)
return;
- if (sh->rx_ns) {
- mlx5_glue->dr_destroy_ns(sh->rx_ns);
- sh->rx_ns = NULL;
+ if (sh->rx_domain) {
+ mlx5_glue->dr_destroy_domain(sh->rx_domain);
+ sh->rx_domain = NULL;
}
- if (sh->tx_ns) {
- mlx5_glue->dr_destroy_ns(sh->tx_ns);
- sh->tx_ns = NULL;
+ if (sh->tx_domain) {
+ mlx5_glue->dr_destroy_domain(sh->tx_domain);
+ sh->tx_domain = NULL;
}
#ifdef HAVE_MLX5DV_DR_ESWITCH
- if (sh->fdb_ns) {
- mlx5_glue->dr_destroy_ns(sh->fdb_ns);
- sh->fdb_ns = NULL;
+ if (sh->fdb_domain) {
+ mlx5_glue->dr_destroy_domain(sh->fdb_domain);
+ sh->fdb_domain = NULL;
}
if (sh->esw_drop_action) {
mlx5_glue->destroy_flow_action(sh->esw_drop_action);
}
mlx5_proc_priv_uninit(dev);
mlx5_mprq_free_mp(dev);
- mlx5_mr_release(dev);
+ /* Remove from memory callback device list. */
+ rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
assert(priv->sh);
+ LIST_REMOVE(priv->sh, mem_event_cb);
+ rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
mlx5_free_shared_dr(priv);
if (priv->rss_conf.rss_key != NULL)
rte_free(priv->rss_conf.rss_key);
close(priv->nl_socket_route);
if (priv->nl_socket_rdma >= 0)
close(priv->nl_socket_rdma);
- if (priv->tcf_context)
- mlx5_flow_tcf_context_destroy(priv->tcf_context);
if (priv->sh) {
/*
* Free the shared context in last turn, because the cleanup
.xstats_get_names = mlx5_xstats_get_names,
.fw_version_get = mlx5_fw_version_get,
.dev_infos_get = mlx5_dev_infos_get,
+ .read_clock = mlx5_read_clock,
.dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
.vlan_filter_set = mlx5_vlan_filter_set,
.rx_queue_setup = mlx5_rx_queue_setup,
config->dv_flow_en = !!tmp;
} else if (strcmp(MLX5_MR_EXT_MEMSEG_EN, key) == 0) {
config->mr_ext_memseg_en = !!tmp;
+ } else if (strcmp(MLX5_MAX_DUMP_FILES_NUM, key) == 0) {
+ config->max_dump_files_num = tmp;
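+		/* Zero means unset; a default is applied at spawn time. */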
} else {
DRV_LOG(WARNING, "%s: unknown parameter", key);
rte_errno = EINVAL;
MLX5_DV_FLOW_EN,
MLX5_MR_EXT_MEMSEG_EN,
MLX5_REPRESENTOR,
+ MLX5_MAX_DUMP_FILES_NUM,
NULL,
};
struct rte_kvargs *kvlist;
return 0;
/* Following UGLY cast is done to pass checkpatch. */
kvlist = rte_kvargs_parse(devargs->args, params);
- if (kvlist == NULL)
- return 0;
+ if (kvlist == NULL) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
/* Process parameters. */
for (i = 0; (params[i] != NULL); ++i) {
if (rte_kvargs_count(kvlist, params[i])) {
{
struct mlx5_shared_data *sd;
struct mlx5_local_data *ld = &mlx5_local_data;
+ int ret = 0;
if (mlx5_init_shared_data())
return -rte_errno;
rte_rwlock_init(&sd->mem_event_rwlock);
rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
mlx5_mr_mem_event_cb, NULL);
- mlx5_mp_init_primary();
+ ret = mlx5_mp_init_primary();
+ if (ret)
+ goto out;
sd->init_done = true;
break;
case RTE_PROC_SECONDARY:
if (ld->init_done)
break;
- mlx5_mp_init_secondary();
+ ret = mlx5_mp_init_secondary();
+ if (ret)
+ goto out;
++sd->secondary_cnt;
ld->init_done = true;
break;
default:
break;
}
+out:
rte_spinlock_unlock(&sd->lock);
- return 0;
+ return ret;
}
/**
unsigned int mprq_max_stride_size_n = 0;
unsigned int mprq_min_stride_num_n = 0;
unsigned int mprq_max_stride_num_n = 0;
- struct ether_addr mac;
+ struct rte_ether_addr mac;
char name[RTE_ETH_NAME_MAX_LEN];
int own_domain_id = 0;
uint16_t port_id;
}
priv->sh = sh;
priv->ibv_port = spawn->ibv_port;
- priv->mtu = ETHER_MTU;
+ priv->mtu = RTE_ETHER_MTU;
#ifndef RTE_ARCH_64
/* Initialize UAR access locks for 32bit implementations. */
rte_spinlock_init(&priv->uar_lock_cq);
DRV_LOG(WARNING, "Multi-Packet RQ isn't supported");
config.mprq.enabled = 0;
}
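+	/* Default to 128 dump files per queue when the devarg is not set. */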
+ if (config.max_dump_files_num == 0)
+ config.max_dump_files_num = 128;
eth_dev = rte_eth_dev_allocate(name);
if (eth_dev == NULL) {
DRV_LOG(ERR, "can not allocate rte ethdev");
claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
if (config.vf && config.vf_nl_en)
mlx5_nl_mac_addr_sync(eth_dev);
- priv->tcf_context = mlx5_flow_tcf_context_create();
- if (!priv->tcf_context) {
- err = -rte_errno;
- DRV_LOG(WARNING,
- "flow rules relying on switch offloads will not be"
- " supported: cannot open libmnl socket: %s",
- strerror(rte_errno));
- } else {
- struct rte_flow_error error;
- unsigned int ifindex = mlx5_ifindex(eth_dev);
-
- if (!ifindex) {
- err = -rte_errno;
- error.message =
- "cannot retrieve network interface index";
- } else {
- err = mlx5_flow_tcf_init(priv->tcf_context,
- ifindex, &error);
- }
- if (err) {
- DRV_LOG(WARNING,
- "flow rules relying on switch offloads will"
- " not be supported: %s: %s",
- error.message, strerror(rte_errno));
- mlx5_flow_tcf_context_destroy(priv->tcf_context);
- priv->tcf_context = NULL;
- }
- }
TAILQ_INIT(&priv->flows);
TAILQ_INIT(&priv->ctrl_flows);
/* Hint libmlx5 to use PMD allocator for data plane resources */
mlx5_link_update(eth_dev, 0);
#ifdef HAVE_IBV_DEVX_OBJ
if (config.devx) {
+ priv->counter_fallback = 0;
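+		/*
+		 * Batch counter queries require the flow_counters_dump HCA
+		 * capability and DevX async support; without either one the
+		 * PMD falls back to reading counters one by one.
+		 */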
err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config.hca_attr);
if (err) {
err = -err;
goto error;
}
+ if (!config.hca_attr.flow_counters_dump)
+ priv->counter_fallback = 1;
+#ifndef HAVE_IBV_DEVX_ASYNC
+ priv->counter_fallback = 1;
+#endif
+ if (priv->counter_fallback)
+			DRV_LOG(INFO, "Use fall-back DV counter management");
}
#endif
#ifdef HAVE_MLX5DV_DR_ESWITCH
goto error;
}
priv->config.flow_prio = err;
- /*
- * Once the device is added to the list of memory event
- * callback, its global MR cache table cannot be expanded
- * on the fly because of deadlock. If it overflows, lookup
- * should be done by searching MR list linearly, which is slow.
- */
- err = mlx5_mr_btree_init(&priv->mr.cache,
- MLX5_MR_BTREE_CACHE_N * 2,
- eth_dev->device->numa_node);
- if (err) {
- err = rte_errno;
- goto error;
- }
/* Add device to memory callback list. */
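+	/* The MR cache is per shared IB context now, so register sh, not priv. */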
rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
LIST_INSERT_HEAD(&mlx5_shared_data->mem_event_cb_list,
- priv, mem_event_cb);
+ sh, mem_event_cb);
rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
return eth_dev;
error:
close(priv->nl_socket_route);
if (priv->nl_socket_rdma >= 0)
close(priv->nl_socket_rdma);
- if (priv->tcf_context)
- mlx5_flow_tcf_context_destroy(priv->tcf_context);
if (own_domain_id)
claim_zero(rte_eth_switch_domain_free(priv->domain_id));
rte_free(priv);
list[ns].ibv_port = i;
list[ns].ibv_dev = ibv_match[0];
list[ns].eth_dev = NULL;
+ list[ns].pci_dev = pci_dev;
list[ns].ifindex = mlx5_nl_ifindex
(nl_rdma, list[ns].ibv_dev->name, i);
if (!list[ns].ifindex) {
list[ns].ibv_port = 1;
list[ns].ibv_dev = ibv_match[i];
list[ns].eth_dev = NULL;
+ list[ns].pci_dev = pci_dev;
list[ns].ifindex = 0;
if (nl_rdma >= 0)
list[ns].ifindex = mlx5_nl_ifindex
.remove = mlx5_pci_remove,
.dma_map = mlx5_dma_map,
.dma_unmap = mlx5_dma_unmap,
- .drv_flags = (RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV |
- RTE_PCI_DRV_PROBE_AGAIN),
+ .drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV |
+ RTE_PCI_DRV_PROBE_AGAIN,
};
#ifdef RTE_IBVERBS_LINK_DLOPEN