+mlx5_restore_doorbell_mapping_env(int value)
+{
+ MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ /* Restore the original environment variable state. */
+ if (value == MLX5_ARG_UNSET)
+ unsetenv(MLX5_SHUT_UP_BF);
+ else
+ setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1);
+}
+
+/**
+ * Allocate shared IB device context. If there is multiport device the
+ * master and representors will share this context, if there is single
+ * port dedicated IB device, the context will be used by only given
+ * port due to unification.
+ *
+ * Routine first searches the context for the specified IB device name,
+ * if found the shared context assumed and reference counter is incremented.
+ * If no context found the new one is created and initialized with specified
+ * IB device context and parameters.
+ *
+ * @param[in] spawn
+ * Pointer to the IB device attributes (name, port, etc).
+ * @param[in] config
+ * Pointer to device configuration structure.
+ *
+ * @return
+ * Pointer to mlx5_ibv_shared object on success,
+ * otherwise NULL and rte_errno is set.
+ */
+static struct mlx5_ibv_shared *
+mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn,
+ const struct mlx5_dev_config *config)
+{
+ struct mlx5_ibv_shared *sh;
+ int dbmap_env;
+ int err = 0;
+ uint32_t i;
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ struct mlx5_devx_tis_attr tis_attr = { 0 };
+#endif
+
+ MLX5_ASSERT(spawn);
+ /* Secondary process should not create the shared context. */
+ MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ pthread_mutex_lock(&mlx5_ibv_list_mutex);
+ /* Search for IB context by device name. */
+ LIST_FOREACH(sh, &mlx5_ibv_list, next) {
+ if (!strcmp(sh->ibdev_name, spawn->ibv_dev->name)) {
+ sh->refcnt++;
+ goto exit;
+ }
+ }
+ /* No device found, we have to create new shared context. */
+ MLX5_ASSERT(spawn->max_port);
+ sh = rte_zmalloc("ethdev shared ib context",
+ sizeof(struct mlx5_ibv_shared) +
+ spawn->max_port *
+ sizeof(struct mlx5_ibv_shared_port),
+ RTE_CACHE_LINE_SIZE);
+ if (!sh) {
+ DRV_LOG(ERR, "shared context allocation failure");
+ rte_errno = ENOMEM;
+ goto exit;
+ }
+ /*
+ * Configure environment variable "MLX5_BF_SHUT_UP"
+ * before the device creation. The rdma_core library
+ * checks the variable at device creation and
+ * stores the result internally.
+ */
+ dbmap_env = mlx5_config_doorbell_mapping_env(config);
+ /* Try to open IB device with DV first, then usual Verbs. */
+ errno = 0;
+ sh->ctx = mlx5_glue->dv_open_device(spawn->ibv_dev);
+ if (sh->ctx) {
+ sh->devx = 1;
+ DRV_LOG(DEBUG, "DevX is supported");
+ /* The device is created, no need for environment. */
+ mlx5_restore_doorbell_mapping_env(dbmap_env);
+ } else {
+ /* The environment variable is still configured. */
+ sh->ctx = mlx5_glue->open_device(spawn->ibv_dev);
+ err = errno ? errno : ENODEV;
+ /*
+ * The environment variable is not needed anymore,
+ * all device creation attempts are completed.
+ */
+ mlx5_restore_doorbell_mapping_env(dbmap_env);
+ if (!sh->ctx)
+ goto error;
+ DRV_LOG(DEBUG, "DevX is NOT supported");
+ }
+ err = mlx5_glue->query_device_ex(sh->ctx, NULL, &sh->device_attr);
+ if (err) {
+ DRV_LOG(DEBUG, "ibv_query_device_ex() failed");
+ goto error;
+ }
+ sh->refcnt = 1;
+ sh->max_port = spawn->max_port;
+ strncpy(sh->ibdev_name, sh->ctx->device->name,
+ sizeof(sh->ibdev_name));
+ strncpy(sh->ibdev_path, sh->ctx->device->ibdev_path,
+ sizeof(sh->ibdev_path));
+ pthread_mutex_init(&sh->intr_mutex, NULL);
+ /*
+ * Setting port_id to max unallowed value means
+ * there is no interrupt subhandler installed for
+ * the given port index i.
+ */
+ for (i = 0; i < sh->max_port; i++) {
+ sh->port[i].ih_port_id = RTE_MAX_ETHPORTS;
+ sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS;
+ }
+ sh->pd = mlx5_glue->alloc_pd(sh->ctx);
+ if (sh->pd == NULL) {
+ DRV_LOG(ERR, "PD allocation failure");
+ err = ENOMEM;
+ goto error;
+ }
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ if (sh->devx) {
+ err = mlx5_get_pdn(sh->pd, &sh->pdn);
+ if (err) {
+ DRV_LOG(ERR, "Fail to extract pdn from PD");
+ goto error;
+ }
+ sh->td = mlx5_devx_cmd_create_td(sh->ctx);
+ if (!sh->td) {
+ DRV_LOG(ERR, "TD allocation failure");
+ err = ENOMEM;
+ goto error;
+ }
+ tis_attr.transport_domain = sh->td->id;
+ sh->tis = mlx5_devx_cmd_create_tis(sh->ctx, &tis_attr);
+ if (!sh->tis) {
+ DRV_LOG(ERR, "TIS allocation failure");
+ err = ENOMEM;
+ goto error;
+ }
+ }
+ sh->flow_id_pool = mlx5_flow_id_pool_alloc(UINT32_MAX);
+ if (!sh->flow_id_pool) {
+ DRV_LOG(ERR, "can't create flow id pool");
+ err = ENOMEM;
+ goto error;
+ }
+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
+ /*
+ * Once the device is added to the list of memory event
+ * callback, its global MR cache table cannot be expanded
+ * on the fly because of deadlock. If it overflows, lookup
+ * should be done by searching MR list linearly, which is slow.
+ *
+ * At this point the device is not added to the memory
+ * event list yet, context is just being created.
+ */
+ err = mlx5_mr_btree_init(&sh->mr.cache,
+ MLX5_MR_BTREE_CACHE_N * 2,
+ spawn->pci_dev->device.numa_node);
+ if (err) {
+ err = rte_errno;
+ goto error;
+ }
+ mlx5_flow_counters_mng_init(sh);
+ /* Add device to memory callback list. */
+ rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
+ LIST_INSERT_HEAD(&mlx5_shared_data->mem_event_cb_list,
+ sh, mem_event_cb);
+ rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
+ /* Add context to the global device list. */
+ LIST_INSERT_HEAD(&mlx5_ibv_list, sh, next);
+exit:
+ pthread_mutex_unlock(&mlx5_ibv_list_mutex);
+ return sh;
+error:
+ pthread_mutex_unlock(&mlx5_ibv_list_mutex);
+ MLX5_ASSERT(sh);
+ if (sh->tis)
+ claim_zero(mlx5_devx_cmd_destroy(sh->tis));
+ if (sh->td)
+ claim_zero(mlx5_devx_cmd_destroy(sh->td));
+ if (sh->pd)
+ claim_zero(mlx5_glue->dealloc_pd(sh->pd));
+ if (sh->ctx)
+ claim_zero(mlx5_glue->close_device(sh->ctx));
+ if (sh->flow_id_pool)
+ mlx5_flow_id_pool_release(sh->flow_id_pool);
+ rte_free(sh);
+ MLX5_ASSERT(err > 0);
+ rte_errno = err;
+ return NULL;
+}
+
+/**
+ * Free shared IB device context. Decrement counter and if zero free
+ * all allocated resources and close handles.
+ *
+ * @param[in] sh
+ * Pointer to mlx5_ibv_shared object to free
+ */
+static void
+mlx5_free_shared_ibctx(struct mlx5_ibv_shared *sh)
+{
+ pthread_mutex_lock(&mlx5_ibv_list_mutex);
+#ifdef RTE_LIBRTE_MLX5_DEBUG
+ /* Check the object presence in the list. */
+ struct mlx5_ibv_shared *lctx;
+
+ LIST_FOREACH(lctx, &mlx5_ibv_list, next)
+ if (lctx == sh)
+ break;
+ MLX5_ASSERT(lctx);
+ if (lctx != sh) {
+ DRV_LOG(ERR, "Freeing non-existing shared IB context");
+ goto exit;
+ }
+#endif
+ MLX5_ASSERT(sh);
+ MLX5_ASSERT(sh->refcnt);
+ /* Secondary process should not free the shared context. */
+ MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ if (--sh->refcnt)
+ goto exit;
+ /* Remove from memory callback device list. */
+ rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
+ LIST_REMOVE(sh, mem_event_cb);
+ rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
+ /* Release created Memory Regions. */
+ mlx5_mr_release(sh);
+ /* Remove context from the global device list. */
+ LIST_REMOVE(sh, next);
+ /*
+ * Ensure there is no async event handler installed.
+ * Only primary process handles async device events.
+ **/
+ mlx5_flow_counters_mng_close(sh);
+ MLX5_ASSERT(!sh->intr_cnt);
+ if (sh->intr_cnt)
+ mlx5_intr_callback_unregister
+ (&sh->intr_handle, mlx5_dev_interrupt_handler, sh);
+#ifdef HAVE_MLX5_DEVX_ASYNC_SUPPORT
+ if (sh->devx_intr_cnt) {
+ if (sh->intr_handle_devx.fd)
+ rte_intr_callback_unregister(&sh->intr_handle_devx,
+ mlx5_dev_interrupt_handler_devx, sh);
+ if (sh->devx_comp)
+ mlx5dv_devx_destroy_cmd_comp(sh->devx_comp);
+ }
+#endif
+ pthread_mutex_destroy(&sh->intr_mutex);
+ if (sh->pd)
+ claim_zero(mlx5_glue->dealloc_pd(sh->pd));
+ if (sh->tis)
+ claim_zero(mlx5_devx_cmd_destroy(sh->tis));
+ if (sh->td)
+ claim_zero(mlx5_devx_cmd_destroy(sh->td));
+ if (sh->ctx)
+ claim_zero(mlx5_glue->close_device(sh->ctx));
+ if (sh->flow_id_pool)
+ mlx5_flow_id_pool_release(sh->flow_id_pool);
+ rte_free(sh);
+exit:
+ pthread_mutex_unlock(&mlx5_ibv_list_mutex);
+}
+
+/**
+ * Destroy table hash list and all the root entries per domain.
+ *
+ * @param[in] priv
+ * Pointer to the private device data structure.
+ */
+static void
+mlx5_free_table_hash_list(struct mlx5_priv *priv)
+{
+ struct mlx5_ibv_shared *sh = priv->sh;
+ struct mlx5_flow_tbl_data_entry *tbl_data;
+ union mlx5_flow_tbl_key table_key = {
+ {
+ .table_id = 0,
+ .reserved = 0,
+ .domain = 0,
+ .direction = 0,
+ }
+ };
+ struct mlx5_hlist_entry *pos;
+
+ if (!sh->flow_tbls)
+ return;
+ pos = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64);
+ if (pos) {
+ tbl_data = container_of(pos, struct mlx5_flow_tbl_data_entry,
+ entry);
+ MLX5_ASSERT(tbl_data);
+ mlx5_hlist_remove(sh->flow_tbls, pos);
+ rte_free(tbl_data);
+ }
+ table_key.direction = 1;
+ pos = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64);
+ if (pos) {
+ tbl_data = container_of(pos, struct mlx5_flow_tbl_data_entry,
+ entry);
+ MLX5_ASSERT(tbl_data);
+ mlx5_hlist_remove(sh->flow_tbls, pos);
+ rte_free(tbl_data);
+ }
+ table_key.direction = 0;
+ table_key.domain = 1;
+ pos = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64);
+ if (pos) {
+ tbl_data = container_of(pos, struct mlx5_flow_tbl_data_entry,
+ entry);
+ MLX5_ASSERT(tbl_data);
+ mlx5_hlist_remove(sh->flow_tbls, pos);
+ rte_free(tbl_data);
+ }
+ mlx5_hlist_destroy(sh->flow_tbls, NULL, NULL);
+}
+
+/**
+ * Initialize flow table hash list and create the root tables entry
+ * for each domain.
+ *
+ * @param[in] priv
+ * Pointer to the private device data structure.
+ *
+ * @return
+ * Zero on success, positive error code otherwise.
+ */
+static int
+mlx5_alloc_table_hash_list(struct mlx5_priv *priv)
+{
+ struct mlx5_ibv_shared *sh = priv->sh;
+ char s[MLX5_HLIST_NAMESIZE];
+ int err = 0;
+
+ MLX5_ASSERT(sh);
+ snprintf(s, sizeof(s), "%s_flow_table", priv->sh->ibdev_name);
+ sh->flow_tbls = mlx5_hlist_create(s, MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE);
+ if (!sh->flow_tbls) {
+ DRV_LOG(ERR, "flow tables with hash creation failed.\n");
+ err = ENOMEM;
+ return err;
+ }
+#ifndef HAVE_MLX5DV_DR
+ /*
+ * In case we have not DR support, the zero tables should be created
+ * because DV expect to see them even if they cannot be created by
+ * RDMA-CORE.
+ */
+ union mlx5_flow_tbl_key table_key = {
+ {
+ .table_id = 0,
+ .reserved = 0,
+ .domain = 0,
+ .direction = 0,
+ }
+ };
+ struct mlx5_flow_tbl_data_entry *tbl_data = rte_zmalloc(NULL,
+ sizeof(*tbl_data), 0);
+
+ if (!tbl_data) {
+ err = ENOMEM;
+ goto error;
+ }
+ tbl_data->entry.key = table_key.v64;
+ err = mlx5_hlist_insert(sh->flow_tbls, &tbl_data->entry);
+ if (err)
+ goto error;
+ rte_atomic32_init(&tbl_data->tbl.refcnt);
+ rte_atomic32_inc(&tbl_data->tbl.refcnt);
+ table_key.direction = 1;
+ tbl_data = rte_zmalloc(NULL, sizeof(*tbl_data), 0);
+ if (!tbl_data) {
+ err = ENOMEM;
+ goto error;
+ }
+ tbl_data->entry.key = table_key.v64;
+ err = mlx5_hlist_insert(sh->flow_tbls, &tbl_data->entry);
+ if (err)
+ goto error;
+ rte_atomic32_init(&tbl_data->tbl.refcnt);
+ rte_atomic32_inc(&tbl_data->tbl.refcnt);
+ table_key.direction = 0;
+ table_key.domain = 1;
+ tbl_data = rte_zmalloc(NULL, sizeof(*tbl_data), 0);
+ if (!tbl_data) {
+ err = ENOMEM;
+ goto error;
+ }
+ tbl_data->entry.key = table_key.v64;
+ err = mlx5_hlist_insert(sh->flow_tbls, &tbl_data->entry);
+ if (err)
+ goto error;
+ rte_atomic32_init(&tbl_data->tbl.refcnt);
+ rte_atomic32_inc(&tbl_data->tbl.refcnt);
+ return err;
+error:
+ mlx5_free_table_hash_list(priv);
+#endif /* HAVE_MLX5DV_DR */
+ return err;
+}
+
+/**
+ * Initialize DR related data within private structure.
+ * Routine checks the reference counter and does actual
+ * resources creation/initialization only if counter is zero.
+ *
+ * @param[in] priv
+ * Pointer to the private device data structure.
+ *
+ * @return
+ * Zero on success, positive error code otherwise.
+ */
+static int
+mlx5_alloc_shared_dr(struct mlx5_priv *priv)
+{
+ struct mlx5_ibv_shared *sh = priv->sh;
+ char s[MLX5_HLIST_NAMESIZE];
+ int err = 0;
+
+ if (!sh->flow_tbls)
+ err = mlx5_alloc_table_hash_list(priv);
+ else
+ DRV_LOG(DEBUG, "sh->flow_tbls[%p] already created, reuse\n",
+ (void *)sh->flow_tbls);
+ if (err)
+ return err;
+ /* Create tags hash list table. */
+ snprintf(s, sizeof(s), "%s_tags", sh->ibdev_name);
+ sh->tag_table = mlx5_hlist_create(s, MLX5_TAGS_HLIST_ARRAY_SIZE);
+ if (!sh->tag_table) {
+ DRV_LOG(ERR, "tags with hash creation failed.\n");
+ err = ENOMEM;
+ goto error;
+ }
+#ifdef HAVE_MLX5DV_DR
+ void *domain;
+
+ if (sh->dv_refcnt) {
+ /* Shared DV/DR structures is already initialized. */
+ sh->dv_refcnt++;
+ priv->dr_shared = 1;
+ return 0;
+ }
+ /* Reference counter is zero, we should initialize structures. */
+ domain = mlx5_glue->dr_create_domain(sh->ctx,
+ MLX5DV_DR_DOMAIN_TYPE_NIC_RX);
+ if (!domain) {
+ DRV_LOG(ERR, "ingress mlx5dv_dr_create_domain failed");
+ err = errno;
+ goto error;
+ }
+ sh->rx_domain = domain;
+ domain = mlx5_glue->dr_create_domain(sh->ctx,
+ MLX5DV_DR_DOMAIN_TYPE_NIC_TX);
+ if (!domain) {
+ DRV_LOG(ERR, "egress mlx5dv_dr_create_domain failed");
+ err = errno;
+ goto error;
+ }
+ pthread_mutex_init(&sh->dv_mutex, NULL);
+ sh->tx_domain = domain;
+#ifdef HAVE_MLX5DV_DR_ESWITCH
+ if (priv->config.dv_esw_en) {
+ domain = mlx5_glue->dr_create_domain
+ (sh->ctx, MLX5DV_DR_DOMAIN_TYPE_FDB);
+ if (!domain) {
+ DRV_LOG(ERR, "FDB mlx5dv_dr_create_domain failed");
+ err = errno;
+ goto error;
+ }
+ sh->fdb_domain = domain;
+ sh->esw_drop_action = mlx5_glue->dr_create_flow_action_drop();
+ }
+#endif
+ sh->pop_vlan_action = mlx5_glue->dr_create_flow_action_pop_vlan();
+#endif /* HAVE_MLX5DV_DR */
+ sh->dv_refcnt++;
+ priv->dr_shared = 1;
+ return 0;
+error:
+ /* Rollback the created objects. */
+ if (sh->rx_domain) {
+ mlx5_glue->dr_destroy_domain(sh->rx_domain);
+ sh->rx_domain = NULL;
+ }
+ if (sh->tx_domain) {
+ mlx5_glue->dr_destroy_domain(sh->tx_domain);
+ sh->tx_domain = NULL;
+ }
+ if (sh->fdb_domain) {
+ mlx5_glue->dr_destroy_domain(sh->fdb_domain);
+ sh->fdb_domain = NULL;
+ }
+ if (sh->esw_drop_action) {
+ mlx5_glue->destroy_flow_action(sh->esw_drop_action);
+ sh->esw_drop_action = NULL;
+ }
+ if (sh->pop_vlan_action) {
+ mlx5_glue->destroy_flow_action(sh->pop_vlan_action);
+ sh->pop_vlan_action = NULL;
+ }
+ if (sh->tag_table) {
+ /* tags should be destroyed with flow before. */
+ mlx5_hlist_destroy(sh->tag_table, NULL, NULL);
+ sh->tag_table = NULL;
+ }
+ mlx5_free_table_hash_list(priv);
+ return err;
+}
+
+/**
+ * Destroy DR related data within private structure.
+ *
+ * @param[in] priv
+ * Pointer to the private device data structure.
+ */
+static void
+mlx5_free_shared_dr(struct mlx5_priv *priv)
+{
+ struct mlx5_ibv_shared *sh;
+
+ if (!priv->dr_shared)
+ return;
+ priv->dr_shared = 0;
+ sh = priv->sh;
+ MLX5_ASSERT(sh);
+#ifdef HAVE_MLX5DV_DR
+ MLX5_ASSERT(sh->dv_refcnt);
+ if (sh->dv_refcnt && --sh->dv_refcnt)
+ return;
+ if (sh->rx_domain) {
+ mlx5_glue->dr_destroy_domain(sh->rx_domain);
+ sh->rx_domain = NULL;
+ }
+ if (sh->tx_domain) {
+ mlx5_glue->dr_destroy_domain(sh->tx_domain);
+ sh->tx_domain = NULL;
+ }
+#ifdef HAVE_MLX5DV_DR_ESWITCH
+ if (sh->fdb_domain) {
+ mlx5_glue->dr_destroy_domain(sh->fdb_domain);
+ sh->fdb_domain = NULL;
+ }
+ if (sh->esw_drop_action) {
+ mlx5_glue->destroy_flow_action(sh->esw_drop_action);
+ sh->esw_drop_action = NULL;
+ }
+#endif
+ if (sh->pop_vlan_action) {
+ mlx5_glue->destroy_flow_action(sh->pop_vlan_action);
+ sh->pop_vlan_action = NULL;
+ }
+ pthread_mutex_destroy(&sh->dv_mutex);
+#endif /* HAVE_MLX5DV_DR */
+ if (sh->tag_table) {
+ /* tags should be destroyed with flow before. */
+ mlx5_hlist_destroy(sh->tag_table, NULL, NULL);
+ sh->tag_table = NULL;
+ }
+ mlx5_free_table_hash_list(priv);
+}
+
+/**
+ * Initialize shared data between primary and secondary process.
+ *
+ * A memzone is reserved by primary process and secondary processes attach to
+ * the memzone.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_init_shared_data(void)