#include <sys/mman.h>
#include <linux/rtnetlink.h>
-/* Verbs header. */
-/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include <infiniband/verbs.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_pci.h>
#include <mlx5_common.h>
#include <mlx5_common_os.h>
#include <mlx5_common_mp.h>
+#include <mlx5_common_pci.h>
#include <mlx5_malloc.h>
#include "mlx5_defs.h"
/* The default memory allocator used in PMD. */
#define MLX5_SYS_MEM_EN "sys_mem_en"
-
-static const char *MZ_MLX5_PMD_SHARED_DATA = "mlx5_pmd_shared_data";
+/* Device parameter to enable or disable decap. */
+#define MLX5_DECAP_EN "decap_en"
/* Shared memory between primary and secondary processes. */
struct mlx5_shared_data *mlx5_shared_data;
-/* Spinlock for mlx5_shared_data allocation. */
-static rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
-
-/* Process local data for secondary processes. */
-static struct mlx5_local_data mlx5_local_data;
+/** Driver-specific log messages type. */
+int mlx5_logtype;
static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list =
LIST_HEAD_INITIALIZER();
prf->obj = NULL;
}
+/**
+ * Allocate Rx and Tx UARs in robust fashion.
+ * This routine handles the following UAR allocation issues:
+ *
+ * - tries to allocate the UAR with the most appropriate memory
+ * mapping type from the ones supported by the host
+ *
+ * - tries to allocate the UAR with non-NULL base address
+ * OFED 5.0.x and Upstream rdma_core before v29 returned NULL as the
+ * UAR base address if the UAR was not the first object in the UAR page.
+ * It caused the PMD failure and we should try to get another UAR
+ * till we get the first one with non-NULL base address returned.
+ *
+ * Each of the two UARs is attempted up to MLX5_ALLOC_UAR_RETRY times.
+ *
+ * @param[in, out] sh
+ * Shared device context. sh->ctx is used for the allocation; on success
+ * sh->tx_uar and sh->devx_rx_uar are set and both have a non-NULL
+ * base_addr.
+ * @param[in] config
+ * Device configuration. Only config->dbnc is read, and only when
+ * MLX5DV_UAR_ALLOC_TYPE_NC is defined.
+ *
+ * @return
+ * 0 on success, ENOMEM (positive value) otherwise. On failure the UAR
+ * that could not be allocated is left NULL; a Tx UAR allocated before an
+ * Rx failure is not released here.
+ */
+static int
+mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh,
+ const struct mlx5_dev_config *config)
+{
+ uint32_t uar_mapping, retry;
+ int err = 0;
+
+ for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
+#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
+ /*
+ * Control the mapping type according to the settings:
+ * "Non-Cached" is requested first only for MLX5_TXDB_NCACHED,
+ * any other dbnc value starts with the BF mapping type.
+ */
+ uar_mapping = (config->dbnc == MLX5_TXDB_NCACHED) ?
+ MLX5DV_UAR_ALLOC_TYPE_NC :
+ MLX5DV_UAR_ALLOC_TYPE_BF;
+#else
+ RTE_SET_USED(config);
+ /*
+ * It seems we have no way to control the memory mapping type
+ * for the UAR, the default "Write-Combining" type is supposed.
+ * The UAR initialization on queue creation queries the
+ * actual mapping type done by Verbs/kernel and sets up the
+ * PMD datapath accordingly.
+ */
+ uar_mapping = 0;
+#endif
+ sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->ctx, uar_mapping);
+#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
+ if (!sh->tx_uar &&
+ uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
+ if (config->dbnc == MLX5_TXDB_CACHED ||
+ config->dbnc == MLX5_TXDB_HEURISTIC)
+ DRV_LOG(WARNING, "Devarg tx_db_nc setting "
+ "is not supported by DevX");
+ /*
+ * In some environments like a virtual machine
+ * the "Write-Combining" mapping might not be supported
+ * and UAR allocation fails. We try "Non-Cached"
+ * mapping for the case. The tx_burst routines take
+ * the UAR mapping type into account on UAR setup
+ * on queue creation.
+ */
+ DRV_LOG(WARNING, "Failed to allocate Tx DevX UAR (BF)");
+ uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
+ sh->tx_uar = mlx5_glue->devx_alloc_uar
+ (sh->ctx, uar_mapping);
+ } else if (!sh->tx_uar &&
+ uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) {
+ if (config->dbnc == MLX5_TXDB_NCACHED)
+ DRV_LOG(WARNING, "Devarg tx_db_nc settings "
+ "is not supported by DevX");
+ /*
+ * If Verbs/kernel does not support "Non-Cached"
+ * try the "Write-Combining".
+ */
+ DRV_LOG(WARNING, "Failed to allocate Tx DevX UAR (NC)");
+ uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF;
+ sh->tx_uar = mlx5_glue->devx_alloc_uar
+ (sh->ctx, uar_mapping);
+ }
+#endif
+ if (!sh->tx_uar) {
+ DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (BF/NC)");
+ err = ENOMEM;
+ goto exit;
+ }
+ if (sh->tx_uar->base_addr)
+ break;
+ /*
+ * The UARs are allocated by rdma_core within the
+ * IB device context, on context closure all UARs
+ * will be freed, should be no memory/object leakage.
+ * The UAR with the NULL base address is therefore just
+ * dropped (not freed) before the next attempt.
+ * NOTE(review): presumably freeing it would let the same
+ * UAR be handed back again - confirm.
+ */
+ DRV_LOG(WARNING, "Retrying to allocate Tx DevX UAR");
+ sh->tx_uar = NULL;
+ }
+ /*
+ * Check whether we finally succeeded with valid UAR allocation.
+ * The pointer is still NULL here when no attempt returned a UAR
+ * with a non-NULL base address.
+ */
+ if (!sh->tx_uar) {
+ DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (NULL base)");
+ err = ENOMEM;
+ goto exit;
+ }
+ for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
+ uar_mapping = 0;
+ sh->devx_rx_uar = mlx5_glue->devx_alloc_uar
+ (sh->ctx, uar_mapping);
+#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
+ if (!sh->devx_rx_uar &&
+ uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
+ /*
+ * Rx UAR is used to control interrupts only,
+ * should be no datapath noticeable impact,
+ * can try "Non-Cached" mapping safely.
+ * NOTE(review): uar_mapping is always 0 at this
+ * point, so this fallback is taken only if
+ * MLX5DV_UAR_ALLOC_TYPE_BF is 0 - confirm.
+ */
+ DRV_LOG(WARNING, "Failed to allocate Rx DevX UAR (BF)");
+ uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
+ sh->devx_rx_uar = mlx5_glue->devx_alloc_uar
+ (sh->ctx, uar_mapping);
+ }
+#endif
+ if (!sh->devx_rx_uar) {
+ DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (BF/NC)");
+ err = ENOMEM;
+ goto exit;
+ }
+ if (sh->devx_rx_uar->base_addr)
+ break;
+ /*
+ * The UARs are allocated by rdma_core within the
+ * IB device context, on context closure all UARs
+ * will be freed, should be no memory/object leakage.
+ * Same as for Tx: the UAR is dropped without being freed.
+ */
+ DRV_LOG(WARNING, "Retrying to allocate Rx DevX UAR");
+ sh->devx_rx_uar = NULL;
+ }
+ /*
+ * Check whether we finally succeeded with valid UAR allocation.
+ * The already allocated Tx UAR is left in sh->tx_uar on this failure.
+ */
+ if (!sh->devx_rx_uar) {
+ DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (NULL base)");
+ err = ENOMEM;
+ }
+exit:
+ return err;
+}
+
/**
* Allocate shared device context. If there is multiport device the
* master and representors will share this context, if there is single
err = ENOMEM;
goto error;
}
- sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->ctx, 0);
- if (!sh->tx_uar) {
- DRV_LOG(ERR, "Failed to allocate DevX UAR.");
- err = ENOMEM;
+ err = mlx5_alloc_rxtx_uars(sh, config);
+ if (err)
goto error;
- }
+ MLX5_ASSERT(sh->tx_uar && sh->tx_uar->base_addr);
+ MLX5_ASSERT(sh->devx_rx_uar && sh->devx_rx_uar->base_addr);
}
sh->flow_id_pool = mlx5_flow_id_pool_alloc
((1 << HAIRPIN_FLOW_ID_BITS) - 1);
pthread_mutex_destroy(&sh->txpp.mutex);
pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
MLX5_ASSERT(sh);
- if (sh->cnt_id_tbl) {
+ if (sh->cnt_id_tbl)
mlx5_l3t_destroy(sh->cnt_id_tbl);
- sh->cnt_id_tbl = NULL;
- }
- if (sh->tx_uar) {
- mlx5_glue->devx_free_uar(sh->tx_uar);
- sh->tx_uar = NULL;
- }
if (sh->tis)
claim_zero(mlx5_devx_cmd_destroy(sh->tis));
if (sh->td)
claim_zero(mlx5_devx_cmd_destroy(sh->td));
+ if (sh->devx_rx_uar)
+ mlx5_glue->devx_free_uar(sh->devx_rx_uar);
+ if (sh->tx_uar)
+ mlx5_glue->devx_free_uar(sh->tx_uar);
if (sh->pd)
claim_zero(mlx5_glue->dealloc_pd(sh->pd));
if (sh->ctx)
mlx5_mr_release_cache(&sh->share_cache);
/* Remove context from the global device list. */
LIST_REMOVE(sh, next);
+ pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
/*
* Ensure there is no async event handler installed.
* Only primary process handles async device events.
claim_zero(mlx5_devx_cmd_destroy(sh->tis));
if (sh->td)
claim_zero(mlx5_devx_cmd_destroy(sh->td));
+ if (sh->devx_rx_uar)
+ mlx5_glue->devx_free_uar(sh->devx_rx_uar);
if (sh->ctx)
claim_zero(mlx5_glue->close_device(sh->ctx));
if (sh->flow_id_pool)
mlx5_flow_id_pool_release(sh->flow_id_pool);
pthread_mutex_destroy(&sh->txpp.mutex);
mlx5_free(sh);
+ return;
exit:
pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
}
return err;
}
-/**
- * Initialize shared data between primary and secondary process.
- *
- * A memzone is reserved by primary process and secondary processes attach to
- * the memzone.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_init_shared_data(void)
-{
- const struct rte_memzone *mz;
- int ret = 0;
-
- rte_spinlock_lock(&mlx5_shared_data_lock);
- if (mlx5_shared_data == NULL) {
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- /* Allocate shared memory. */
- mz = rte_memzone_reserve(MZ_MLX5_PMD_SHARED_DATA,
- sizeof(*mlx5_shared_data),
- SOCKET_ID_ANY, 0);
- if (mz == NULL) {
- DRV_LOG(ERR,
- "Cannot allocate mlx5 shared data");
- ret = -rte_errno;
- goto error;
- }
- mlx5_shared_data = mz->addr;
- memset(mlx5_shared_data, 0, sizeof(*mlx5_shared_data));
- rte_spinlock_init(&mlx5_shared_data->lock);
- } else {
- /* Lookup allocated shared memory. */
- mz = rte_memzone_lookup(MZ_MLX5_PMD_SHARED_DATA);
- if (mz == NULL) {
- DRV_LOG(ERR,
- "Cannot attach mlx5 shared data");
- ret = -rte_errno;
- goto error;
- }
- mlx5_shared_data = mz->addr;
- memset(&mlx5_local_data, 0, sizeof(mlx5_local_data));
- }
- }
-error:
- rte_spinlock_unlock(&mlx5_shared_data_lock);
- return ret;
-}
-
/**
* Retrieve integer value from environment variable.
*
dev->tx_pkt_burst = removed_tx_burst;
rte_wmb();
/* Disable datapath on secondary process. */
- mlx5_mp_req_stop_rxtx(dev);
+ mlx5_mp_os_req_stop_rxtx(dev);
/* Free the eCPRI flex parser resource. */
mlx5_flex_parser_ecpri_release(dev);
if (priv->rxqs != NULL) {
config->reclaim_mode = tmp;
} else if (strcmp(MLX5_SYS_MEM_EN, key) == 0) {
config->sys_mem_en = !!tmp;
+ } else if (strcmp(MLX5_DECAP_EN, key) == 0) {
+ config->decap_en = !!tmp;
} else {
DRV_LOG(WARNING, "%s: unknown parameter", key);
rte_errno = EINVAL;
MLX5_HP_BUF_SIZE,
MLX5_RECLAIM_MEM,
MLX5_SYS_MEM_EN,
+ MLX5_DECAP_EN,
NULL,
};
struct rte_kvargs *kvlist;
return 0;
}
-/**
- * PMD global initialization.
- *
- * Independent from individual device, this function initializes global
- * per-PMD data structures distinguishing primary and secondary processes.
- * Hence, each initialization is called once per a process.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_init_once(void)
-{
- struct mlx5_shared_data *sd;
- struct mlx5_local_data *ld = &mlx5_local_data;
- int ret = 0;
-
- if (mlx5_init_shared_data())
- return -rte_errno;
- sd = mlx5_shared_data;
- MLX5_ASSERT(sd);
- rte_spinlock_lock(&sd->lock);
- switch (rte_eal_process_type()) {
- case RTE_PROC_PRIMARY:
- if (sd->init_done)
- break;
- LIST_INIT(&sd->mem_event_cb_list);
- rte_rwlock_init(&sd->mem_event_rwlock);
- rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
- mlx5_mr_mem_event_cb, NULL);
- ret = mlx5_mp_init_primary(MLX5_MP_NAME,
- mlx5_mp_primary_handle);
- if (ret)
- goto out;
- sd->init_done = true;
- break;
- case RTE_PROC_SECONDARY:
- if (ld->init_done)
- break;
- ret = mlx5_mp_init_secondary(MLX5_MP_NAME,
- mlx5_mp_secondary_handle);
- if (ret)
- goto out;
- ++sd->secondary_cnt;
- ld->init_done = true;
- break;
- default:
- break;
- }
-out:
- rte_spinlock_unlock(&sd->lock);
- return ret;
-}
-
/**
* Configures the minimal amount of data to inline into WQE
* while sending packets.
}
};
-struct rte_pci_driver mlx5_driver = {
- .driver = {
- .name = MLX5_DRIVER_NAME
+static struct mlx5_pci_driver mlx5_driver = {
+ .driver_class = MLX5_CLASS_NET,
+ .pci_driver = {
+ .driver = {
+ .name = MLX5_DRIVER_NAME,
+ },
+ .id_table = mlx5_pci_id_map,
+ .probe = mlx5_os_pci_probe,
+ .remove = mlx5_pci_remove,
+ .dma_map = mlx5_dma_map,
+ .dma_unmap = mlx5_dma_unmap,
+ .drv_flags = PCI_DRV_FLAGS,
},
- .id_table = mlx5_pci_id_map,
- .probe = mlx5_os_pci_probe,
- .remove = mlx5_pci_remove,
- .dma_map = mlx5_dma_map,
- .dma_unmap = mlx5_dma_unmap,
- .drv_flags = PCI_DRV_FLAGS,
};
/* Initialize driver log type. */
*/
RTE_INIT(rte_mlx5_pmd_init)
{
+ mlx5_common_init();
/* Build the static tables for Verbs conversion. */
mlx5_set_ptype_table();
mlx5_set_cksum_table();
mlx5_set_swp_types_table();
if (mlx5_glue)
- rte_pci_register(&mlx5_driver);
+ mlx5_pci_driver_register(&mlx5_driver);
}
RTE_PMD_EXPORT_NAME(net_mlx5, __COUNTER__);