#include <stdint.h>
#include <stdlib.h>
#include <errno.h>
-#include <net/if.h>
-#include <sys/mman.h>
-#include <linux/rtnetlink.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <mlx5_common.h>
#include <mlx5_common_os.h>
#include <mlx5_common_mp.h>
+#include <mlx5_common_pci.h>
#include <mlx5_malloc.h>
#include "mlx5_defs.h"
.free = mlx5_free,
.type = "mlx5_jump_ipool",
},
+ {
+ .size = sizeof(struct mlx5_flow_dv_sample_resource),
+ .trunk_size = 64,
+ .grow_trunk = 3,
+ .grow_shift = 2,
+ .need_lock = 0,
+ .release_mem_en = 1,
+ .malloc = mlx5_malloc,
+ .free = mlx5_free,
+ .type = "mlx5_sample_ipool",
+ },
+ {
+ .size = sizeof(struct mlx5_flow_dv_dest_array_resource),
+ .trunk_size = 64,
+ .grow_trunk = 3,
+ .grow_shift = 2,
+ .need_lock = 0,
+ .release_mem_en = 1,
+ .malloc = mlx5_malloc,
+ .free = mlx5_free,
+ .type = "mlx5_dest_array_ipool",
+ },
#endif
{
.size = sizeof(struct mlx5_flow_meter),
memset(&sh->cmng, 0, sizeof(sh->cmng));
TAILQ_INIT(&sh->cmng.flow_counters);
- for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) {
- sh->cmng.ccont[i].min_id = MLX5_CNT_BATCH_OFFSET;
- sh->cmng.ccont[i].max_id = -1;
- sh->cmng.ccont[i].last_pool_idx = POOL_IDX_INVALID;
- TAILQ_INIT(&sh->cmng.ccont[i].pool_list);
- rte_spinlock_init(&sh->cmng.ccont[i].resize_sl);
- TAILQ_INIT(&sh->cmng.ccont[i].counters);
- rte_spinlock_init(&sh->cmng.ccont[i].csl);
+ sh->cmng.min_id = MLX5_CNT_BATCH_OFFSET;
+ sh->cmng.max_id = -1;
+ sh->cmng.last_pool_idx = POOL_IDX_INVALID;
+ rte_spinlock_init(&sh->cmng.pool_update_sl);
+ for (i = 0; i < MLX5_COUNTER_TYPE_MAX; i++) {
+ TAILQ_INIT(&sh->cmng.counters[i]);
+ rte_spinlock_init(&sh->cmng.csl[i]);
}
}
mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh)
{
struct mlx5_counter_stats_mem_mng *mng;
- int i;
- int j;
+ int i, j;
int retries = 1024;
rte_errno = 0;
break;
rte_pause();
}
- for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) {
+
+ if (sh->cmng.pools) {
struct mlx5_flow_counter_pool *pool;
- uint32_t batch = !!(i > 1);
+ uint16_t n_valid = sh->cmng.n_valid;
+ bool fallback = sh->cmng.counter_fallback;
- if (!sh->cmng.ccont[i].pools)
- continue;
- pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
- while (pool) {
- if (batch && pool->min_dcs)
+ for (i = 0; i < n_valid; ++i) {
+ pool = sh->cmng.pools[i];
+ if (!fallback && pool->min_dcs)
claim_zero(mlx5_devx_cmd_destroy
(pool->min_dcs));
for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
- if (MLX5_POOL_GET_CNT(pool, j)->action)
+ struct mlx5_flow_counter *cnt =
+ MLX5_POOL_GET_CNT(pool, j);
+
+ if (cnt->action)
claim_zero
(mlx5_glue->destroy_flow_action
- (MLX5_POOL_GET_CNT
- (pool, j)->action));
- if (!batch && MLX5_GET_POOL_CNT_EXT
- (pool, j)->dcs)
+ (cnt->action));
+ if (fallback && MLX5_POOL_GET_CNT
+ (pool, j)->dcs_when_free)
claim_zero(mlx5_devx_cmd_destroy
- (MLX5_GET_POOL_CNT_EXT
- (pool, j)->dcs));
+ (cnt->dcs_when_free));
}
- TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool, next);
mlx5_free(pool);
- pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
}
- mlx5_free(sh->cmng.ccont[i].pools);
+ mlx5_free(sh->cmng.pools);
}
mng = LIST_FIRST(&sh->cmng.mem_mngs);
while (mng) {
prf->obj = NULL;
}
+/*
+ * Allocate Rx and Tx UARs in robust fashion.
+ * This routine handles the following UAR allocation issues:
+ *
+ * - tries to allocate the UAR with the most appropriate memory
+ * mapping type from the ones supported by the host
+ *
+ * - tries to allocate the UAR with non-NULL base address
+ * OFED 5.0.x and Upstream rdma_core before v29 returned the NULL as
+ * UAR base address if UAR was not the first object in the UAR page.
+ * It caused the PMD failure and we should try to get another UAR
+ * till we get the first one with non-NULL base address returned.
+ */
+static int
+mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh,
+ const struct mlx5_dev_config *config)
+{
+ uint32_t uar_mapping, retry;
+ int err = 0;
+ void *base_addr;
+
+ for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
+#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
+ /* Control the mapping type according to the settings. */
+ uar_mapping = (config->dbnc == MLX5_TXDB_NCACHED) ?
+ MLX5DV_UAR_ALLOC_TYPE_NC :
+ MLX5DV_UAR_ALLOC_TYPE_BF;
+#else
+ RTE_SET_USED(config);
+ /*
+ * It seems we have no way to control the memory mapping type
+ * for the UAR, the default "Write-Combining" type is supposed.
+ * The UAR initialization on queue creation queries the
+ * actual mapping type done by Verbs/kernel and setups the
+ * PMD datapath accordingly.
+ */
+ uar_mapping = 0;
+#endif
+ sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->ctx, uar_mapping);
+#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
+ if (!sh->tx_uar &&
+ uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
+ if (config->dbnc == MLX5_TXDB_CACHED ||
+ config->dbnc == MLX5_TXDB_HEURISTIC)
+ DRV_LOG(WARNING, "Devarg tx_db_nc setting "
+ "is not supported by DevX");
+ /*
+ * In some environments like virtual machine
+ * the Write Combining mapped might be not supported
+ * and UAR allocation fails. We try "Non-Cached"
+ * mapping for the case. The tx_burst routines take
+ * the UAR mapping type into account on UAR setup
+ * on queue creation.
+ */
+ DRV_LOG(WARNING, "Failed to allocate Tx DevX UAR (BF)");
+ uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
+ sh->tx_uar = mlx5_glue->devx_alloc_uar
+ (sh->ctx, uar_mapping);
+ } else if (!sh->tx_uar &&
+ uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) {
+ if (config->dbnc == MLX5_TXDB_NCACHED)
+ DRV_LOG(WARNING, "Devarg tx_db_nc settings "
+ "is not supported by DevX");
+ /*
+ * If Verbs/kernel does not support "Non-Cached"
+ * try the "Write-Combining".
+ */
+ DRV_LOG(WARNING, "Failed to allocate Tx DevX UAR (NC)");
+ uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF;
+ sh->tx_uar = mlx5_glue->devx_alloc_uar
+ (sh->ctx, uar_mapping);
+ }
+#endif
+ if (!sh->tx_uar) {
+ DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (BF/NC)");
+ err = ENOMEM;
+ goto exit;
+ }
+ base_addr = mlx5_os_get_devx_uar_base_addr(sh->tx_uar);
+ if (base_addr)
+ break;
+ /*
+ * The UARs are allocated by rdma_core within the
+ * IB device context, on context closure all UARs
+ * will be freed, should be no memory/object leakage.
+ */
+ DRV_LOG(WARNING, "Retrying to allocate Tx DevX UAR");
+ sh->tx_uar = NULL;
+ }
+ /* Check whether we finally succeeded with valid UAR allocation. */
+ if (!sh->tx_uar) {
+ DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (NULL base)");
+ err = ENOMEM;
+ goto exit;
+ }
+ for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
+ uar_mapping = 0;
+ sh->devx_rx_uar = mlx5_glue->devx_alloc_uar
+ (sh->ctx, uar_mapping);
+#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
+ if (!sh->devx_rx_uar &&
+ uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
+ /*
+ * Rx UAR is used to control interrupts only,
+ * should be no datapath noticeable impact,
+ * can try "Non-Cached" mapping safely.
+ */
+ DRV_LOG(WARNING, "Failed to allocate Rx DevX UAR (BF)");
+ uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
+ sh->devx_rx_uar = mlx5_glue->devx_alloc_uar
+ (sh->ctx, uar_mapping);
+ }
+#endif
+ if (!sh->devx_rx_uar) {
+ DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (BF/NC)");
+ err = ENOMEM;
+ goto exit;
+ }
+ base_addr = mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar);
+ if (base_addr)
+ break;
+ /*
+ * The UARs are allocated by rdma_core within the
+ * IB device context, on context closure all UARs
+ * will be freed, should be no memory/object leakage.
+ */
+ DRV_LOG(WARNING, "Retrying to allocate Rx DevX UAR");
+ sh->devx_rx_uar = NULL;
+ }
+ /* Check whether we finally succeeded with valid UAR allocation. */
+ if (!sh->devx_rx_uar) {
+ DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (NULL base)");
+ err = ENOMEM;
+ }
+exit:
+ return err;
+}
+
/**
* Allocate shared device context. If there is multiport device the
* master and representors will share this context, if there is single
goto error;
}
if (sh->devx) {
+ /* Query the EQN for this core. */
+ err = mlx5_glue->devx_query_eqn(sh->ctx, 0, &sh->eqn);
+ if (err) {
+ rte_errno = errno;
+ DRV_LOG(ERR, "Failed to query event queue number %d.",
+ rte_errno);
+ goto error;
+ }
err = mlx5_os_get_pdn(sh->pd, &sh->pdn);
if (err) {
DRV_LOG(ERR, "Fail to extract pdn from PD");
err = ENOMEM;
goto error;
}
- sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->ctx, 0);
- if (!sh->tx_uar) {
- DRV_LOG(ERR, "Failed to allocate DevX UAR.");
- err = ENOMEM;
+ err = mlx5_alloc_rxtx_uars(sh, config);
+ if (err)
goto error;
- }
+ MLX5_ASSERT(sh->tx_uar);
+ MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->tx_uar));
+
+ MLX5_ASSERT(sh->devx_rx_uar);
+ MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar));
}
sh->flow_id_pool = mlx5_flow_id_pool_alloc
((1 << HAIRPIN_FLOW_ID_BITS) - 1);
pthread_mutex_destroy(&sh->txpp.mutex);
pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
MLX5_ASSERT(sh);
- if (sh->cnt_id_tbl) {
+ if (sh->cnt_id_tbl)
mlx5_l3t_destroy(sh->cnt_id_tbl);
- sh->cnt_id_tbl = NULL;
- }
- if (sh->tx_uar) {
- mlx5_glue->devx_free_uar(sh->tx_uar);
- sh->tx_uar = NULL;
- }
if (sh->tis)
claim_zero(mlx5_devx_cmd_destroy(sh->tis));
if (sh->td)
claim_zero(mlx5_devx_cmd_destroy(sh->td));
+ if (sh->devx_rx_uar)
+ mlx5_glue->devx_free_uar(sh->devx_rx_uar);
+ if (sh->tx_uar)
+ mlx5_glue->devx_free_uar(sh->tx_uar);
if (sh->pd)
claim_zero(mlx5_glue->dealloc_pd(sh->pd));
if (sh->ctx)
mlx5_mr_release_cache(&sh->share_cache);
/* Remove context from the global device list. */
LIST_REMOVE(sh, next);
+ pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
/*
* Ensure there is no async event handler installed.
* Only primary process handles async device events.
claim_zero(mlx5_devx_cmd_destroy(sh->tis));
if (sh->td)
claim_zero(mlx5_devx_cmd_destroy(sh->td));
+ if (sh->devx_rx_uar)
+ mlx5_glue->devx_free_uar(sh->devx_rx_uar);
if (sh->ctx)
claim_zero(mlx5_glue->close_device(sh->ctx));
if (sh->flow_id_pool)
mlx5_flow_id_pool_release(sh->flow_id_pool);
pthread_mutex_destroy(&sh->txpp.mutex);
mlx5_free(sh);
+ return;
exit:
pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
}
* @param dev
* Pointer to Ethernet device structure.
*/
-void
+int
mlx5_dev_close(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
/* Check if process_private released. */
if (!dev->process_private)
- return;
+ return 0;
mlx5_tx_uar_uninit_secondary(dev);
mlx5_proc_priv_uninit(dev);
rte_eth_dev_release_port(dev);
- return;
+ return 0;
}
if (!priv->sh)
- return;
+ return 0;
DRV_LOG(DEBUG, "port %u closing device \"%s\"",
dev->data->port_id,
((priv->sh->ctx != NULL) ?
if (priv->reta_idx != NULL)
mlx5_free(priv->reta_idx);
if (priv->config.vf)
- mlx5_nl_mac_addr_flush(priv->nl_socket_route, mlx5_ifindex(dev),
- dev->data->mac_addrs,
- MLX5_MAX_MAC_ADDRESSES, priv->mac_own);
+ mlx5_os_mac_addr_flush(dev);
if (priv->nl_socket_route >= 0)
close(priv->nl_socket_route);
if (priv->nl_socket_rdma >= 0)
/*
* Free the shared context in last turn, because the cleanup
* routines above may use some shared fields, like
- * mlx5_nl_mac_addr_flush() uses ibdev_path for retrieveing
+ * mlx5_os_mac_addr_flush() uses ibdev_path for retrieveing
* ifindex if Netlink fails.
*/
mlx5_free_shared_dev_ctx(priv->sh);
* it is freed when dev_private is freed.
*/
dev->data->mac_addrs = NULL;
+ return 0;
}
/**
mlx5_pci_remove(struct rte_pci_device *pci_dev)
{
uint16_t port_id;
+ int ret = 0;
RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) {
/*
* call the close function explicitly for secondary process.
*/
if (rte_eal_process_type() == RTE_PROC_SECONDARY)
- mlx5_dev_close(&rte_eth_devices[port_id]);
+ ret |= mlx5_dev_close(&rte_eth_devices[port_id]);
else
- rte_eth_dev_close(port_id);
+ ret |= rte_eth_dev_close(port_id);
}
- return 0;
+ return ret == 0 ? 0 : -EIO;
}
static const struct rte_pci_id mlx5_pci_id_map[] = {
}
};
-struct rte_pci_driver mlx5_driver = {
- .driver = {
- .name = MLX5_DRIVER_NAME
+static struct mlx5_pci_driver mlx5_driver = {
+ .driver_class = MLX5_CLASS_NET,
+ .pci_driver = {
+ .driver = {
+ .name = MLX5_DRIVER_NAME,
+ },
+ .id_table = mlx5_pci_id_map,
+ .probe = mlx5_os_pci_probe,
+ .remove = mlx5_pci_remove,
+ .dma_map = mlx5_dma_map,
+ .dma_unmap = mlx5_dma_unmap,
+ .drv_flags = PCI_DRV_FLAGS,
},
- .id_table = mlx5_pci_id_map,
- .probe = mlx5_os_pci_probe,
- .remove = mlx5_pci_remove,
- .dma_map = mlx5_dma_map,
- .dma_unmap = mlx5_dma_unmap,
- .drv_flags = PCI_DRV_FLAGS,
};
/* Initialize driver log type. */
*/
RTE_INIT(rte_mlx5_pmd_init)
{
+ mlx5_common_init();
/* Build the static tables for Verbs conversion. */
mlx5_set_ptype_table();
mlx5_set_cksum_table();
mlx5_set_swp_types_table();
if (mlx5_glue)
- rte_pci_register(&mlx5_driver);
+ mlx5_pci_driver_register(&mlx5_driver);
}
RTE_PMD_EXPORT_NAME(net_mlx5, __COUNTER__);