#include <stdlib.h>
#include <errno.h>
#include <net/if.h>
+#include <fcntl.h>
#include <sys/mman.h>
#include <linux/rtnetlink.h>
*/
#define MLX5_HP_BUF_SIZE "hp_buf_log_sz"
+/* Flow memory reclaim mode. */
+#define MLX5_RECLAIM_MEM "reclaim_mem_mode"
+
#ifndef HAVE_IBV_MLX5_MOD_MPW
#define MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED (1 << 2)
#define MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW (1 << 3)
static pthread_mutex_t mlx5_ibv_list_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = {
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT /* Entries up to the matching #endif are DV-only. */
{
.size = sizeof(struct mlx5_flow_dv_encap_decap_resource),
.trunk_size = 64,
.free = rte_free,
.type = "mlx5_encap_decap_ipool",
},
+ {
+ .size = sizeof(struct mlx5_flow_dv_push_vlan_action_resource),
+ .trunk_size = 64,
+ .grow_trunk = 3,
+ .grow_shift = 2,
+ .need_lock = 0,
+ .release_mem_en = 1,
+ .malloc = rte_malloc_socket,
+ .free = rte_free,
+ .type = "mlx5_push_vlan_ipool",
+ },
+ {
+ .size = sizeof(struct mlx5_flow_dv_tag_resource),
+ .trunk_size = 64,
+ .grow_trunk = 3,
+ .grow_shift = 2,
+ .need_lock = 0,
+ .release_mem_en = 1,
+ .malloc = rte_malloc_socket,
+ .free = rte_free,
+ .type = "mlx5_tag_ipool",
+ },
+ {
+ .size = sizeof(struct mlx5_flow_dv_port_id_action_resource),
+ .trunk_size = 64,
+ .grow_trunk = 3,
+ .grow_shift = 2,
+ .need_lock = 0,
+ .release_mem_en = 1,
+ .malloc = rte_malloc_socket,
+ .free = rte_free,
+ .type = "mlx5_port_id_ipool",
+ },
+ {
+ .size = sizeof(struct mlx5_flow_tbl_data_entry),
+ .trunk_size = 64,
+ .grow_trunk = 3,
+ .grow_shift = 2,
+ .need_lock = 0,
+ .release_mem_en = 1,
+ .malloc = rte_malloc_socket,
+ .free = rte_free,
+ .type = "mlx5_jump_ipool",
+ },
+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
+ {
+ .size = sizeof(struct mlx5_flow_meter),
+ .trunk_size = 64,
+ .grow_trunk = 3,
+ .grow_shift = 2,
+ .need_lock = 0,
+ .release_mem_en = 1,
+ .malloc = rte_malloc_socket,
+ .free = rte_free,
+ .type = "mlx5_meter_ipool",
+ },
+ {
+ .size = sizeof(struct mlx5_flow_mreg_copy_resource),
+ .trunk_size = 64,
+ .grow_trunk = 3,
+ .grow_shift = 2,
+ .need_lock = 0,
+ .release_mem_en = 1,
+ .malloc = rte_malloc_socket,
+ .free = rte_free,
+ .type = "mlx5_mcp_ipool",
+ },
+ {
+ .size = (sizeof(struct mlx5_hrxq) + MLX5_RSS_HASH_KEY_LEN), /* Entry plus trailing RSS key bytes. */
+ .trunk_size = 64,
+ .grow_trunk = 3,
+ .grow_shift = 2,
+ .need_lock = 0,
+ .release_mem_en = 1,
+ .malloc = rte_malloc_socket,
+ .free = rte_free,
+ .type = "mlx5_hrxq_ipool",
+ },
+ {
+ .size = sizeof(struct mlx5_flow_handle), /* Overridden at runtime by mlx5_flow_ipool_create() when DV flow is disabled. */
+ .trunk_size = 64,
+ .grow_trunk = 3,
+ .grow_shift = 2,
+ .need_lock = 0,
+ .release_mem_en = 1,
+ .malloc = rte_malloc_socket,
+ .free = rte_free,
+ .type = "mlx5_flow_handle_ipool",
+ },
+ {
+ .size = sizeof(struct rte_flow),
+ .trunk_size = 4096, /* Larger trunk than the 64 used by the other entries here. */
+ .need_lock = 1, /* Only entry in this table that sets need_lock. */
+ .release_mem_en = 1,
+ .malloc = rte_malloc_socket,
+ .free = rte_free,
+ .type = "rte_flow_ipool",
+ },
};
return 0;
}
+/**
+ * Reset the per-port flow aging state held in the shared context.
+ *
+ * For each of the sh->max_port port slots the aging flags are cleared,
+ * the aged-counters list and its spinlock are initialized, and the
+ * MLX5_AGE_TRIGGER flag is set.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_ibv_shared object.
+ */
+static void
+mlx5_flow_aging_init(struct mlx5_ibv_shared *sh)
+{
+	uint32_t port_idx = 0;
+
+	while (port_idx < sh->max_port) {
+		struct mlx5_age_info *info = &sh->port[port_idx].age_info;
+
+		info->flags = 0;
+		TAILQ_INIT(&info->aged_counters);
+		rte_spinlock_init(&info->aged_sl);
+		MLX5_AGE_SET(info, MLX5_AGE_TRIGGER);
+		port_idx++;
+	}
+}
+
/**
* Initialize the counters management structure.
*
static void
mlx5_flow_counters_mng_init(struct mlx5_ibv_shared *sh)
{
- uint8_t i;
+ int i;
+ memset(&sh->cmng, 0, sizeof(sh->cmng)); /* Zero the whole manager before its lists and locks are set up. */
TAILQ_INIT(&sh->cmng.flow_counters);
- for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i)
+ for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) { /* One pass per counter container. */
TAILQ_INIT(&sh->cmng.ccont[i].pool_list);
+ rte_spinlock_init(&sh->cmng.ccont[i].resize_sl); /* Each container gets its own resize lock. */
+ }
}
/**
mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
{
struct mlx5_counter_stats_mem_mng *mng;
- uint8_t i;
+ int i;
int j;
int retries = 1024;
break;
rte_pause();
}
- for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
+ for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) {
struct mlx5_flow_counter_pool *pool;
- uint32_t batch = !!(i % 2);
+ uint32_t batch = !!(i > 1);
if (!sh->cmng.ccont[i].pools)
continue;
pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
while (pool) {
- if (batch) {
- if (pool->min_dcs)
- claim_zero
- (mlx5_devx_cmd_destroy(pool->min_dcs));
- }
+ if (batch && pool->min_dcs)
+ claim_zero(mlx5_devx_cmd_destroy
+ (pool->min_dcs));
for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
- if (pool->counters_raw[j].action)
+ if (MLX5_POOL_GET_CNT(pool, j)->action)
claim_zero
- (mlx5_glue->destroy_flow_action
- (pool->counters_raw[j].action));
+ (mlx5_glue->destroy_flow_action
+ (MLX5_POOL_GET_CNT
+ (pool, j)->action));
if (!batch && MLX5_GET_POOL_CNT_EXT
(pool, j)->dcs)
claim_zero(mlx5_devx_cmd_destroy
- (MLX5_GET_POOL_CNT_EXT
- (pool, j)->dcs));
+ (MLX5_GET_POOL_CNT_EXT
+ (pool, j)->dcs));
}
- TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool,
- next);
+ TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool, next);
rte_free(pool);
pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
}
*
* @param[in] sh
* Pointer to mlx5_ibv_shared object.
+ * @param[in] config
+ * Pointer to user dev config.
*/
static void
-mlx5_flow_ipool_create(struct mlx5_ibv_shared *sh)
+mlx5_flow_ipool_create(struct mlx5_ibv_shared *sh,
+ const struct mlx5_dev_config *config __rte_unused)
{
uint8_t i;
- for (i = 0; i < MLX5_IPOOL_MAX; ++i)
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ /*
+ * When DV support is compiled in but the user selected Verbs mode
+ * (dv_flow_en == 0), the flow handle pool entry size is switched to
+ * MLX5_FLOW_HANDLE_VERBS_SIZE.
+ * NOTE(review): this writes to the file-scope mlx5_ipool_cfg table
+ * and nothing here restores it, so the override persists for any
+ * later call made with dv_flow_en set - confirm this is intended.
+ */
+ if (!config->dv_flow_en)
+ mlx5_ipool_cfg[MLX5_IPOOL_MLX5_FLOW].size =
+ MLX5_FLOW_HANDLE_VERBS_SIZE;
+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
+ for (i = 0; i < MLX5_IPOOL_MAX; ++i) {
+ if (config->reclaim_mode) /* Any non-zero reclaim mode forces memory release on. */
+ mlx5_ipool_cfg[i].release_mem_en = 1; /* Also written to the shared table. */
sh->ipool[i] = mlx5_ipool_create(&mlx5_ipool_cfg[i]);
+ }
}
/**
setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1);
}
+/**
+ * Install shared asynchronous device events handler.
+ * This function is implemented to support event sharing
+ * between multiple ports of single IB device.
+ *
+ * Both file descriptors are switched to non-blocking mode before the
+ * callbacks are registered. Every failure is only logged at INFO level;
+ * the matching intr_handle fd is then left at -1 so that
+ * mlx5_dev_shared_handler_uninstall() skips the unregistration.
+ *
+ * @param sh
+ *   Pointer to mlx5_ibv_shared object.
+ */
+static void
+mlx5_dev_shared_handler_install(struct mlx5_ibv_shared *sh)
+{
+	int ret;
+	int flags;
+
+	sh->intr_handle.fd = -1;
+#ifdef HAVE_IBV_DEVX_ASYNC
+	/*
+	 * Mark the DevX handle as not installed up front, also when
+	 * sh->devx is not set: mlx5_dev_shared_handler_uninstall() tests
+	 * this fd for >= 0 without looking at sh->devx.
+	 */
+	sh->intr_handle_devx.fd = -1;
+#endif
+	flags = fcntl(sh->ctx->async_fd, F_GETFL);
+	/* Do not feed a failed F_GETFL result (-1) into F_SETFL. */
+	ret = (flags == -1) ? -1 :
+	      fcntl(sh->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
+	if (ret) {
+		DRV_LOG(INFO, "failed to change file descriptor async event"
+			" queue");
+	} else {
+		sh->intr_handle.fd = sh->ctx->async_fd;
+		sh->intr_handle.type = RTE_INTR_HANDLE_EXT;
+		if (rte_intr_callback_register(&sh->intr_handle,
+					mlx5_dev_interrupt_handler, sh)) {
+			DRV_LOG(INFO, "Fail to install the shared interrupt.");
+			sh->intr_handle.fd = -1;
+		}
+	}
+	if (sh->devx) {
+#ifdef HAVE_IBV_DEVX_ASYNC
+		sh->devx_comp = mlx5_glue->devx_create_cmd_comp(sh->ctx);
+		if (!sh->devx_comp) {
+			DRV_LOG(INFO, "failed to allocate devx_comp.");
+			return;
+		}
+		flags = fcntl(sh->devx_comp->fd, F_GETFL);
+		/* Same guard as above for the DevX completion fd. */
+		ret = (flags == -1) ? -1 :
+		      fcntl(sh->devx_comp->fd, F_SETFL, flags | O_NONBLOCK);
+		if (ret) {
+			/* devx_comp stays allocated; uninstall destroys it. */
+			DRV_LOG(INFO, "failed to change file descriptor"
+				" devx comp");
+			return;
+		}
+		sh->intr_handle_devx.fd = sh->devx_comp->fd;
+		sh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT;
+		if (rte_intr_callback_register(&sh->intr_handle_devx,
+					mlx5_dev_interrupt_handler_devx, sh)) {
+			DRV_LOG(INFO, "Fail to install the devx shared"
+				" interrupt.");
+			sh->intr_handle_devx.fd = -1;
+		}
+#endif /* HAVE_IBV_DEVX_ASYNC */
+	}
+}
+
+/**
+ * Remove the shared asynchronous device events handler.
+ * Counterpart of the shared handler installation used to share events
+ * between multiple ports of single IB device.
+ *
+ * A callback is unregistered only when its intr_handle fd is not
+ * negative. With HAVE_IBV_DEVX_ASYNC the DevX command completion object
+ * is destroyed as well when it is set.
+ *
+ * @param sh
+ *   Pointer to mlx5_ibv_shared object.
+ */
+static void
+mlx5_dev_shared_handler_uninstall(struct mlx5_ibv_shared *sh)
+{
+	if (!(sh->intr_handle.fd < 0)) {
+		mlx5_intr_callback_unregister(&sh->intr_handle,
+					      mlx5_dev_interrupt_handler, sh);
+	}
+#ifdef HAVE_IBV_DEVX_ASYNC
+	if (!(sh->intr_handle_devx.fd < 0)) {
+		rte_intr_callback_unregister(&sh->intr_handle_devx,
+					     mlx5_dev_interrupt_handler_devx,
+					     sh);
+	}
+	if (sh->devx_comp != NULL) {
+		mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp);
+	}
+#endif /* HAVE_IBV_DEVX_ASYNC */
+}
+
/**
* Allocate shared IB device context. If there is multiport device the
* master and representors will share this context, if there is single
sizeof(sh->ibdev_name));
strncpy(sh->ibdev_path, sh->ctx->device->ibdev_path,
sizeof(sh->ibdev_path));
- pthread_mutex_init(&sh->intr_mutex, NULL);
/*
* Setting port_id to max unallowed value means
* there is no interrupt subhandler installed for
goto error;
}
}
- sh->flow_id_pool = mlx5_flow_id_pool_alloc(UINT32_MAX);
+ sh->flow_id_pool = mlx5_flow_id_pool_alloc
+ ((1 << HAIRPIN_FLOW_ID_BITS) - 1);
if (!sh->flow_id_pool) {
DRV_LOG(ERR, "can't create flow id pool");
err = ENOMEM;
err = rte_errno;
goto error;
}
+ mlx5_dev_shared_handler_install(sh);
+ mlx5_flow_aging_init(sh);
mlx5_flow_counters_mng_init(sh);
- mlx5_flow_ipool_create(sh);
+ mlx5_flow_ipool_create(sh, config);
/* Add device to memory callback list. */
rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
LIST_INSERT_HEAD(&mlx5_shared_data->mem_event_cb_list,
**/
mlx5_flow_counters_mng_close(sh);
mlx5_flow_ipool_destroy(sh);
- MLX5_ASSERT(!sh->intr_cnt);
- if (sh->intr_cnt)
- mlx5_intr_callback_unregister
- (&sh->intr_handle, mlx5_dev_interrupt_handler, sh);
-#ifdef HAVE_MLX5_DEVX_ASYNC_SUPPORT
- if (sh->devx_intr_cnt) {
- if (sh->intr_handle_devx.fd)
- rte_intr_callback_unregister(&sh->intr_handle_devx,
- mlx5_dev_interrupt_handler_devx, sh);
- if (sh->devx_comp)
- mlx5dv_devx_destroy_cmd_comp(sh->devx_comp);
- }
-#endif
- pthread_mutex_destroy(&sh->intr_mutex);
+ mlx5_dev_shared_handler_uninstall(sh);
if (sh->pd)
claim_zero(mlx5_glue->dealloc_pd(sh->pd));
if (sh->tis)
sh->esw_drop_action = mlx5_glue->dr_create_flow_action_drop();
}
#endif
+ if (priv->config.reclaim_mode == MLX5_RCM_AGGR) {
+ mlx5_glue->dr_reclaim_domain_memory(sh->rx_domain, 1);
+ mlx5_glue->dr_reclaim_domain_memory(sh->tx_domain, 1);
+ if (sh->fdb_domain)
+ mlx5_glue->dr_reclaim_domain_memory(sh->fdb_domain, 1);
+ }
sh->pop_vlan_action = mlx5_glue->dr_create_flow_action_pop_vlan();
#endif /* HAVE_MLX5DV_DR */
sh->dv_refcnt++;
unsigned int i;
int ret;
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ /* Check if process_private released. */
+ if (!dev->process_private)
+ return;
+ mlx5_tx_uar_uninit_secondary(dev);
+ mlx5_proc_priv_uninit(dev);
+ rte_eth_dev_release_port(dev);
+ return;
+ }
+ if (!priv->sh)
+ return;
DRV_LOG(DEBUG, "port %u closing device \"%s\"",
dev->data->port_id,
((priv->sh->ctx != NULL) ? priv->sh->ctx->device->name : ""));
- /* In case mlx5_dev_stop() has not been called. */
- mlx5_dev_interrupt_handler_uninstall(dev);
- mlx5_dev_interrupt_handler_devx_uninstall(dev);
/*
* If default mreg copy action is removed at the stop stage,
* the search will return none and nothing will be done anymore.
close(priv->nl_socket_rdma);
if (priv->vmwa_context)
mlx5_vlan_vmwa_exit(priv->vmwa_context);
- if (priv->sh) {
- /*
- * Free the shared context in last turn, because the cleanup
- * routines above may use some shared fields, like
- * mlx5_nl_mac_addr_flush() uses ibdev_path for retrieveing
- * ifindex if Netlink fails.
- */
- mlx5_free_shared_ibctx(priv->sh);
- priv->sh = NULL;
- }
ret = mlx5_hrxq_verify(dev);
if (ret)
DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
if (ret)
DRV_LOG(WARNING, "port %u some flows still remain",
dev->data->port_id);
+ /*
+ * Free the shared context last, because the cleanup routines
+ * above may still use some shared fields; for example,
+ * mlx5_nl_mac_addr_flush() uses ibdev_path to retrieve the
+ * ifindex if Netlink fails.
+ */
+ mlx5_free_shared_ibctx(priv->sh);
if (priv->domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) {
unsigned int c = 0;
uint16_t port_id;
DRV_LOG(DEBUG, "class argument is %s.", val);
} else if (strcmp(MLX5_HP_BUF_SIZE, key) == 0) {
config->log_hp_size = tmp;
+ } else if (strcmp(MLX5_RECLAIM_MEM, key) == 0) {
+ if (tmp != MLX5_RCM_NONE &&
+ tmp != MLX5_RCM_LIGHT &&
+ tmp != MLX5_RCM_AGGR) {
+ DRV_LOG(ERR, "Unrecognize %s: \"%s\"", key, val);
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ config->reclaim_mode = tmp;
} else {
DRV_LOG(WARNING, "%s: unknown parameter", key);
rte_errno = EINVAL;
MLX5_LRO_TIMEOUT_USEC,
MLX5_CLASS_ARG_NAME,
MLX5_HP_BUF_SIZE,
+ MLX5_RECLAIM_MEM,
NULL,
};
struct rte_kvargs *kvlist;
i++)
; /* Empty. */
/* Find the first clear bit. */
+ MLX5_ASSERT(i < MLX5_DBR_BITMAP_SIZE);
j = rte_bsf64(~page->dbr_bitmap[i]);
- MLX5_ASSERT(i < (MLX5_DBR_PER_PAGE / 64));
- page->dbr_bitmap[i] |= (1 << j);
+ page->dbr_bitmap[i] |= (UINT64_C(1) << j);
page->dbr_count++;
*dbr_page = page;
return (((i * 64) + j) * sizeof(uint64_t));
int i = offset / 64;
int j = offset % 64;
- page->dbr_bitmap[i] &= ~(1 << j);
+ page->dbr_bitmap[i] &= ~(UINT64_C(1) << j);
}
return ret;
}
/* Receive command fd from primary process */
err = mlx5_mp_req_verbs_cmd_fd(&mp_id);
if (err < 0)
- return NULL;
+ goto err_secondary;
/* Remap UAR for Tx queues. */
err = mlx5_tx_uar_init_secondary(eth_dev, err);
if (err)
- return NULL;
+ goto err_secondary;
/*
* Ethdev pointer is still required as input since
* the primary device is not accessible from the
eth_dev->rx_pkt_burst = mlx5_select_rx_function(eth_dev);
eth_dev->tx_pkt_burst = mlx5_select_tx_function(eth_dev);
return eth_dev;
+err_secondary:
+ mlx5_dev_close(eth_dev);
+ return NULL;
}
/*
* Some parameters ("tx_db_nc" in particularly) are needed in
mlx5_ifindex(eth_dev),
eth_dev->data->mac_addrs,
MLX5_MAX_MAC_ADDRESSES);
- TAILQ_INIT(&priv->flows);
- TAILQ_INIT(&priv->ctrl_flows);
+ priv->flows = 0;
+ priv->ctrl_flows = 0;
TAILQ_INIT(&priv->flow_meters);
TAILQ_INIT(&priv->flow_meter_profiles);
/* Hint libmlx5 to use PMD allocator for data plane resources */
/*
* Single IB device with multiple ports found,
* it may be E-Switch master device and representors.
- * We have to perform identification trough the ports.
+ * We have to perform identification through the ports.
*/
MLX5_ASSERT(nl_rdma >= 0);
MLX5_ASSERT(ns == 0);
rte_eth_copy_pci_info(list[i].eth_dev, pci_dev);
/* Restore non-PCI flags cleared by the above call. */
list[i].eth_dev->data->dev_flags |= restore;
- mlx5_dev_interrupt_handler_devx_install(list[i].eth_dev);
rte_eth_dev_probing_finish(list[i].eth_dev);
}
if (i != ns) {
{
uint16_t port_id;
- RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device)
- rte_eth_dev_close(port_id);
+ RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) {
+ /*
+ * mlx5_dev_close() is not registered to secondary process,
+ * call the close function explicitly for secondary process.
+ */
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+ mlx5_dev_close(&rte_eth_devices[port_id]);
+ else
+ rte_eth_dev_close(port_id);
+ }
return 0;
}