From a1da6f624c9b42035c231a8f44269f9530f55c86 Mon Sep 17 00:00:00 2001 From: Suanming Mou Date: Mon, 1 Jun 2020 14:09:43 +0800 Subject: [PATCH] net/mlx5: add reclaim memory mode Currently, when a flow is destroyed, some memory resources may still be kept cached to make the next flow creation more efficient. Some systems may need the resources to be handled more flexibly across flow creation and destruction. After peak time, with millions of flows destroyed, such a system would prefer the resources to be reclaimed completely, with no cache kept. Then the resources can be allocated and used by other components. Such a system is not so sensitive to the flow insertion rate, but cares more about the resources. Both the DPDK mlx5 PMD driver and the low level component rdma-core allow the flow resources to be configured as cached or not, but there are no APIs or parameters exposed to the user to configure the flow resources cache mode. In this case, introducing a new PMD devarg to let the user configure the flow resources cache mode will be helpful. This commit adds a new "reclaim_mem_mode" devarg to let the user configure whether the destroyed flows' cached resources should be kept or not. There are three modes that can be chosen: 1. 0(none). It means the flow resources will be cached as usual. The resources will be cached, helpful with flow insertion rate. 2. 1(light). It will only enable the DPDK PMD level resources reclaim. 3. 2(aggressive). Both DPDK PMD level and rdma-core low level will be configured as reclaimed mode. With these three modes, the user can configure the resources cache mode at different levels. 
Signed-off-by: Suanming Mou Acked-by: Viacheslav Ovsiienko --- doc/guides/nics/mlx5.rst | 20 ++++++++++++++++++++ doc/guides/rel_notes/release_20_08.rst | 6 ++++++ drivers/net/mlx5/mlx5.c | 24 +++++++++++++++++++++++- drivers/net/mlx5/mlx5.h | 13 +++++++++++++ 4 files changed, 62 insertions(+), 1 deletion(-) diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index bb03df66a8..0ff3c53542 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -849,6 +849,26 @@ Driver options By default, the PMD will set this value to 16, which means that 9KB jumbo frames will be supported. +- ``reclaim_mem_mode`` parameter [int] + + Caching some resources on flow destroy makes flow recreation more efficient. + While some systems may require that all the resources can be reclaimed after + a flow is destroyed. + The parameter ``reclaim_mem_mode`` provides the option for user to configure + if the resource cache is needed or not. + + There are three options to choose: + + - 0. It means the flow resources will be cached as usual. The resources will + be cached, helpful with flow insertion rate. + + - 1. It will only enable the DPDK PMD level resources reclaim. + + - 2. Both DPDK PMD level and rdma-core low level will be configured as + reclaimed mode. + + By default, the PMD will set this value to 0. + .. _mlx5_firmware_config: Firmware configuration diff --git a/doc/guides/rel_notes/release_20_08.rst b/doc/guides/rel_notes/release_20_08.rst index 39064afbe9..dee4ccbb58 100644 --- a/doc/guides/rel_notes/release_20_08.rst +++ b/doc/guides/rel_notes/release_20_08.rst @@ -56,6 +56,12 @@ New Features Also, make sure to start the actual text at the margin. ========================================================= +* **Updated Mellanox mlx5 driver.** + + Updated Mellanox mlx5 driver with new features and improvements, including: + + * Added new PMD devarg ``reclaim_mem_mode``. 
+ Removed Items ------------- diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 469ff7368c..95a0f337fe 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -161,6 +161,9 @@ */ #define MLX5_HP_BUF_SIZE "hp_buf_log_sz" +/* Flow memory reclaim mode. */ +#define MLX5_RECLAIM_MEM "reclaim_mem_mode" + #ifndef HAVE_IBV_MLX5_MOD_MPW #define MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED (1 << 2) #define MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW (1 << 3) @@ -577,8 +580,11 @@ mlx5_flow_ipool_create(struct mlx5_ibv_shared *sh, mlx5_ipool_cfg[MLX5_IPOOL_MLX5_FLOW].size = MLX5_FLOW_HANDLE_VERBS_SIZE; #endif - for (i = 0; i < MLX5_IPOOL_MAX; ++i) + for (i = 0; i < MLX5_IPOOL_MAX; ++i) { + if (config->reclaim_mode) + mlx5_ipool_cfg[i].release_mem_en = 1; sh->ipool[i] = mlx5_ipool_create(&mlx5_ipool_cfg[i]); + } } /** @@ -1192,6 +1198,12 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv) sh->esw_drop_action = mlx5_glue->dr_create_flow_action_drop(); } #endif + if (priv->config.reclaim_mode == MLX5_RCM_AGGR) { + mlx5_glue->dr_reclaim_domain_memory(sh->rx_domain, 1); + mlx5_glue->dr_reclaim_domain_memory(sh->tx_domain, 1); + if (sh->fdb_domain) + mlx5_glue->dr_reclaim_domain_memory(sh->fdb_domain, 1); + } sh->pop_vlan_action = mlx5_glue->dr_create_flow_action_pop_vlan(); #endif /* HAVE_MLX5DV_DR */ sh->dv_refcnt++; @@ -1862,6 +1874,15 @@ mlx5_args_check(const char *key, const char *val, void *opaque) DRV_LOG(DEBUG, "class argument is %s.", val); } else if (strcmp(MLX5_HP_BUF_SIZE, key) == 0) { config->log_hp_size = tmp; + } else if (strcmp(MLX5_RECLAIM_MEM, key) == 0) { + if (tmp != MLX5_RCM_NONE && + tmp != MLX5_RCM_LIGHT && + tmp != MLX5_RCM_AGGR) { + DRV_LOG(ERR, "Unrecognize %s: \"%s\"", key, val); + rte_errno = EINVAL; + return -rte_errno; + } + config->reclaim_mode = tmp; } else { DRV_LOG(WARNING, "%s: unknown parameter", key); rte_errno = EINVAL; @@ -1916,6 +1937,7 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs) MLX5_LRO_TIMEOUT_USEC, 
MLX5_CLASS_ARG_NAME, MLX5_HP_BUF_SIZE, + MLX5_RECLAIM_MEM, NULL, }; struct rte_kvargs *kvlist; diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 2908c8b8de..8e60897ff8 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -60,6 +60,18 @@ enum mlx5_ipool_index { MLX5_IPOOL_MAX, }; +/* + * There are three reclaim memory mode supported. + * 0(none) means no memory reclaim. + * 1(light) means only PMD level reclaim. + * 2(aggressive) means both PMD and rdma-core level reclaim. + */ +enum mlx5_reclaim_mem_mode { + MLX5_RCM_NONE, /* Don't reclaim memory. */ + MLX5_RCM_LIGHT, /* Reclaim PMD level. */ + MLX5_RCM_AGGR, /* Reclaim PMD and rdma-core level. */ +}; + /** Key string for IPC. */ #define MLX5_MP_NAME "net_mlx5_mp" @@ -160,6 +172,7 @@ struct mlx5_dev_config { unsigned int swp:1; /* Tx generic tunnel checksum and TSO offload. */ unsigned int devx:1; /* Whether devx interface is available or not. */ unsigned int dest_tir:1; /* Whether advanced DR API is available. */ + unsigned int reclaim_mode:2; /* Memory reclaim mode. */ struct { unsigned int enabled:1; /* Whether MPRQ is enabled. */ unsigned int stride_num_n; /* Number of strides. */ -- 2.20.1