From 4c204fe5e5d2bf0001ab0d017df9f0f578b0434c Mon Sep 17 00:00:00 2001
From: Shiri Kuzin
Date: Tue, 12 May 2020 15:21:46 +0300
Subject: [PATCH] common/mlx5: disable relaxed ordering in unsuitable CPUs

Relaxed ordering is a PCI optimization that enables reordering
reads/writes in order to improve performance.

Relaxed ordering was enabled for all processors causing a degradation
in performance in Haswell and Broadwell processors that don't support
this optimization.

In order to avoid that we check if the processor is Haswell or Broadwell
and if so we disable relaxed ordering.

Signed-off-by: Shiri Kuzin
Acked-by: Matan Azrad
---
 drivers/common/mlx5/mlx5_common.c    | 82 ++++++++++++++++++++++++++++
 drivers/common/mlx5/mlx5_common.h    |  2 +
 drivers/common/mlx5/mlx5_common_mr.c |  6 +-
 drivers/net/mlx5/mlx5_flow_dv.c      |  3 +-
 4 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_common.c b/drivers/common/mlx5/mlx5_common.c
index 42610459f7..1c77763da9 100644
--- a/drivers/common/mlx5/mlx5_common.c
+++ b/drivers/common/mlx5/mlx5_common.c
@@ -22,6 +22,8 @@ int mlx5_common_logtype;
 const struct mlx5_glue *mlx5_glue;
 #endif
 
+uint8_t haswell_broadwell_cpu;
+
 /**
  * Get PCI information by sysfs device path.
  *
@@ -292,6 +294,29 @@ glue_error:
 
 #endif
 
+/* The CPUID helper below is only built for x86_64; it is used to decide
+ * whether relaxed ordering should be used on this processor.
+ */
+#ifdef RTE_ARCH_X86_64
+/**
+ * Execute the CPUID instruction for the requested leaf and store the
+ * resulting EAX, EBX, ECX and EDX register values.
+ *
+ * @param level
+ *   CPUID leaf, passed in EAX (the ECX sub-leaf input is not set).
+ * @param eax, ebx, ecx, edx
+ *   Output pointers receiving the register values returned by CPUID.
+ */
+static inline void mlx5_cpu_id(unsigned int level,
+				unsigned int *eax, unsigned int *ebx,
+				unsigned int *ecx, unsigned int *edx)
+{
+	__asm__("cpuid\n\t"
+		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
+		: "0" (level));
+}
+#endif
+
 RTE_INIT_PRIO(mlx5_log_init, LOG)
 {
 	mlx5_common_logtype = rte_log_register("pmd.common.mlx5");
@@ -350,3 +375,60 @@ glue_error:
 	mlx5_glue = NULL;
 	return;
 }
+
+/**
+ * Init routine (registered through RTE_INIT_PRIO) that sets the variable
+ * haswell_broadwell_cpu.
+ *
+ * On x86_64 builds, CPUID is queried through mlx5_cpu_id(): leaf 0 gives
+ * the vendor signature (only EBX is compared) and the highest leaf, leaf 1
+ * gives the family, model and brand id. Haswell and Broadwell cpus don't
+ * have improved performance when using relaxed ordering, so the variable
+ * is set to 1 on those cpus and to 0 otherwise (also on non-x86_64 builds).
+ */
+RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
+{
+#ifdef RTE_ARCH_X86_64
+	unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56};
+	unsigned int haswell_models[4] = {0x3c, 0x3f, 0x45, 0x46};
+	unsigned int i, model, family, brand_id, vendor;
+	unsigned int signature_intel_ebx = 0x756e6547;
+	unsigned int extended_model;
+	unsigned int eax = 0;
+	unsigned int ebx = 0;
+	unsigned int ecx = 0;
+	unsigned int edx = 0;
+	int max_level;
+
+	mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx);
+	vendor = ebx;
+	max_level = eax;
+	if (max_level < 1) {
+		haswell_broadwell_cpu = 0;
+		return;
+	}
+	mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx);
+	model = (eax >> 4) & 0x0f;
+	family = (eax >> 8) & 0x0f;
+	brand_id = ebx & 0xff;
+	extended_model = (eax >> 12) & 0xf0;
+	/* Check if the processor is Haswell or Broadwell */
+	if (vendor == signature_intel_ebx) {
+		if (family == 0x06)
+			model += extended_model;
+		if (brand_id == 0 && family == 0x6) {
+			for (i = 0; i < RTE_DIM(broadwell_models); i++)
+				if (model == broadwell_models[i]) {
+					haswell_broadwell_cpu = 1;
+					return;
+				}
+			for (i = 0; i < RTE_DIM(haswell_models); i++)
+				if (model == haswell_models[i]) {
+					haswell_broadwell_cpu = 1;
+					return;
+				}
+		}
+	}
+#endif
+	haswell_broadwell_cpu = 0;
+}
diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index b37b82096b..8cd3ea590d 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -213,4 +213,6 @@ __rte_internal
 void mlx5_translate_port_name(const char *port_name_in,
 			      struct mlx5_switch_info *port_info_out);
 
+extern uint8_t haswell_broadwell_cpu;
+
 #endif /* RTE_PMD_MLX5_COMMON_H_ */
diff --git a/drivers/common/mlx5/mlx5_common_mr.c b/drivers/common/mlx5/mlx5_common_mr.c
index 9d4a06dd5b..3b46446460 100644
--- a/drivers/common/mlx5/mlx5_common_mr.c
+++ b/drivers/common/mlx5/mlx5_common_mr.c
@@ -770,7 +770,8 @@ alloc_resources:
 	 */
 	mr->ibv_mr = mlx5_glue->reg_mr(pd, (void *)data.start, len,
 				       IBV_ACCESS_LOCAL_WRITE |
-				       IBV_ACCESS_RELAXED_ORDERING);
+				       (haswell_broadwell_cpu ? 0 :
+				       IBV_ACCESS_RELAXED_ORDERING));
 	if (mr->ibv_mr == NULL) {
 		DEBUG("Fail to create a verbs MR for address (%p)",
 		      (void *)addr);
@@ -1045,7 +1046,8 @@ mlx5_create_mr_ext(struct ibv_pd *pd, uintptr_t addr, size_t len, int socket_id)
 		return NULL;
 	mr->ibv_mr = mlx5_glue->reg_mr(pd, (void *)addr, len,
 				       IBV_ACCESS_LOCAL_WRITE |
-				       IBV_ACCESS_RELAXED_ORDERING);
+				       (haswell_broadwell_cpu ? 0 :
+				       IBV_ACCESS_RELAXED_ORDERING));
 	if (mr->ibv_mr == NULL) {
 		DRV_LOG(WARNING,
 			"Fail to create a verbs MR for address (%p)",
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index c7702c51d3..8b018fb3cf 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -4134,7 +4134,8 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
 	mkey_attr.klm_array = NULL;
 	mkey_attr.klm_num = 0;
 	if (priv->config.hca_attr.relaxed_ordering_write &&
-		priv->config.hca_attr.relaxed_ordering_read)
+		priv->config.hca_attr.relaxed_ordering_read &&
+		!haswell_broadwell_cpu)
 		mkey_attr.relaxed_ordering = 1;
 	mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
 	if (!mem_mng->dm) {
-- 
2.20.1