X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fcommon%2Fmlx5%2Fmlx5_common.c;h=c26a2cfa30d8da033c96d6abfa09aa761471c5d5;hb=8a3ba482c6da492afad05d9defd33974fd2fd87b;hp=9c88a639c1297d6d80b4dfe300c1f080e2fe4078;hpb=7b4f1e6bd367855716d84e95a1dd85ac3647a4c8;p=dpdk.git diff --git a/drivers/common/mlx5/mlx5_common.c b/drivers/common/mlx5/mlx5_common.c index 9c88a639c1..c26a2cfa30 100644 --- a/drivers/common/mlx5/mlx5_common.c +++ b/drivers/common/mlx5/mlx5_common.c @@ -2,185 +2,222 @@ * Copyright 2019 Mellanox Technologies, Ltd */ -#include #include #include +#include #include +#include #include "mlx5_common.h" +#include "mlx5_common_os.h" #include "mlx5_common_utils.h" -#include "mlx5_glue.h" - +#include "mlx5_common_pci.h" int mlx5_common_logtype; +uint8_t haswell_broadwell_cpu; -#ifdef RTE_IBVERBS_LINK_DLOPEN - +/* In case this is an x86_64 intel processor to check if + * we should use relaxed ordering. + */ +#ifdef RTE_ARCH_X86_64 /** - * Suffix RTE_EAL_PMD_PATH with "-glue". - * - * This function performs a sanity check on RTE_EAL_PMD_PATH before - * suffixing its last component. + * This function returns processor identification and feature information + * into the registers. * - * @param buf[out] - * Output buffer, should be large enough otherwise NULL is returned. - * @param size - * Size of @p out. - * - * @return - * Pointer to @p buf or @p NULL in case suffix cannot be appended. + * @param eax, ebx, ecx, edx + * Pointers to the registers that will hold cpu information. + * @param level + * The main category of information returned. */ -static char * -mlx5_glue_path(char *buf, size_t size) +static inline void mlx5_cpu_id(unsigned int level, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) { - static const char *const bad[] = { "/", ".", "..", NULL }; - const char *path = RTE_EAL_PMD_PATH; - size_t len = strlen(path); - size_t off; - int i; - - while (len && path[len - 1] == '/') - --len; - for (off = len; off && path[off - 1] != '/'; --off) - ; - for (i = 0; bad[i]; ++i) - if (!strncmp(path + off, bad[i], (int)(len - off))) - goto error; - i = snprintf(buf, size, "%.*s-glue", (int)len, path); - if (i == -1 || (size_t)i >= size) - goto error; - return buf; -error: - RTE_LOG(ERR, PMD, "unable to append \"-glue\" to last component of" - " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"), please" - " re-configure DPDK"); - return NULL; + __asm__("cpuid\n\t" + : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) + : "0" (level)); } #endif -/** - * Initialization routine for run-time dependency on rdma-core. - */ -RTE_INIT_PRIO(mlx5_glue_init, CLASS) +RTE_INIT_PRIO(mlx5_log_init, LOG) { - void *handle = NULL; - - /* Initialize common log type. */ mlx5_common_logtype = rte_log_register("pmd.common.mlx5"); if (mlx5_common_logtype >= 0) rte_log_set_level(mlx5_common_logtype, RTE_LOG_NOTICE); - /* - * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use - * huge pages. Calling ibv_fork_init() during init allows - * applications to use fork() safely for purposes other than - * using this PMD, which is not supported in forked processes. - */ - setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); - /* Match the size of Rx completion entry to the size of a cacheline. */ - if (RTE_CACHE_LINE_SIZE == 128) - setenv("MLX5_CQE_SIZE", "128", 0); - /* - * MLX5_DEVICE_FATAL_CLEANUP tells ibv_destroy functions to - * cleanup all the Verbs resources even when the device was removed. - */ - setenv("MLX5_DEVICE_FATAL_CLEANUP", "1", 1); - /* The glue initialization was done earlier by mlx5 common library. */ -#ifdef RTE_IBVERBS_LINK_DLOPEN - char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")]; - const char *path[] = { - /* - * A basic security check is necessary before trusting - * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH. - */ - (geteuid() == getuid() && getegid() == getgid() ? - getenv("MLX5_GLUE_PATH") : NULL), - /* - * When RTE_EAL_PMD_PATH is set, use its glue-suffixed - * variant, otherwise let dlopen() look up libraries on its - * own. - */ - (*RTE_EAL_PMD_PATH ? - mlx5_glue_path(glue_path, sizeof(glue_path)) : ""), - }; - unsigned int i = 0; - void **sym; - const char *dlmsg; +} - while (!handle && i != RTE_DIM(path)) { - const char *end; - size_t len; - int ret; +static bool mlx5_common_initialized; - if (!path[i]) { - ++i; - continue; - } - end = strpbrk(path[i], ":;"); - if (!end) - end = path[i] + strlen(path[i]); - len = end - path[i]; - ret = 0; - do { - char name[ret + 1]; +/** + * One time innitialization routine for run-time dependency on glue library + * for multiple PMDs. Each mlx5 PMD that depends on mlx5_common module, + * must invoke in its constructor. + */ +void +mlx5_common_init(void) +{ + if (mlx5_common_initialized) + return; - ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE, - (int)len, path[i], - (!len || *(end - 1) == '/') ? "" : "/"); - if (ret == -1) - break; - if (sizeof(name) != (size_t)ret + 1) - continue; - DRV_LOG(DEBUG, "Looking for rdma-core glue as " - "\"%s\"", name); - handle = dlopen(name, RTLD_LAZY); - break; - } while (1); - path[i] = end + 1; - if (!*end) - ++i; - } - if (!handle) { - rte_errno = EINVAL; - dlmsg = dlerror(); - if (dlmsg) - DRV_LOG(WARNING, "Cannot load glue library: %s", dlmsg); - goto glue_error; + mlx5_glue_constructor(); + mlx5_common_pci_init(); + mlx5_common_initialized = true; +} + +/** + * This function is responsible of initializing the variable + * haswell_broadwell_cpu by checking if the cpu is intel + * and reading the data returned from mlx5_cpu_id(). + * since haswell and broadwell cpus don't have improved performance + * when using relaxed ordering we want to check the cpu type before + * before deciding whether to enable RO or not. + * if the cpu is haswell or broadwell the variable will be set to 1 + * otherwise it will be 0. + */ +RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG) +{ +#ifdef RTE_ARCH_X86_64 + unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56}; + unsigned int haswell_models[4] = {0x3c, 0x3f, 0x45, 0x46}; + unsigned int i, model, family, brand_id, vendor; + unsigned int signature_intel_ebx = 0x756e6547; + unsigned int extended_model; + unsigned int eax = 0; + unsigned int ebx = 0; + unsigned int ecx = 0; + unsigned int edx = 0; + int max_level; + + mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx); + vendor = ebx; + max_level = eax; + if (max_level < 1) { + haswell_broadwell_cpu = 0; + return; } - sym = dlsym(handle, "mlx5_glue"); - if (!sym || !*sym) { - rte_errno = EINVAL; - dlmsg = dlerror(); - if (dlmsg) - DRV_LOG(ERR, "Cannot resolve glue symbol: %s", dlmsg); - goto glue_error; + mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx); + model = (eax >> 4) & 0x0f; + family = (eax >> 8) & 0x0f; + brand_id = ebx & 0xff; + extended_model = (eax >> 12) & 0xf0; + /* Check if the processor is Haswell or Broadwell */ + if (vendor == signature_intel_ebx) { + if (family == 0x06) + model += extended_model; + if (brand_id == 0 && family == 0x6) { + for (i = 0; i < RTE_DIM(broadwell_models); i++) + if (model == broadwell_models[i]) { + haswell_broadwell_cpu = 1; + return; + } + for (i = 0; i < RTE_DIM(haswell_models); i++) + if (model == haswell_models[i]) { + haswell_broadwell_cpu = 1; + return; + } + } } - mlx5_glue = *sym; -#endif /* RTE_IBVERBS_LINK_DLOPEN */ -#ifndef NDEBUG - /* Glue structure must not contain any NULL pointers. */ - { - unsigned int i; +#endif + haswell_broadwell_cpu = 0; +} - for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i) - assert(((const void *const *)mlx5_glue)[i]); - } +/** + * Allocate the User Access Region with DevX on specified device. + * + * @param [in] ctx + * Infiniband device context to perform allocation on. + * @param [in] mapping + * MLX5DV_UAR_ALLOC_TYPE_BF - allocate as cached memory with write-combining + * attributes (if supported by the host), the + * writes to the UAR registers must be followed + * by write memory barrier. + * MLX5DV_UAR_ALLOC_TYPE_NC - allocate as non-cached nenory, all writes are + * promoted to the registers immediately, no + * memory barriers needed. + * mapping < 0 - the first attempt is performed with MLX5DV_UAR_ALLOC_TYPE_BF, + * if this fails the next attempt with MLX5DV_UAR_ALLOC_TYPE_NC + * is performed. The drivers specifying negative values should + * always provide the write memory barrier operation after UAR + * register writings. + * If there is no definitions for the MLX5DV_UAR_ALLOC_TYPE_xx (older rdma + * library headers), the caller can specify 0. + * + * @return + * UAR object pointer on success, NULL otherwise and rte_errno is set. + */ +void * +mlx5_devx_alloc_uar(void *ctx, int mapping) +{ + void *uar; + uint32_t retry, uar_mapping; + void *base_addr; + + for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) { +#ifdef MLX5DV_UAR_ALLOC_TYPE_NC + /* Control the mapping type according to the settings. */ + uar_mapping = (mapping < 0) ? + MLX5DV_UAR_ALLOC_TYPE_NC : mapping; +#else + /* + * It seems we have no way to control the memory mapping type + * for the UAR, the default "Write-Combining" type is supposed. + */ + uar_mapping = 0; + RTE_SET_USED(mapping); #endif - if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) { - rte_errno = EINVAL; - DRV_LOG(ERR, "rdma-core glue \"%s\" mismatch: \"%s\" is " - "required", mlx5_glue->version, MLX5_GLUE_VERSION); - goto glue_error; + uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping); +#ifdef MLX5DV_UAR_ALLOC_TYPE_NC + if (!uar && + mapping < 0 && + uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) { + /* + * In some environments like virtual machine the + * Write Combining mapped might be not supported and + * UAR allocation fails. We tried "Non-Cached" mapping + * for the case. + */ + DRV_LOG(WARNING, "Failed to allocate DevX UAR (BF)"); + uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC; + uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping); + } else if (!uar && + mapping < 0 && + uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) { + /* + * If Verbs/kernel does not support "Non-Cached" + * try the "Write-Combining". + */ + DRV_LOG(WARNING, "Failed to allocate DevX UAR (NC)"); + uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF; + uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping); + } +#endif + if (!uar) { + DRV_LOG(ERR, "Failed to allocate DevX UAR (BF/NC)"); + rte_errno = ENOMEM; + goto exit; + } + base_addr = mlx5_os_get_devx_uar_base_addr(uar); + if (base_addr) + break; + /* + * The UARs are allocated by rdma_core within the + * IB device context, on context closure all UARs + * will be freed, should be no memory/object leakage. + */ + DRV_LOG(WARNING, "Retrying to allocate DevX UAR"); + uar = NULL; } - mlx5_glue->fork_init(); - return; -glue_error: - if (handle) - dlclose(handle); - DRV_LOG(WARNING, "Cannot initialize MLX5 common due to missing" - " run-time dependency on rdma-core libraries (libibverbs," - " libmlx5)"); - mlx5_glue = NULL; - return; + /* Check whether we finally succeeded with valid UAR allocation. */ + if (!uar) { + DRV_LOG(ERR, "Failed to allocate DevX UAR (NULL base)"); + rte_errno = ENOMEM; + } + /* + * Return void * instead of struct mlx5dv_devx_uar * + * is for compatibility with older rdma-core library headers. + */ +exit: + return uar; }