X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx4%2Fmlx4.c;h=8e298788af081058a53d119af4b996d41cc7af16;hb=5908712aa5bfedc5d2d1d18df46e8673794882af;hp=315640a6d782dbafbbde2e8a73566b933f3fc2de;hpb=0203d33a105982da3eeff5a890f4d60f23234304;p=dpdk.git diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index 315640a6d7..8e298788af 100644 --- a/drivers/net/mlx4/mlx4.c +++ b/drivers/net/mlx4/mlx4.c @@ -8,8 +8,6 @@ * mlx4 driver initialization. */ -#include -#include #include #include #include @@ -19,6 +17,9 @@ #include #include #include +#ifdef RTE_IBVERBS_LINK_DLOPEN +#include +#endif /* Verbs headers do not support -pedantic. */ #ifdef PEDANTIC @@ -30,7 +31,6 @@ #endif #include -#include #include #include #include @@ -60,17 +60,23 @@ static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER; /* Process local data for secondary processes. */ static struct mlx4_local_data mlx4_local_data; +/** Driver-specific log messages type. */ +int mlx4_logtype; + /** Configuration structure for device arguments. */ struct mlx4_conf { struct { uint32_t present; /**< Bit-field for existing ports. */ uint32_t enabled; /**< Bit-field for user-enabled ports. */ } ports; + int mr_ext_memseg_en; + /** Whether memseg should be extended for MR creation. */ }; /* Available parameters list. */ const char *pmd_mlx4_init_params[] = { MLX4_PMD_PORT_KVARG, + MLX4_MR_EXT_MEMSEG_EN_KVARG, NULL, }; @@ -123,30 +129,6 @@ error: return ret; } -/** - * Uninitialize shared data between primary and secondary process. - * - * The pointer of secondary process is dereferenced and primary process frees - * the memzone. - */ -static void -mlx4_uninit_shared_data(void) -{ - const struct rte_memzone *mz; - - rte_spinlock_lock(&mlx4_shared_data_lock); - if (mlx4_shared_data) { - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA); - rte_memzone_free(mz); - } else { - memset(&mlx4_local_data, 0, sizeof(mlx4_local_data)); - } - mlx4_shared_data = NULL; - } - rte_spinlock_unlock(&mlx4_shared_data_lock); -} - #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS /** * Verbs callback to allocate a memory. This function should allocate the space @@ -180,7 +162,7 @@ mlx4_alloc_verbs_buf(size_t size, void *data) socket = rxq->socket; } - assert(data != NULL); + MLX4_ASSERT(data != NULL); ret = rte_malloc_socket(__func__, size, alignment, socket); if (!ret && size) rte_errno = ENOMEM; @@ -198,11 +180,58 @@ mlx4_alloc_verbs_buf(size_t size, void *data) static void mlx4_free_verbs_buf(void *ptr, void *data __rte_unused) { - assert(data != NULL); + MLX4_ASSERT(data != NULL); rte_free(ptr); } #endif +/** + * Initialize process private data structure. + * + * @param dev + * Pointer to Ethernet device structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +mlx4_proc_priv_init(struct rte_eth_dev *dev) +{ + struct mlx4_proc_priv *ppriv; + size_t ppriv_size; + + /* + * UAR register table follows the process private structure. BlueFlame + * registers for Tx queues are stored in the table. + */ + ppriv_size = sizeof(struct mlx4_proc_priv) + + dev->data->nb_tx_queues * sizeof(void *); + ppriv = rte_malloc_socket("mlx4_proc_priv", ppriv_size, + RTE_CACHE_LINE_SIZE, dev->device->numa_node); + if (!ppriv) { + rte_errno = ENOMEM; + return -rte_errno; + } + ppriv->uar_table_sz = ppriv_size; + dev->process_private = ppriv; + return 0; +} + +/** + * Un-initialize process private data structure. + * + * @param dev + * Pointer to Ethernet device structure. + */ +static void +mlx4_proc_priv_uninit(struct rte_eth_dev *dev) +{ + if (!dev->process_private) + return; + rte_free(dev->process_private); + dev->process_private = NULL; +} + /** * DPDK callback for Ethernet device configuration. * @@ -219,6 +248,9 @@ mlx4_dev_configure(struct rte_eth_dev *dev) struct rte_flow_error error; int ret; + if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) + dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; + /* Prepare internal flow rules. */ ret = mlx4_flow_sync(priv, &error); if (ret) { @@ -229,9 +261,17 @@ mlx4_dev_configure(struct rte_eth_dev *dev) goto exit; } ret = mlx4_intr_install(priv); - if (ret) + if (ret) { ERROR("%p: interrupt handler installation failed", (void *)dev); + goto exit; + } + ret = mlx4_proc_priv_init(dev); + if (ret) { + ERROR("%p: process private data allocation failed", + (void *)dev); + goto exit; + } exit: return ret; } @@ -259,18 +299,13 @@ mlx4_dev_start(struct rte_eth_dev *dev) return 0; DEBUG("%p: attaching configured flows to all RX queues", (void *)dev); priv->started = 1; - ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd); - if (ret) { - ERROR("%p: cannot remap UAR", (void *)dev); - goto err; - } ret = mlx4_rss_init(priv); if (ret) { ERROR("%p: cannot initialize RSS resources: %s", (void *)dev, strerror(-ret)); goto err; } -#ifndef NDEBUG +#ifdef RTE_LIBRTE_MLX4_DEBUG mlx4_mr_dump_dev(dev); #endif ret = mlx4_rxq_intr_enable(priv); @@ -311,8 +346,6 @@ static void mlx4_dev_stop(struct rte_eth_dev *dev) { struct mlx4_priv *priv = dev->data->dev_private; - const size_t page_size = sysconf(_SC_PAGESIZE); - int i; if (!priv->started) return; @@ -326,15 +359,6 @@ mlx4_dev_stop(struct rte_eth_dev *dev) mlx4_flow_sync(priv, NULL); mlx4_rxq_intr_disable(priv); mlx4_rss_deinit(priv); - for (i = 0; i != dev->data->nb_tx_queues; ++i) { - struct txq *txq; - - txq = dev->data->tx_queues[i]; - if (!txq) - continue; - munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db, - page_size), page_size); - } } /** @@ -365,13 +389,14 @@ mlx4_dev_close(struct rte_eth_dev *dev) mlx4_rx_queue_release(dev->data->rx_queues[i]); for (i = 0; i != dev->data->nb_tx_queues; ++i) mlx4_tx_queue_release(dev->data->tx_queues[i]); + mlx4_proc_priv_uninit(dev); mlx4_mr_release(dev); if (priv->pd != NULL) { - assert(priv->ctx != NULL); + MLX4_ASSERT(priv->ctx != NULL); claim_zero(mlx4_glue->dealloc_pd(priv->pd)); claim_zero(mlx4_glue->close_device(priv->ctx)); } else - assert(priv->ctx == NULL); + MLX4_ASSERT(priv->ctx == NULL); mlx4_intr_uninstall(priv); memset(priv, 0, sizeof(*priv)); } @@ -391,6 +416,7 @@ static const struct eth_dev_ops mlx4_dev_ops = { .mac_addr_remove = mlx4_mac_addr_remove, .mac_addr_add = mlx4_mac_addr_add, .mac_addr_set = mlx4_mac_addr_set, + .set_mc_addr_list = mlx4_set_mc_addr_list, .stats_get = mlx4_stats_get, .stats_reset = mlx4_stats_reset, .fw_version_get = mlx4_fw_version_get, @@ -509,6 +535,8 @@ mlx4_arg_parse(const char *key, const char *val, struct mlx4_conf *conf) return -rte_errno; } conf->ports.enabled |= 1 << tmp; + } else if (strcmp(MLX4_MR_EXT_MEMSEG_EN_KVARG, key) == 0) { + conf->mr_ext_memseg_en = !!tmp; } else { rte_errno = EINVAL; WARN("%s: unknown parameter", key); @@ -544,10 +572,10 @@ mlx4_args(struct rte_devargs *devargs, struct mlx4_conf *conf) } /* Process parameters. */ for (i = 0; pmd_mlx4_init_params[i]; ++i) { - arg_count = rte_kvargs_count(kvlist, MLX4_PMD_PORT_KVARG); + arg_count = rte_kvargs_count(kvlist, pmd_mlx4_init_params[i]); while (arg_count-- > 0) { ret = rte_kvargs_process(kvlist, - MLX4_PMD_PORT_KVARG, + pmd_mlx4_init_params[i], (int (*)(const char *, const char *, void *)) @@ -657,130 +685,6 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd, static struct rte_pci_driver mlx4_driver; -static int -find_lower_va_bound(const struct rte_memseg_list *msl, - const struct rte_memseg *ms, void *arg) -{ - void **addr = arg; - - if (msl->external) - return 0; - if (*addr == NULL) - *addr = ms->addr; - else - *addr = RTE_MIN(*addr, ms->addr); - - return 0; -} - -/** - * Reserve UAR address space for primary process. - * - * Process local resource is used by both primary and secondary to avoid - * duplicate reservation. The space has to be available on both primary and - * secondary process, TXQ UAR maps to this area using fixed mmap w/o double - * check. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. - */ -static int -mlx4_uar_init_primary(void) -{ - struct mlx4_shared_data *sd = mlx4_shared_data; - void *addr = (void *)0; - - if (sd->uar_base) - return 0; - /* find out lower bound of hugepage segments */ - rte_memseg_walk(find_lower_va_bound, &addr); - /* keep distance to hugepages to minimize potential conflicts. */ - addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE)); - /* anonymous mmap, no real memory consumption. */ - addr = mmap(addr, MLX4_UAR_SIZE, - PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (addr == MAP_FAILED) { - ERROR("failed to reserve UAR address space, please" - " adjust MLX4_UAR_SIZE or try --base-virtaddr"); - rte_errno = ENOMEM; - return -rte_errno; - } - /* Accept either same addr or a new addr returned from mmap if target - * range occupied. - */ - INFO("reserved UAR address space: %p", addr); - sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */ - return 0; -} - -/** - * Unmap UAR address space reserved for primary process. - */ -static void -mlx4_uar_uninit_primary(void) -{ - struct mlx4_shared_data *sd = mlx4_shared_data; - - if (!sd->uar_base) - return; - munmap(sd->uar_base, MLX4_UAR_SIZE); - sd->uar_base = NULL; -} - -/** - * Reserve UAR address space for secondary process, align with primary process. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. - */ -static int -mlx4_uar_init_secondary(void) -{ - struct mlx4_shared_data *sd = mlx4_shared_data; - struct mlx4_local_data *ld = &mlx4_local_data; - void *addr; - - if (ld->uar_base) { /* Already reserved. */ - assert(sd->uar_base == ld->uar_base); - return 0; - } - assert(sd->uar_base); - /* anonymous mmap, no real memory consumption. */ - addr = mmap(sd->uar_base, MLX4_UAR_SIZE, - PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (addr == MAP_FAILED) { - ERROR("UAR mmap failed: %p size: %llu", - sd->uar_base, MLX4_UAR_SIZE); - rte_errno = ENXIO; - return -rte_errno; - } - if (sd->uar_base != addr) { - ERROR("UAR address %p size %llu occupied, please" - " adjust MLX4_UAR_OFFSET or try EAL parameter" - " --base-virtaddr", - sd->uar_base, MLX4_UAR_SIZE); - rte_errno = ENXIO; - return -rte_errno; - } - ld->uar_base = addr; - INFO("reserved UAR address space: %p", addr); - return 0; -} - -/** - * Unmap UAR address space reserved for secondary process. - */ -static void -mlx4_uar_uninit_secondary(void) -{ - struct mlx4_local_data *ld = &mlx4_local_data; - - if (!ld->uar_base) - return; - munmap(ld->uar_base, MLX4_UAR_SIZE); - ld->uar_base = NULL; -} - /** * PMD global initialization. * @@ -796,12 +700,12 @@ mlx4_init_once(void) { struct mlx4_shared_data *sd; struct mlx4_local_data *ld = &mlx4_local_data; - int ret; + int ret = 0; if (mlx4_init_shared_data()) return -rte_errno; sd = mlx4_shared_data; - assert(sd); + MLX4_ASSERT(sd); rte_spinlock_lock(&sd->lock); switch (rte_eal_process_type()) { case RTE_PROC_PRIMARY: @@ -811,44 +715,26 @@ mlx4_init_once(void) rte_rwlock_init(&sd->mem_event_rwlock); rte_mem_event_callback_register("MLX4_MEM_EVENT_CB", mlx4_mr_mem_event_cb, NULL); - mlx4_mp_init_primary(); - ret = mlx4_uar_init_primary(); + ret = mlx4_mp_init_primary(); if (ret) - goto error; - sd->init_done = true; + goto out; + sd->init_done = 1; break; case RTE_PROC_SECONDARY: if (ld->init_done) break; - mlx4_mp_init_secondary(); - ret = mlx4_uar_init_secondary(); + ret = mlx4_mp_init_secondary(); if (ret) - goto error; + goto out; ++sd->secondary_cnt; - ld->init_done = true; + ld->init_done = 1; break; default: break; } +out: rte_spinlock_unlock(&sd->lock); - return 0; -error: - switch (rte_eal_process_type()) { - case RTE_PROC_PRIMARY: - mlx4_uar_uninit_primary(); - mlx4_mp_uninit_primary(); - rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL); - break; - case RTE_PROC_SECONDARY: - mlx4_uar_uninit_secondary(); - mlx4_mp_uninit_secondary(); - break; - default: - break; - } - rte_spinlock_unlock(&sd->lock); - mlx4_uninit_shared_data(); - return -rte_errno; + return ret; } /** @@ -876,9 +762,11 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) struct ibv_device_attr_ex device_attr_ex; struct mlx4_conf conf = { .ports.present = 0, + .mr_ext_memseg_en = 1, }; unsigned int vf; int i; + char ifname[IF_NAMESIZE]; (void)pci_drv; err = mlx4_init_once(); @@ -887,16 +775,16 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) strerror(rte_errno)); return -rte_errno; } - assert(pci_drv == &mlx4_driver); + MLX4_ASSERT(pci_drv == &mlx4_driver); list = mlx4_glue->get_device_list(&i); if (list == NULL) { rte_errno = errno; - assert(rte_errno); + MLX4_ASSERT(rte_errno); if (rte_errno == ENOSYS) ERROR("cannot list devices, is ib_uverbs loaded?"); return -rte_errno; } - assert(i >= 0); + MLX4_ASSERT(i >= 0); /* * For each listed device, check related sysfs entry against * the provided PCI ID. @@ -933,7 +821,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) ERROR("cannot use device, are drivers up to date?"); return -rte_errno; } - assert(err > 0); + MLX4_ASSERT(err > 0); rte_errno = err; return -rte_errno; } @@ -958,7 +846,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) err = ENODEV; goto error; } - assert(device_attr.max_sge >= MLX4_MAX_SGE); + MLX4_ASSERT(device_attr.max_sge >= MLX4_MAX_SGE); for (i = 0; i < device_attr.phys_port_cnt; i++) { uint32_t port = i + 1; /* ports are indexed from one */ struct ibv_context *ctx = NULL; @@ -966,7 +854,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) struct ibv_pd *pd = NULL; struct mlx4_priv *priv = NULL; struct rte_eth_dev *eth_dev = NULL; - struct ether_addr mac; + struct rte_ether_addr mac; char name[RTE_ETH_NAME_MAX_LEN]; /* If port is not enabled, skip. */ @@ -999,6 +887,9 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) } eth_dev->device = &pci_dev->device; eth_dev->dev_ops = &mlx4_dev_sec_ops; + err = mlx4_proc_priv_init(eth_dev); + if (err) + goto error; /* Receive command fd from primary process. */ err = mlx4_mp_req_verbs_cmd_fd(eth_dev); if (err < 0) { @@ -1006,7 +897,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) goto error; } /* Remap UAR for Tx queues. */ - err = mlx4_tx_uar_remap(eth_dev, err); + err = mlx4_tx_uar_init_secondary(eth_dev, err); if (err) { err = rte_errno; goto error; @@ -1067,7 +958,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) priv->device_attr = device_attr; priv->port = port; priv->pd = pd; - priv->mtu = ETHER_MTU; + priv->mtu = RTE_ETHER_MTU; priv->vf = vf; priv->hw_csum = !!(device_attr.device_cap_flags & IBV_DEVICE_RAW_IP_CSUM); @@ -1100,6 +991,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) device_attr_ex.tso_caps.max_tso; DEBUG("TSO is %ssupported", priv->tso ? "" : "not "); + priv->mr_ext_memseg_en = conf.mr_ext_memseg_en; /* Configure the first MAC address by default. */ err = mlx4_get_mac(priv, &mac.addr_bytes); if (err) { @@ -1114,17 +1006,15 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) mac.addr_bytes[4], mac.addr_bytes[5]); /* Register MAC address. */ priv->mac[0] = mac; -#ifndef NDEBUG - { - char ifname[IF_NAMESIZE]; - - if (mlx4_get_ifname(priv, &ifname) == 0) - DEBUG("port %u ifname is \"%s\"", - priv->port, ifname); - else - DEBUG("port %u ifname is unknown", priv->port); + + if (mlx4_get_ifname(priv, &ifname) == 0) { + DEBUG("port %u ifname is \"%s\"", + priv->port, ifname); + priv->if_index = if_nametoindex(ifname); + } else { + DEBUG("port %u ifname is unknown", priv->port); } -#endif + /* Get actual MTU if possible. */ mlx4_mtu_get(priv, &priv->mtu); DEBUG("port %u MTU is %u", priv->port, priv->mtu); @@ -1254,8 +1144,7 @@ static struct rte_pci_driver mlx4_driver = { }, .id_table = mlx4_pci_id_map, .probe = mlx4_pci_probe, - .drv_flags = RTE_PCI_DRV_INTR_LSC | - RTE_PCI_DRV_INTR_RMV, + .drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV, }; #ifdef RTE_IBVERBS_LINK_DLOPEN @@ -1393,6 +1282,11 @@ glue_error: */ RTE_INIT(rte_mlx4_pmd_init) { + /* Initialize driver log type. */ + mlx4_logtype = rte_log_register("pmd.net.mlx4"); + if (mlx4_logtype >= 0) + rte_log_set_level(mlx4_logtype, RTE_LOG_NOTICE); + /* * MLX4_DEVICE_FATAL_CLEANUP tells ibv_destroy functions we * want to get success errno value in case of calling them @@ -1409,15 +1303,15 @@ RTE_INIT(rte_mlx4_pmd_init) #ifdef RTE_IBVERBS_LINK_DLOPEN if (mlx4_glue_init()) return; - assert(mlx4_glue); + MLX4_ASSERT(mlx4_glue); #endif -#ifndef NDEBUG +#ifdef RTE_LIBRTE_MLX4_DEBUG /* Glue structure must not contain any NULL pointers. */ { unsigned int i; for (i = 0; i != sizeof(*mlx4_glue) / sizeof(void *); ++i) - assert(((const void *const *)mlx4_glue)[i]); + MLX4_ASSERT(((const void *const *)mlx4_glue)[i]); } #endif if (strcmp(mlx4_glue->version, MLX4_GLUE_VERSION)) {