X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5.c;h=3e0a1b1869bf8739b0a30a1ae200d22932556528;hb=a9fc0b0ef0c276fcea082b20574e27cca1bac3e8;hp=05d4f024a9ce0d8a1dade81286631da20f24fe85;hpb=e9f4166014cb7abf8c74b913ff046487d0e7bea2;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 05d4f024a9..3e0a1b1869 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include "mlx5.h" #include "mlx5_utils.h" @@ -41,10 +43,23 @@ #include "mlx5_autoconf.h" #include "mlx5_defs.h" #include "mlx5_glue.h" +#include "mlx5_mr.h" /* Device parameter to enable RX completion queue compression. */ #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en" +/* Device parameter to enable Multi-Packet Rx queue. */ +#define MLX5_RX_MPRQ_EN "mprq_en" + +/* Device parameter to configure log 2 of the number of strides for MPRQ. */ +#define MLX5_RX_MPRQ_LOG_STRIDE_NUM "mprq_log_stride_num" + +/* Device parameter to limit the size of memcpy'd packet for MPRQ. */ +#define MLX5_RX_MPRQ_MAX_MEMCPY_LEN "mprq_max_memcpy_len" + +/* Device parameter to set the minimum number of Rx queues to enable MPRQ. */ +#define MLX5_RXQS_MIN_MPRQ "rxqs_min_mprq" + /* Device parameter to configure inline send. */ #define MLX5_TXQ_INLINE "txq_inline" @@ -84,9 +99,50 @@ #define MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP (1 << 4) #endif +static const char *MZ_MLX5_PMD_SHARED_DATA = "mlx5_pmd_shared_data"; + +/* Shared memory between primary and secondary processes. */ +struct mlx5_shared_data *mlx5_shared_data; + +/* Spinlock for mlx5_shared_data allocation. */ +static rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER; + /** Driver-specific log messages type. */ int mlx5_logtype; +/** + * Prepare shared data between primary and secondary process. + */ +static void +mlx5_prepare_shared_data(void) +{ + const struct rte_memzone *mz; + + rte_spinlock_lock(&mlx5_shared_data_lock); + if (mlx5_shared_data == NULL) { + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + /* Allocate shared memory. */ + mz = rte_memzone_reserve(MZ_MLX5_PMD_SHARED_DATA, + sizeof(*mlx5_shared_data), + SOCKET_ID_ANY, 0); + } else { + /* Lookup allocated shared memory. */ + mz = rte_memzone_lookup(MZ_MLX5_PMD_SHARED_DATA); + } + if (mz == NULL) + rte_panic("Cannot allocate mlx5 shared data\n"); + mlx5_shared_data = mz->addr; + /* Initialize shared data. */ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + LIST_INIT(&mlx5_shared_data->mem_event_cb_list); + rte_rwlock_init(&mlx5_shared_data->mem_event_rwlock); + } + rte_mem_event_callback_register("MLX5_MEM_EVENT_CB", + mlx5_mr_mem_event_cb, NULL); + } + rte_spinlock_unlock(&mlx5_shared_data_lock); +} + /** * Retrieve integer value from environment variable. * @@ -201,6 +257,8 @@ mlx5_dev_close(struct rte_eth_dev *dev) priv->txqs = NULL; } mlx5_flow_delete_drop_queue(dev); + mlx5_mprq_free_mp(dev); + mlx5_mr_release(dev); if (priv->pd != NULL) { assert(priv->ctx != NULL); claim_zero(mlx5_glue->dealloc_pd(priv->pd)); @@ -245,10 +303,6 @@ mlx5_dev_close(struct rte_eth_dev *dev) if (ret) DRV_LOG(WARNING, "port %u some flows still remain", dev->data->port_id); - ret = mlx5_mr_verify(dev); - if (ret) - DRV_LOG(WARNING, "port %u some memory region still remain", - dev->data->port_id); memset(priv, 0, sizeof(*priv)); } @@ -346,39 +400,6 @@ const struct eth_dev_ops mlx5_dev_ops_isolate = { .is_removed = mlx5_is_removed, }; -static struct { - struct rte_pci_addr pci_addr; /* associated PCI address */ - uint32_t ports; /* physical ports bitfield. */ -} mlx5_dev[32]; - -/** - * Get device index in mlx5_dev[] from PCI bus address. - * - * @param[in] pci_addr - * PCI bus address to look for. - * - * @return - * mlx5_dev[] index on success, -1 on failure. - */ -static int -mlx5_dev_idx(struct rte_pci_addr *pci_addr) -{ - unsigned int i; - int ret = -1; - - assert(pci_addr != NULL); - for (i = 0; (i != RTE_DIM(mlx5_dev)); ++i) { - if ((mlx5_dev[i].pci_addr.domain == pci_addr->domain) && - (mlx5_dev[i].pci_addr.bus == pci_addr->bus) && - (mlx5_dev[i].pci_addr.devid == pci_addr->devid) && - (mlx5_dev[i].pci_addr.function == pci_addr->function)) - return i; - if ((mlx5_dev[i].ports == 0) && (ret == -1)) - ret = i; - } - return ret; -} - /** * Verify and store value for device argument. * @@ -407,6 +428,14 @@ mlx5_args_check(const char *key, const char *val, void *opaque) } if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { config->cqe_comp = !!tmp; + } else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) { + config->mprq.enabled = !!tmp; + } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) { + config->mprq.stride_num_n = tmp; + } else if (strcmp(MLX5_RX_MPRQ_MAX_MEMCPY_LEN, key) == 0) { + config->mprq.max_memcpy_len = tmp; + } else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) { + config->mprq.min_rxqs_num = tmp; } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) { config->txq_inline = tmp; } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) { @@ -449,6 +478,10 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs) { const char **params = (const char *[]){ MLX5_RXQ_CQE_COMP_EN, + MLX5_RX_MPRQ_EN, + MLX5_RX_MPRQ_LOG_STRIDE_NUM, + MLX5_RX_MPRQ_MAX_MEMCPY_LEN, + MLX5_RXQS_MIN_MPRQ, MLX5_TXQ_INLINE, MLX5_TXQS_MIN_INLINE, MLX5_TXQ_MPW_EN, @@ -624,30 +657,27 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, int err = 0; struct ibv_context *attr_ctx = NULL; struct ibv_device_attr_ex device_attr; - unsigned int vf; + unsigned int vf = 0; unsigned int mps; unsigned int cqe_comp; unsigned int tunnel_en = 0; + unsigned int mpls_en = 0; unsigned int swp = 0; unsigned int verb_priorities = 0; - int idx; + unsigned int mprq = 0; + unsigned int mprq_min_stride_size_n = 0; + unsigned int mprq_max_stride_size_n = 0; + unsigned int mprq_min_stride_num_n = 0; + unsigned int mprq_max_stride_num_n = 0; int i; struct mlx5dv_context attrs_out = {0}; #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT - struct ibv_counter_set_description cs_desc; + struct ibv_counter_set_description cs_desc = { .counter_type = 0 }; #endif + /* Prepare shared data between primary and secondary process. */ + mlx5_prepare_shared_data(); assert(pci_drv == &mlx5_driver); - /* Get mlx5_dev[] index. */ - idx = mlx5_dev_idx(&pci_dev->addr); - if (idx == -1) { - DRV_LOG(ERR, "this driver cannot support any more adapters"); - err = ENOMEM; - goto error; - } - DRV_LOG(DEBUG, "using driver device index %d", idx); - /* Save PCI address. */ - mlx5_dev[idx].pci_addr = pci_dev->addr; list = mlx5_glue->get_device_list(&i); if (list == NULL) { assert(errno); @@ -714,6 +744,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, */ #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT attrs_out.comp_mask |= MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS; +#endif +#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT + attrs_out.comp_mask |= MLX5DV_CONTEXT_MASK_STRIDING_RQ; #endif mlx5_glue->dv_query_device(attr_ctx, &attrs_out); if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) { @@ -732,6 +765,33 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, if (attrs_out.comp_mask & MLX5DV_CONTEXT_MASK_SWP) swp = attrs_out.sw_parsing_caps.sw_parsing_offloads; DRV_LOG(DEBUG, "SWP support: %u", swp); +#endif +#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT + if (attrs_out.comp_mask & MLX5DV_CONTEXT_MASK_STRIDING_RQ) { + struct mlx5dv_striding_rq_caps mprq_caps = + attrs_out.striding_rq_caps; + + DRV_LOG(DEBUG, "\tmin_single_stride_log_num_of_bytes: %d", + mprq_caps.min_single_stride_log_num_of_bytes); + DRV_LOG(DEBUG, "\tmax_single_stride_log_num_of_bytes: %d", + mprq_caps.max_single_stride_log_num_of_bytes); + DRV_LOG(DEBUG, "\tmin_single_wqe_log_num_of_strides: %d", + mprq_caps.min_single_wqe_log_num_of_strides); + DRV_LOG(DEBUG, "\tmax_single_wqe_log_num_of_strides: %d", + mprq_caps.max_single_wqe_log_num_of_strides); + DRV_LOG(DEBUG, "\tsupported_qpts: %d", + mprq_caps.supported_qpts); + DRV_LOG(DEBUG, "device supports Multi-Packet RQ"); + mprq = 1; + mprq_min_stride_size_n = + mprq_caps.min_single_stride_log_num_of_bytes; + mprq_max_stride_size_n = + mprq_caps.max_single_stride_log_num_of_bytes; + mprq_min_stride_num_n = + mprq_caps.min_single_wqe_log_num_of_strides; + mprq_max_stride_num_n = + mprq_caps.max_single_wqe_log_num_of_strides; + } #endif if (RTE_CACHE_LINE_SIZE == 128 && !(attrs_out.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP)) @@ -750,6 +810,17 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, #else DRV_LOG(WARNING, "tunnel offloading disabled due to old OFED/rdma-core version"); +#endif +#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT + mpls_en = ((attrs_out.tunnel_offloads_caps & + MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_GRE) && + (attrs_out.tunnel_offloads_caps & + MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_UDP)); + DRV_LOG(DEBUG, "MPLS over GRE/UDP tunnel offloading is %ssupported", + mpls_en ? "" : "not "); +#else + DRV_LOG(WARNING, "MPLS over GRE/UDP tunnel offloading disabled due to" + " old OFED/rdma-core version or firmware configuration"); #endif err = mlx5_glue->query_device_ex(attr_ctx, NULL, &device_attr); if (err) { @@ -762,7 +833,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, char name[RTE_ETH_NAME_MAX_LEN]; int len; uint32_t port = i + 1; /* ports are indexed from one */ - uint32_t test = (1 << i); struct ibv_context *ctx = NULL; struct ibv_port_attr port_attr; struct ibv_pd *pd = NULL; @@ -774,6 +844,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, .cqe_comp = cqe_comp, .mps = mps, .tunnel_en = tunnel_en, + .mpls_en = mpls_en, .tx_vec_en = 1, .rx_vec_en = 1, .mpw_hdr_dseg = 0, @@ -782,6 +853,13 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, .inline_max_packet_sz = MLX5_ARG_UNSET, .vf_nl_en = 1, .swp = !!swp, + .mprq = { + .enabled = 0, /* Disabled by default. */ + .stride_num_n = RTE_MAX(MLX5_MPRQ_STRIDE_NUM_N, + mprq_min_stride_num_n), + .max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN, + .min_rxqs_num = MLX5_MPRQ_MIN_RXQS, + }, }; len = snprintf(name, sizeof(name), PCI_PRI_FMT, @@ -789,7 +867,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, pci_dev->addr.devid, pci_dev->addr.function); if (device_attr.orig_attr.phys_port_cnt > 1) snprintf(name + len, sizeof(name), " port %u", i); - mlx5_dev[idx].ports |= test; if (rte_eal_process_type() == RTE_PROC_SECONDARY) { eth_dev = rte_eth_dev_attach_secondary(name); if (eth_dev == NULL) { @@ -826,9 +903,10 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, mlx5_select_rx_function(eth_dev); eth_dev->tx_pkt_burst = mlx5_select_tx_function(eth_dev); + rte_eth_dev_probing_finish(eth_dev); continue; } - DRV_LOG(DEBUG, "using port %u (%08" PRIx32 ")", port, test); + DRV_LOG(DEBUG, "using port %u", port); ctx = mlx5_glue->open_device(ibv_dev); if (ctx == NULL) { err = ENODEV; @@ -859,7 +937,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, err = ENOMEM; goto port_error; } - mlx5_dev[idx].ports |= test; /* from rte_ethdev.c */ priv = rte_zmalloc("ethdev private structure", sizeof(*priv), @@ -878,9 +955,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, priv->mtu = ETHER_MTU; err = mlx5_args(&config, pci_dev->device.devargs); if (err) { - DRV_LOG(ERR, "failed to process device arguments: %s", - strerror(err)); err = rte_errno; + DRV_LOG(ERR, "failed to process device arguments: %s", + strerror(rte_errno)); goto port_error; } err = mlx5_glue->query_device_ex(ctx, NULL, &device_attr_ex); @@ -947,6 +1024,22 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, DRV_LOG(WARNING, "Rx CQE compression isn't supported"); config.cqe_comp = 0; } + config.mprq.enabled = config.mprq.enabled && mprq; + if (config.mprq.enabled) { + if (config.mprq.stride_num_n > mprq_max_stride_num_n || + config.mprq.stride_num_n < mprq_min_stride_num_n) { + config.mprq.stride_num_n = + RTE_MAX(MLX5_MPRQ_STRIDE_NUM_N, + mprq_min_stride_num_n); + DRV_LOG(WARNING, + "the number of strides" + " for Multi-Packet RQ is out of range," + " setting default value (%u)", + 1 << config.mprq.stride_num_n); + } + config.mprq.min_stride_size_n = mprq_min_stride_size_n; + config.mprq.max_stride_size_n = mprq_max_stride_size_n; + } eth_dev = rte_eth_dev_allocate(name); if (eth_dev == NULL) { DRV_LOG(ERR, "can not allocate rte ethdev"); @@ -954,7 +1047,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, goto port_error; } eth_dev->data->dev_private = priv; - priv->dev = eth_dev; + priv->dev_data = eth_dev->data; eth_dev->data->mac_addrs = priv->mac; eth_dev->device = &pci_dev->device; rte_eth_copy_pci_info(eth_dev, pci_dev); @@ -969,7 +1062,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, DRV_LOG(ERR, "port %u cannot get MAC address, is mlx5_en" " loaded? (errno: %s)", - eth_dev->data->port_id, strerror(errno)); + eth_dev->data->port_id, strerror(rte_errno)); err = ENODEV; goto port_error; } @@ -1052,9 +1145,29 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, if (verb_priorities < MLX5_VERBS_FLOW_PRIO_8) { DRV_LOG(ERR, "port %u wrong Verbs flow priorities: %u", eth_dev->data->port_id, verb_priorities); + err = ENOTSUP; goto port_error; } priv->config.max_verbs_prio = verb_priorities; + /* + * Once the device is added to the list of memory event + * callback, its global MR cache table cannot be expanded + * on the fly because of deadlock. If it overflows, lookup + * should be done by searching MR list linearly, which is slow. + */ + err = mlx5_mr_btree_init(&priv->mr.cache, + MLX5_MR_BTREE_CACHE_N * 2, + eth_dev->device->numa_node); + if (err) { + err = rte_errno; + goto port_error; + } + /* Add device to memory callback list. */ + rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock); + LIST_INSERT_HEAD(&mlx5_shared_data->mem_event_cb_list, + priv, mem_event_cb); + rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock); + rte_eth_dev_probing_finish(eth_dev); continue; port_error: if (priv) @@ -1063,6 +1176,8 @@ port_error: claim_zero(mlx5_glue->dealloc_pd(pd)); if (ctx) claim_zero(mlx5_glue->close_device(ctx)); + if (eth_dev && rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_eth_dev_release_port(eth_dev); break; } /* @@ -1071,11 +1186,6 @@ port_error: * long as the dpdk does not provide a way to deallocate a ethdev and a * way to enumerate the registered ethdevs to free the previous ones. */ - /* no port found, complain */ - if (!mlx5_dev[idx].ports) { - rte_errno = ENODEV; - err = rte_errno; - } error: if (attr_ctx) claim_zero(mlx5_glue->close_device(attr_ctx)); @@ -1121,6 +1231,10 @@ static const struct rte_pci_id mlx5_pci_id_map[] = { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF) }, + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, + PCI_DEVICE_ID_MELLANOX_CONNECTX5BF) + }, { .vendor_id = 0 }