diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index abd7ff70df..f49d30c05c 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -12,7 +12,6 @@
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -20,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
@@ -28,19 +28,21 @@
 #include 
 #include 
 #include 
-#include 
 #include 

 #include "mlx5_defs.h"
 #include "mlx5.h"
 #include "mlx5_utils.h"
 #include "mlx5_rxtx.h"
+#include "mlx5_rx.h"
+#include "mlx5_tx.h"
 #include "mlx5_autoconf.h"
-#include "mlx5_mr.h"
 #include "mlx5_flow.h"
 #include "mlx5_flow_os.h"
 #include "rte_pmd_mlx5.h"

+#define MLX5_ETH_DRIVER_NAME mlx5_eth
+
 /* Device parameter to enable RX completion queue compression. */
 #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"

@@ -89,12 +91,6 @@
 /* Device parameter to enable multi-packet send WQEs. */
 #define MLX5_TXQ_MPW_EN "txq_mpw_en"

-/*
- * Device parameter to force doorbell register mapping
- * to non-cahed region eliminating the extra write memory barrier.
- */
-#define MLX5_TX_DB_NC "tx_db_nc"
-
 /*
  * Device parameter to include 2 dsegs in the title WQEBB.
  * Deprecated, ignored.
@@ -147,9 +143,6 @@
 /* Activate Netlink support in VF mode. */
 #define MLX5_VF_NL_EN "vf_nl_en"

-/* Enable extending memsegs when creating a MR. */
-#define MLX5_MR_EXT_MEMSEG_EN "mr_ext_memseg_en"
-
 /* Select port representors to instantiate. */
 #define MLX5_REPRESENTOR "representor"

@@ -168,11 +161,15 @@
 /* Flow memory reclaim mode. */
 #define MLX5_RECLAIM_MEM "reclaim_mem_mode"

-/* The default memory allocator used in PMD. */
-#define MLX5_SYS_MEM_EN "sys_mem_en"
-
 /* Decap will be used or not. */
 #define MLX5_DECAP_EN "decap_en"

+/* Device parameter to allow or prevent duplicate rule patterns. */
+#define MLX5_ALLOW_DUPLICATE_PATTERN "allow_duplicate_pattern"
+
+/* Device parameter to configure the delay drop when creating Rxqs. */
+#define MLX5_DELAY_DROP "delay_drop"
+
 /* Shared memory between primary and secondary processes. */
 struct mlx5_shared_data *mlx5_shared_data;

@@ -212,7 +209,8 @@ static const struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = {
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
-		.release_mem_en = 1,
+		.release_mem_en = 0,
+		.per_core_cache = (1 << 16),
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_tag_ipool",
@@ -275,10 +273,13 @@ static const struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = {
	},
 #endif
	[MLX5_IPOOL_MTR] = {
-		.size = sizeof(struct mlx5_flow_meter),
+		/**
+		 * The ipool index must grow contiguously from small to big;
+		 * meter indexes depend on a gap-free sequence, so grow_trunk
+		 * is left unset to keep every trunk the same size.
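+		 * For illustration (fixed-size trunks assumed): with a
+		 * trunk size of 64, slot s of trunk t maps to index
+		 * t * 64 + s, so the sequence stays dense; growing trunks
+		 * would leave jumps in it after each resize.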
+		 */
+		.size = sizeof(struct mlx5_legacy_flow_meter),
		.trunk_size = 64,
-		.grow_trunk = 3,
-		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
@@ -317,7 +318,8 @@ static const struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = {
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
-		.release_mem_en = 1,
+		.release_mem_en = 0,
+		.per_core_cache = 1 << 19,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_flow_handle_ipool",
@@ -347,13 +349,64 @@ static const struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = {
		.free = mlx5_free,
		.type = "mlx5_shared_action_rss",
	},
+	[MLX5_IPOOL_MTR_POLICY] = {
+		/**
+		 * The ipool index must grow contiguously from small to big;
+		 * policy indexes depend on a gap-free sequence, so grow_trunk
+		 * is left unset to keep every trunk the same size.
+		 */
+		.size = sizeof(struct mlx5_flow_meter_sub_policy),
+		.trunk_size = 64,
+		.need_lock = 1,
+		.release_mem_en = 1,
+		.malloc = mlx5_malloc,
+		.free = mlx5_free,
+		.type = "mlx5_meter_policy_ipool",
+	},
 };
-
 #define MLX5_FLOW_MIN_ID_POOL_SIZE 512
 #define MLX5_ID_GENERATION_ARRAY_FACTOR 16
-#define MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE 4096
+#define MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE 1024
+
+/**
+ * Decide whether representor ID is a HPF (host PF) port on BF2.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   Non-zero if HPF, otherwise 0.
+ */
+bool
+mlx5_is_hpf(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	uint16_t repr = MLX5_REPRESENTOR_REPR(priv->representor_id);
+	int type = MLX5_REPRESENTOR_TYPE(priv->representor_id);
+
+	return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_VF &&
+	       MLX5_REPRESENTOR_REPR(-1) == repr;
+}
+
+/**
+ * Decide whether representor ID is an SF port representor.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   Non-zero if SF, otherwise 0.
+ */
+bool
+mlx5_is_sf_repr(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	int type = MLX5_REPRESENTOR_TYPE(priv->representor_id);
+
+	return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_SF;
+}

 /**
  * Initialize the ASO aging management structure.
@@ -378,12 +431,12 @@ mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh)
		rte_errno = ENOMEM;
		return -ENOMEM;
	}
-	err = mlx5_aso_queue_init(sh);
+	err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_FLOW_HIT);
	if (err) {
		mlx5_free(sh->aso_age_mng);
		return -1;
	}
-	rte_spinlock_init(&sh->aso_age_mng->resize_sl);
+	rte_rwlock_init(&sh->aso_age_mng->resize_rwl);
	rte_spinlock_init(&sh->aso_age_mng->free_sl);
	LIST_INIT(&sh->aso_age_mng->free);
	return 0;
@@ -400,8 +453,8 @@ mlx5_flow_aso_age_mng_close(struct mlx5_dev_ctx_shared *sh)
 {
	int i, j;

-	mlx5_aso_queue_stop(sh);
-	mlx5_aso_queue_uninit(sh);
+	mlx5_aso_flow_hit_queue_poll_stop(sh);
+	mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_FLOW_HIT);
	if (sh->aso_age_mng->pools) {
		struct mlx5_aso_age_pool *pool;

@@ -443,6 +496,46 @@ mlx5_flow_aging_init(struct mlx5_dev_ctx_shared *sh)
	}
 }

+/**
+ * Detect and configure the DV flow counter mode.
+ *
+ * @param dev
+ *   Pointer to rte_eth_dev structure.
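+ *
+ * Note: the mode is resolved once per shared context. The first port
+ * initializing sh stores the result; later ports only verify that they
+ * resolved the same fallback mode.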
+ *
+ */
+void
+mlx5_flow_counter_mode_config(struct rte_eth_dev *dev __rte_unused)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr;
+	bool fallback;
+
+#ifndef HAVE_IBV_DEVX_ASYNC
+	fallback = true;
+#else
+	fallback = false;
+	if (!sh->cdev->config.devx || !sh->config.dv_flow_en ||
+	    !hca_attr->flow_counters_dump ||
+	    !(hca_attr->flow_counter_bulk_alloc_bitmap & 0x4) ||
+	    (mlx5_flow_dv_discover_counter_offset_support(dev) == -ENOTSUP))
+		fallback = true;
+#endif
+	if (fallback)
+		DRV_LOG(INFO, "Use fall-back DV counter management. Flow "
+			"counter dump:%d, bulk_alloc_bitmap:0x%hhx.",
+			hca_attr->flow_counters_dump,
+			hca_attr->flow_counter_bulk_alloc_bitmap);
+	/* Initialize fallback mode only on the port that initializes sh. */
+	if (sh->refcnt == 1)
+		sh->cmng.counter_fallback = fallback;
+	else if (fallback != sh->cmng.counter_fallback)
+		DRV_LOG(WARNING, "Port %d in sh has different fallback mode "
+			"with others:%d.", PORT_ID(priv), fallback);
+#endif
+}
+
 /**
  * Initialize the counters management structure.
  *
@@ -478,8 +571,7 @@ mlx5_flow_destroy_counter_stat_mem_mng(struct mlx5_counter_stats_mem_mng *mng)
	uint8_t *mem = (uint8_t *)(uintptr_t)mng->raws[0].data;

	LIST_REMOVE(mng, next);
-	claim_zero(mlx5_devx_cmd_destroy(mng->dm));
-	claim_zero(mlx5_os_umem_dereg(mng->umem));
+	mlx5_os_wrapped_mkey_destroy(&mng->wm);
	mlx5_free(mem);
 }

@@ -539,6 +631,78 @@ mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh)
	memset(&sh->cmng, 0, sizeof(sh->cmng));
 }

+/**
+ * Initialize the ASO flow meters management structure.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise.
+ */
+int
+mlx5_aso_flow_mtrs_mng_init(struct mlx5_dev_ctx_shared *sh)
+{
+	if (!sh->mtrmng) {
+		sh->mtrmng = mlx5_malloc(MLX5_MEM_ZERO,
+			sizeof(*sh->mtrmng),
+			RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+		if (!sh->mtrmng) {
+			DRV_LOG(ERR,
+				"meter management allocation was failed.");
+			rte_errno = ENOMEM;
+			return -ENOMEM;
+		}
+		if (sh->meter_aso_en) {
+			rte_spinlock_init(&sh->mtrmng->pools_mng.mtrsl);
+			rte_rwlock_init(&sh->mtrmng->pools_mng.resize_mtrwl);
+			LIST_INIT(&sh->mtrmng->pools_mng.meters);
+		}
+		sh->mtrmng->def_policy_id = MLX5_INVALID_POLICY_ID;
+	}
+	return 0;
+}
+
+/**
+ * Close and release all the resources of
+ * the ASO flow meter management structure.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object to free.
+ */
+static void
+mlx5_aso_flow_mtrs_mng_close(struct mlx5_dev_ctx_shared *sh)
+{
+	struct mlx5_aso_mtr_pool *mtr_pool;
+	struct mlx5_flow_mtr_mng *mtrmng = sh->mtrmng;
+	uint32_t idx;
+#ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO
+	struct mlx5_aso_mtr *aso_mtr;
+	int i;
+#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
+
+	if (sh->meter_aso_en) {
+		mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_POLICER);
+		idx = mtrmng->pools_mng.n_valid;
+		while (idx--) {
+			mtr_pool = mtrmng->pools_mng.pools[idx];
+#ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO
+			for (i = 0; i < MLX5_ASO_MTRS_PER_POOL; i++) {
+				aso_mtr = &mtr_pool->mtrs[i];
+				if (aso_mtr->fm.meter_action)
+					claim_zero
+					(mlx5_glue->destroy_flow_action
+					(aso_mtr->fm.meter_action));
+			}
+#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
+			claim_zero(mlx5_devx_cmd_destroy
+						(mtr_pool->devx_obj));
+			mtrmng->pools_mng.n_valid--;
+			mlx5_free(mtr_pool);
+		}
+		mlx5_free(sh->mtrmng->pools_mng.pools);
+	}
+	mlx5_free(sh->mtrmng);
+	sh->mtrmng = NULL;
+}
+
 /* Send FLOW_AGED event if needed.
*/ void mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh) @@ -550,12 +714,104 @@ mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh) age_info = &sh->port[i].age_info; if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW)) continue; - if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) + MLX5_AGE_UNSET(age_info, MLX5_AGE_EVENT_NEW); + if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) { + MLX5_AGE_UNSET(age_info, MLX5_AGE_TRIGGER); rte_eth_dev_callback_process (&rte_eth_devices[sh->port[i].devx_ih_port_id], RTE_ETH_EVENT_FLOW_AGED, NULL); - age_info->flags = 0; + } + } +} + +/* + * Initialize the ASO connection tracking structure. + * + * @param[in] sh + * Pointer to mlx5_dev_ctx_shared object. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh) +{ + int err; + + if (sh->ct_mng) + return 0; + sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng), + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); + if (!sh->ct_mng) { + DRV_LOG(ERR, "ASO CT management allocation failed."); + rte_errno = ENOMEM; + return -rte_errno; + } + err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_CONNECTION_TRACKING); + if (err) { + mlx5_free(sh->ct_mng); + /* rte_errno should be extracted from the failure. */ + rte_errno = EINVAL; + return -rte_errno; + } + rte_spinlock_init(&sh->ct_mng->ct_sl); + rte_rwlock_init(&sh->ct_mng->resize_rwl); + LIST_INIT(&sh->ct_mng->free_cts); + return 0; +} + +/* + * Close and release all the resources of the + * ASO connection tracking management structure. + * + * @param[in] sh + * Pointer to mlx5_dev_ctx_shared object to free. + */ +static void +mlx5_flow_aso_ct_mng_close(struct mlx5_dev_ctx_shared *sh) +{ + struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng; + struct mlx5_aso_ct_pool *ct_pool; + struct mlx5_aso_ct_action *ct; + uint32_t idx; + uint32_t val; + uint32_t cnt; + int i; + + mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_CONNECTION_TRACKING); + idx = mng->next; + while (idx--) { + cnt = 0; + ct_pool = mng->pools[idx]; + for (i = 0; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) { + ct = &ct_pool->actions[i]; + val = __atomic_fetch_sub(&ct->refcnt, 1, + __ATOMIC_RELAXED); + MLX5_ASSERT(val == 1); + if (val > 1) + cnt++; +#ifdef HAVE_MLX5_DR_ACTION_ASO_CT + if (ct->dr_action_orig) + claim_zero(mlx5_glue->destroy_flow_action + (ct->dr_action_orig)); + if (ct->dr_action_rply) + claim_zero(mlx5_glue->destroy_flow_action + (ct->dr_action_rply)); +#endif + } + claim_zero(mlx5_devx_cmd_destroy(ct_pool->devx_obj)); + if (cnt) { + DRV_LOG(DEBUG, "%u ASO CT objects are being used in the pool %u", + cnt, i); + } + mlx5_free(ct_pool); + /* in case of failure. */ + mng->next--; } + mlx5_free(mng->pools); + mlx5_free(mng); + /* Management structure must be cleared to 0s during allocation. */ + sh->ct_mng = NULL; } /** @@ -563,12 +819,9 @@ mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh) * * @param[in] sh * Pointer to mlx5_dev_ctx_shared object. - * @param[in] sh - * Pointer to user dev config. */ static void -mlx5_flow_ipool_create(struct mlx5_dev_ctx_shared *sh, - const struct mlx5_dev_config *config) +mlx5_flow_ipool_create(struct mlx5_dev_ctx_shared *sh) { uint8_t i; struct mlx5_indexed_pool_config cfg; @@ -583,17 +836,22 @@ mlx5_flow_ipool_create(struct mlx5_dev_ctx_shared *sh, * according to PCI function flow configuration. */ case MLX5_IPOOL_MLX5_FLOW: - cfg.size = config->dv_flow_en ? + cfg.size = sh->config.dv_flow_en ? 
sizeof(struct mlx5_flow_handle) : MLX5_FLOW_HANDLE_VERBS_SIZE; break; } - if (config->reclaim_mode) + if (sh->config.reclaim_mode) { cfg.release_mem_en = 1; + cfg.per_core_cache = 0; + } else { + cfg.release_mem_en = 0; + } sh->ipool[i] = mlx5_ipool_create(&cfg); } } + /** * Release the flow resources' indexed mempool. * @@ -607,6 +865,9 @@ mlx5_flow_ipool_destroy(struct mlx5_dev_ctx_shared *sh) for (i = 0; i < MLX5_IPOOL_MAX; ++i) mlx5_ipool_destroy(sh->ipool[i]); + for (i = 0; i < MLX5_MAX_MODIFY_NUM; ++i) + if (sh->mdh_ipools[i]) + mlx5_ipool_destroy(sh->mdh_ipools[i]); } /* @@ -622,8 +883,7 @@ bool mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_flex_parser_profiles *prf = - &priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0]; + struct mlx5_ecpri_parser_profile *prf = &priv->sh->ecpri_parser; return !!prf->obj; } @@ -642,15 +902,14 @@ int mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_flex_parser_profiles *prf = - &priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0]; + struct mlx5_ecpri_parser_profile *prf = &priv->sh->ecpri_parser; struct mlx5_devx_graph_node_attr node = { .modify_field_select = 0, }; uint32_t ids[8]; int ret; - if (!priv->config.hca_attr.parse_graph_flex_node) { + if (!priv->sh->cdev->config.hca_attr.parse_graph_flex_node) { DRV_LOG(ERR, "Dynamic flex parser is not supported " "for device %s.", priv->dev_data->name); return -ENOTSUP; @@ -677,7 +936,7 @@ mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev) * start after the common header that with the length of a DW(u32). */ node.sample[1].flow_match_sample_field_base_offset = sizeof(uint32_t); - prf->obj = mlx5_devx_cmd_create_flex_parser(priv->sh->ctx, &node); + prf->obj = mlx5_devx_cmd_create_flex_parser(priv->sh->cdev->ctx, &node); if (!prf->obj) { DRV_LOG(ERR, "Failed to create flex parser node object."); return (rte_errno == 0) ? -ENODEV : -rte_errno; @@ -706,150 +965,416 @@ static void mlx5_flex_parser_ecpri_release(struct rte_eth_dev *dev) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_flex_parser_profiles *prf = - &priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0]; + struct mlx5_ecpri_parser_profile *prf = &priv->sh->ecpri_parser; if (prf->obj) mlx5_devx_cmd_destroy(prf->obj); prf->obj = NULL; } -/* - * Allocate Rx and Tx UARs in robust fashion. - * This routine handles the following UAR allocation issues: +uint32_t +mlx5_get_supported_sw_parsing_offloads(const struct mlx5_hca_attr *attr) +{ + uint32_t sw_parsing_offloads = 0; + + if (attr->swp) { + sw_parsing_offloads |= MLX5_SW_PARSING_CAP; + if (attr->swp_csum) + sw_parsing_offloads |= MLX5_SW_PARSING_CSUM_CAP; + + if (attr->swp_lso) + sw_parsing_offloads |= MLX5_SW_PARSING_TSO_CAP; + } + return sw_parsing_offloads; +} + +uint32_t +mlx5_get_supported_tunneling_offloads(const struct mlx5_hca_attr *attr) +{ + uint32_t tn_offloads = 0; + + if (attr->tunnel_stateless_vxlan) + tn_offloads |= MLX5_TUNNELED_OFFLOADS_VXLAN_CAP; + if (attr->tunnel_stateless_gre) + tn_offloads |= MLX5_TUNNELED_OFFLOADS_GRE_CAP; + if (attr->tunnel_stateless_geneve_rx) + tn_offloads |= MLX5_TUNNELED_OFFLOADS_GENEVE_CAP; + return tn_offloads; +} + +/* Fill all fields of UAR structure. 
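+ *
+ * Presumably the Tx UAR backs the doorbell/BlueFlame registers used on
+ * the send path, while the Rx UAR is only used to arm CQs for Rx
+ * interrupts; both are released by mlx5_rxtx_uars_release() below.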
*/ +static int +mlx5_rxtx_uars_prepare(struct mlx5_dev_ctx_shared *sh) +{ + int ret; + + ret = mlx5_devx_uar_prepare(sh->cdev, &sh->tx_uar); + if (ret) { + DRV_LOG(ERR, "Failed to prepare Tx DevX UAR."); + return -rte_errno; + } + MLX5_ASSERT(sh->tx_uar.obj); + MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->tx_uar.obj)); + ret = mlx5_devx_uar_prepare(sh->cdev, &sh->rx_uar); + if (ret) { + DRV_LOG(ERR, "Failed to prepare Rx DevX UAR."); + mlx5_devx_uar_release(&sh->tx_uar); + return -rte_errno; + } + MLX5_ASSERT(sh->rx_uar.obj); + MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->rx_uar.obj)); + return 0; +} + +static void +mlx5_rxtx_uars_release(struct mlx5_dev_ctx_shared *sh) +{ + mlx5_devx_uar_release(&sh->rx_uar); + mlx5_devx_uar_release(&sh->tx_uar); +} + +/** + * rte_mempool_walk() callback to unregister Rx mempools. + * It used when implicit mempool registration is disabled. * - * - tries to allocate the UAR with the most appropriate memory - * mapping type from the ones supported by the host + * @param mp + * The mempool being walked. + * @param arg + * Pointer to the device shared context. + */ +static void +mlx5_dev_ctx_shared_rx_mempool_unregister_cb(struct rte_mempool *mp, void *arg) +{ + struct mlx5_dev_ctx_shared *sh = arg; + + mlx5_dev_mempool_unregister(sh->cdev, mp); +} + +/** + * Callback used when implicit mempool registration is disabled + * in order to track Rx mempool destruction. * - * - tries to allocate the UAR with non-NULL base address - * OFED 5.0.x and Upstream rdma_core before v29 returned the NULL as - * UAR base address if UAR was not the first object in the UAR page. - * It caused the PMD failure and we should try to get another UAR - * till we get the first one with non-NULL base address returned. + * @param event + * Mempool life cycle event. + * @param mp + * An Rx mempool registered explicitly when the port is started. + * @param arg + * Pointer to a device shared context. + */ +static void +mlx5_dev_ctx_shared_rx_mempool_event_cb(enum rte_mempool_event event, + struct rte_mempool *mp, void *arg) +{ + struct mlx5_dev_ctx_shared *sh = arg; + + if (event == RTE_MEMPOOL_EVENT_DESTROY) + mlx5_dev_mempool_unregister(sh->cdev, mp); +} + +int +mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_dev_ctx_shared *sh = priv->sh; + int ret; + + /* Check if we only need to track Rx mempool destruction. */ + if (!sh->cdev->config.mr_mempool_reg_en) { + ret = rte_mempool_event_callback_register + (mlx5_dev_ctx_shared_rx_mempool_event_cb, sh); + return ret == 0 || rte_errno == EEXIST ? 0 : ret; + } + return mlx5_dev_mempool_subscribe(sh->cdev); +} + +/** + * Set up multiple TISs with different affinities according to + * number of bonding ports + * + * @param priv + * Pointer of shared context. + * + * @return + * Zero on success, -1 otherwise. */ static int -mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh, - const struct mlx5_dev_config *config) +mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh) { - uint32_t uar_mapping, retry; - int err = 0; - void *base_addr; - - for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) { -#ifdef MLX5DV_UAR_ALLOC_TYPE_NC - /* Control the mapping type according to the settings. */ - uar_mapping = (config->dbnc == MLX5_TXDB_NCACHED) ? - MLX5DV_UAR_ALLOC_TYPE_NC : - MLX5DV_UAR_ALLOC_TYPE_BF; -#else - RTE_SET_USED(config); - /* - * It seems we have no way to control the memory mapping type - * for the UAR, the default "Write-Combining" type is supposed. 
- * The UAR initialization on queue creation queries the - * actual mapping type done by Verbs/kernel and setups the - * PMD datapath accordingly. - */ - uar_mapping = 0; -#endif - sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->ctx, uar_mapping); -#ifdef MLX5DV_UAR_ALLOC_TYPE_NC - if (!sh->tx_uar && - uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) { - if (config->dbnc == MLX5_TXDB_CACHED || - config->dbnc == MLX5_TXDB_HEURISTIC) - DRV_LOG(WARNING, "Devarg tx_db_nc setting " - "is not supported by DevX"); - /* - * In some environments like virtual machine - * the Write Combining mapped might be not supported - * and UAR allocation fails. We try "Non-Cached" - * mapping for the case. The tx_burst routines take - * the UAR mapping type into account on UAR setup - * on queue creation. - */ - DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (BF)"); - uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC; - sh->tx_uar = mlx5_glue->devx_alloc_uar - (sh->ctx, uar_mapping); - } else if (!sh->tx_uar && - uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) { - if (config->dbnc == MLX5_TXDB_NCACHED) - DRV_LOG(WARNING, "Devarg tx_db_nc settings " - "is not supported by DevX"); - /* - * If Verbs/kernel does not support "Non-Cached" - * try the "Write-Combining". - */ - DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (NC)"); - uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF; - sh->tx_uar = mlx5_glue->devx_alloc_uar - (sh->ctx, uar_mapping); + int i; + struct mlx5_devx_lag_context lag_ctx = { 0 }; + struct mlx5_devx_tis_attr tis_attr = { 0 }; + + tis_attr.transport_domain = sh->td->id; + if (sh->bond.n_port) { + if (!mlx5_devx_cmd_query_lag(sh->cdev->ctx, &lag_ctx)) { + sh->lag.tx_remap_affinity[0] = + lag_ctx.tx_remap_affinity_1; + sh->lag.tx_remap_affinity[1] = + lag_ctx.tx_remap_affinity_2; + sh->lag.affinity_mode = lag_ctx.port_select_mode; + } else { + DRV_LOG(ERR, "Failed to query lag affinity."); + return -1; } -#endif - if (!sh->tx_uar) { - DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (BF/NC)"); - err = ENOMEM; - goto exit; + if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) { + for (i = 0; i < sh->bond.n_port; i++) { + tis_attr.lag_tx_port_affinity = + MLX5_IFC_LAG_MAP_TIS_AFFINITY(i, + sh->bond.n_port); + sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, + &tis_attr); + if (!sh->tis[i]) { + DRV_LOG(ERR, "Failed to TIS %d/%d for bonding device" + " %s.", i, sh->bond.n_port, + sh->ibdev_name); + return -1; + } + } + DRV_LOG(DEBUG, "LAG number of ports : %d, affinity_1 & 2 : pf%d & %d.\n", + sh->bond.n_port, lag_ctx.tx_remap_affinity_1, + lag_ctx.tx_remap_affinity_2); + return 0; } - base_addr = mlx5_os_get_devx_uar_base_addr(sh->tx_uar); - if (base_addr) - break; - /* - * The UARs are allocated by rdma_core within the - * IB device context, on context closure all UARs - * will be freed, should be no memory/object leakage. - */ - DRV_LOG(DEBUG, "Retrying to allocate Tx DevX UAR"); - sh->tx_uar = NULL; + if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH) + DRV_LOG(INFO, "Device %s enabled HW hash based LAG.", + sh->ibdev_name); + } + tis_attr.lag_tx_port_affinity = 0; + sh->tis[0] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr); + if (!sh->tis[0]) { + DRV_LOG(ERR, "Failed to TIS 0 for bonding device" + " %s.", sh->ibdev_name); + return -1; + } + return 0; +} + +/** + * Verify and store value for share device argument. + * + * @param[in] key + * Key argument to verify. + * @param[in] val + * Value associated with key. + * @param opaque + * User data. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
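+ *
+ * For example, a devargs string reaching this handler might look like
+ * (key spellings assumed from the MLX5_* macros in this file):
+ *   dv_flow_en=1,dv_esw_en=1,reclaim_mem_mode=1,decap_en=0,tx_pp=500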
+ */ +static int +mlx5_dev_args_check_handler(const char *key, const char *val, void *opaque) +{ + struct mlx5_sh_config *config = opaque; + signed long tmp; + + errno = 0; + tmp = strtol(val, NULL, 0); + if (errno) { + rte_errno = errno; + DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val); + return -rte_errno; + } + if (tmp < 0 && strcmp(MLX5_TX_PP, key) && strcmp(MLX5_TX_SKEW, key)) { + /* Negative values are acceptable for some keys only. */ + rte_errno = EINVAL; + DRV_LOG(WARNING, "%s: invalid negative value \"%s\"", key, val); + return -rte_errno; + } + if (strcmp(MLX5_TX_PP, key) == 0) { + unsigned long mod = tmp >= 0 ? tmp : -tmp; + + if (!mod) { + DRV_LOG(ERR, "Zero Tx packet pacing parameter."); + rte_errno = EINVAL; + return -rte_errno; + } + config->tx_pp = tmp; + } else if (strcmp(MLX5_TX_SKEW, key) == 0) { + config->tx_skew = tmp; + } else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) { + config->l3_vxlan_en = !!tmp; + } else if (strcmp(MLX5_VF_NL_EN, key) == 0) { + config->vf_nl_en = !!tmp; + } else if (strcmp(MLX5_DV_ESW_EN, key) == 0) { + config->dv_esw_en = !!tmp; + } else if (strcmp(MLX5_DV_FLOW_EN, key) == 0) { + if (tmp > 2) { + DRV_LOG(ERR, "Invalid %s parameter.", key); + rte_errno = EINVAL; + return -rte_errno; + } + config->dv_flow_en = tmp; + } else if (strcmp(MLX5_DV_XMETA_EN, key) == 0) { + if (tmp != MLX5_XMETA_MODE_LEGACY && + tmp != MLX5_XMETA_MODE_META16 && + tmp != MLX5_XMETA_MODE_META32 && + tmp != MLX5_XMETA_MODE_MISS_INFO) { + DRV_LOG(ERR, "Invalid extensive metadata parameter."); + rte_errno = EINVAL; + return -rte_errno; + } + if (tmp != MLX5_XMETA_MODE_MISS_INFO) + config->dv_xmeta_en = tmp; + else + config->dv_miss_info = 1; + } else if (strcmp(MLX5_LACP_BY_USER, key) == 0) { + config->lacp_by_user = !!tmp; + } else if (strcmp(MLX5_RECLAIM_MEM, key) == 0) { + if (tmp != MLX5_RCM_NONE && + tmp != MLX5_RCM_LIGHT && + tmp != MLX5_RCM_AGGR) { + DRV_LOG(ERR, "Unrecognize %s: \"%s\"", key, val); + rte_errno = EINVAL; + return -rte_errno; + } + config->reclaim_mode = tmp; + } else if (strcmp(MLX5_DECAP_EN, key) == 0) { + config->decap_en = !!tmp; + } else if (strcmp(MLX5_ALLOW_DUPLICATE_PATTERN, key) == 0) { + config->allow_duplicate_pattern = !!tmp; + } + return 0; +} + +/** + * Parse user device parameters and adjust them according to device + * capabilities. + * + * @param sh + * Pointer to shared device context. + * @param mkvlist + * Pointer to mlx5 kvargs control, can be NULL if there is no devargs. + * @param config + * Pointer to shared device configuration structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +mlx5_shared_dev_ctx_args_config(struct mlx5_dev_ctx_shared *sh, + struct mlx5_kvargs_ctrl *mkvlist, + struct mlx5_sh_config *config) +{ + const char **params = (const char *[]){ + MLX5_TX_PP, + MLX5_TX_SKEW, + MLX5_L3_VXLAN_EN, + MLX5_VF_NL_EN, + MLX5_DV_ESW_EN, + MLX5_DV_FLOW_EN, + MLX5_DV_XMETA_EN, + MLX5_LACP_BY_USER, + MLX5_RECLAIM_MEM, + MLX5_DECAP_EN, + MLX5_ALLOW_DUPLICATE_PATTERN, + NULL, + }; + int ret = 0; + + /* Default configuration. */ + memset(config, 0, sizeof(*config)); + config->vf_nl_en = 1; + config->dv_esw_en = 1; + config->dv_flow_en = 1; + config->decap_en = 1; + config->allow_duplicate_pattern = 1; + if (mkvlist != NULL) { + /* Process parameters. 
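+		 * Only the keys listed in params[] are consumed here;
+		 * mlx5_kvargs_process() invokes the handler once per
+		 * matching key and marks each processed key as used.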
*/ + ret = mlx5_kvargs_process(mkvlist, params, + mlx5_dev_args_check_handler, config); + if (ret) { + DRV_LOG(ERR, "Failed to process device arguments: %s", + strerror(rte_errno)); + return -rte_errno; + } + } + /* Adjust parameters according to device capabilities. */ + if (config->dv_flow_en && !sh->dev_cap.dv_flow_en) { + DRV_LOG(WARNING, "DV flow is not supported."); + config->dv_flow_en = 0; } - /* Check whether we finally succeeded with valid UAR allocation. */ - if (!sh->tx_uar) { - DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (NULL base)"); - err = ENOMEM; - goto exit; + if (config->dv_esw_en && !sh->dev_cap.dv_esw_en) { + DRV_LOG(DEBUG, "E-Switch DV flow is not supported."); + config->dv_esw_en = 0; } - for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) { - uar_mapping = 0; - sh->devx_rx_uar = mlx5_glue->devx_alloc_uar - (sh->ctx, uar_mapping); -#ifdef MLX5DV_UAR_ALLOC_TYPE_NC - if (!sh->devx_rx_uar && - uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) { - /* - * Rx UAR is used to control interrupts only, - * should be no datapath noticeable impact, - * can try "Non-Cached" mapping safely. - */ - DRV_LOG(DEBUG, "Failed to allocate Rx DevX UAR (BF)"); - uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC; - sh->devx_rx_uar = mlx5_glue->devx_alloc_uar - (sh->ctx, uar_mapping); - } -#endif - if (!sh->devx_rx_uar) { - DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (BF/NC)"); - err = ENOMEM; - goto exit; - } - base_addr = mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar); - if (base_addr) - break; - /* - * The UARs are allocated by rdma_core within the - * IB device context, on context closure all UARs - * will be freed, should be no memory/object leakage. - */ - DRV_LOG(DEBUG, "Retrying to allocate Rx DevX UAR"); - sh->devx_rx_uar = NULL; + if (config->dv_miss_info && config->dv_esw_en) + config->dv_xmeta_en = MLX5_XMETA_MODE_META16; + if (!config->dv_esw_en && + config->dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) { + DRV_LOG(WARNING, + "Metadata mode %u is not supported (no E-Switch).", + config->dv_xmeta_en); + config->dv_xmeta_en = MLX5_XMETA_MODE_LEGACY; } - /* Check whether we finally succeeded with valid UAR allocation. */ - if (!sh->devx_rx_uar) { - DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (NULL base)"); - err = ENOMEM; + if (config->tx_pp && !sh->dev_cap.txpp_en) { + DRV_LOG(ERR, "Packet pacing is not supported."); + rte_errno = ENODEV; + return -rte_errno; + } + if (!config->tx_pp && config->tx_skew) { + DRV_LOG(WARNING, + "\"tx_skew\" doesn't affect without \"tx_pp\"."); + } + /* + * If HW has bug working with tunnel packet decapsulation and scatter + * FCS, and decapsulation is needed, clear the hw_fcs_strip bit. + * Then RTE_ETH_RX_OFFLOAD_KEEP_CRC bit will not be set anymore. + */ + if (sh->dev_cap.scatter_fcs_w_decap_disable && sh->config.decap_en) + config->hw_fcs_strip = 0; + else + config->hw_fcs_strip = sh->dev_cap.hw_fcs_strip; + DRV_LOG(DEBUG, "FCS stripping configuration is %ssupported", + (config->hw_fcs_strip ? 
"" : "not ")); + DRV_LOG(DEBUG, "\"tx_pp\" is %d.", config->tx_pp); + DRV_LOG(DEBUG, "\"tx_skew\" is %d.", config->tx_skew); + DRV_LOG(DEBUG, "\"reclaim_mode\" is %u.", config->reclaim_mode); + DRV_LOG(DEBUG, "\"dv_esw_en\" is %u.", config->dv_esw_en); + DRV_LOG(DEBUG, "\"dv_flow_en\" is %u.", config->dv_flow_en); + DRV_LOG(DEBUG, "\"dv_xmeta_en\" is %u.", config->dv_xmeta_en); + DRV_LOG(DEBUG, "\"dv_miss_info\" is %u.", config->dv_miss_info); + DRV_LOG(DEBUG, "\"l3_vxlan_en\" is %u.", config->l3_vxlan_en); + DRV_LOG(DEBUG, "\"vf_nl_en\" is %u.", config->vf_nl_en); + DRV_LOG(DEBUG, "\"lacp_by_user\" is %u.", config->lacp_by_user); + DRV_LOG(DEBUG, "\"decap_en\" is %u.", config->decap_en); + DRV_LOG(DEBUG, "\"allow_duplicate_pattern\" is %u.", + config->allow_duplicate_pattern); + return 0; +} + +/** + * Configure realtime timestamp format. + * + * @param sh + * Pointer to mlx5_dev_ctx_shared object. + * @param hca_attr + * Pointer to DevX HCA capabilities structure. + */ +void +mlx5_rt_timestamp_config(struct mlx5_dev_ctx_shared *sh, + struct mlx5_hca_attr *hca_attr) +{ + uint32_t dw_cnt = MLX5_ST_SZ_DW(register_mtutc); + uint32_t reg[dw_cnt]; + int ret = ENOTSUP; + + if (hca_attr->access_register_user) + ret = mlx5_devx_cmd_register_read(sh->cdev->ctx, + MLX5_REGISTER_ID_MTUTC, 0, + reg, dw_cnt); + if (!ret) { + uint32_t ts_mode; + + /* MTUTC register is read successfully. */ + ts_mode = MLX5_GET(register_mtutc, reg, time_stamp_mode); + if (ts_mode == MLX5_MTUTC_TIMESTAMP_MODE_REAL_TIME) + sh->dev_cap.rt_timestamp = 1; + } else { + /* Kernel does not support register reading. */ + if (hca_attr->dev_freq_khz == (NS_PER_S / MS_PER_S)) + sh->dev_cap.rt_timestamp = 1; } -exit: - return err; } /** @@ -865,8 +1390,8 @@ exit: * * @param[in] spawn * Pointer to the device attributes (name, port, etc). - * @param[in] config - * Pointer to device configuration structure. + * @param mkvlist + * Pointer to mlx5 kvargs control, can be NULL if there is no devargs. * * @return * Pointer to mlx5_dev_ctx_shared object on success, @@ -874,12 +1399,11 @@ exit: */ struct mlx5_dev_ctx_shared * mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn, - const struct mlx5_dev_config *config) + struct mlx5_kvargs_ctrl *mkvlist) { struct mlx5_dev_ctx_shared *sh; int err = 0; uint32_t i; - struct mlx5_devx_tis_attr tis_attr = { 0 }; MLX5_ASSERT(spawn); /* Secondary process should not create the shared context. */ @@ -887,8 +1411,7 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn, pthread_mutex_lock(&mlx5_dev_ctx_list_mutex); /* Search for IB context by device name. 
*/ LIST_FOREACH(sh, &mlx5_dev_ctx_list, next) { - if (!strcmp(sh->ibdev_name, - mlx5_os_get_dev_device_name(spawn->phys_dev))) { + if (!strcmp(sh->ibdev_name, spawn->phys_dev_name)) { sh->refcnt++; goto exit; } @@ -897,102 +1420,68 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn, MLX5_ASSERT(spawn->max_port); sh = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE, sizeof(struct mlx5_dev_ctx_shared) + - spawn->max_port * - sizeof(struct mlx5_dev_shared_port), + spawn->max_port * sizeof(struct mlx5_dev_shared_port), RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); if (!sh) { - DRV_LOG(ERR, "shared context allocation failure"); - rte_errno = ENOMEM; + DRV_LOG(ERR, "Shared context allocation failure."); + rte_errno = ENOMEM; goto exit; } - err = mlx5_os_open_device(spawn, config, sh); - if (!sh->ctx) + pthread_mutex_init(&sh->txpp.mutex, NULL); + sh->numa_node = spawn->cdev->dev->numa_node; + sh->cdev = spawn->cdev; + sh->esw_mode = !!(spawn->info.master || spawn->info.representor); + if (spawn->bond_info) + sh->bond = *spawn->bond_info; + err = mlx5_os_capabilities_prepare(sh); + if (err) { + DRV_LOG(ERR, "Fail to configure device capabilities."); goto error; - err = mlx5_os_get_dev_attr(sh->ctx, &sh->device_attr); + } + err = mlx5_shared_dev_ctx_args_config(sh, mkvlist, &sh->config); if (err) { - DRV_LOG(DEBUG, "mlx5_os_get_dev_attr() failed"); + DRV_LOG(ERR, "Failed to process device configure: %s", + strerror(rte_errno)); goto error; } sh->refcnt = 1; - sh->bond_dev = UINT16_MAX; sh->max_port = spawn->max_port; - strncpy(sh->ibdev_name, mlx5_os_get_ctx_device_name(sh->ctx), + strncpy(sh->ibdev_name, mlx5_os_get_ctx_device_name(sh->cdev->ctx), sizeof(sh->ibdev_name) - 1); - strncpy(sh->ibdev_path, mlx5_os_get_ctx_device_path(sh->ctx), + strncpy(sh->ibdev_path, mlx5_os_get_ctx_device_path(sh->cdev->ctx), sizeof(sh->ibdev_path) - 1); /* - * Setting port_id to max unallowed value means - * there is no interrupt subhandler installed for - * the given port index i. + * Setting port_id to max unallowed value means there is no interrupt + * subhandler installed for the given port index i. */ for (i = 0; i < sh->max_port; i++) { sh->port[i].ih_port_id = RTE_MAX_ETHPORTS; sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS; } - sh->pd = mlx5_os_alloc_pd(sh->ctx); - if (sh->pd == NULL) { - DRV_LOG(ERR, "PD allocation failure"); - err = ENOMEM; - goto error; - } - if (sh->devx) { - err = mlx5_os_get_pdn(sh->pd, &sh->pdn); - if (err) { - DRV_LOG(ERR, "Fail to extract pdn from PD"); - goto error; - } - sh->td = mlx5_devx_cmd_create_td(sh->ctx); + if (sh->cdev->config.devx) { + sh->td = mlx5_devx_cmd_create_td(sh->cdev->ctx); if (!sh->td) { DRV_LOG(ERR, "TD allocation failure"); - err = ENOMEM; + rte_errno = ENOMEM; goto error; } - tis_attr.transport_domain = sh->td->id; - sh->tis = mlx5_devx_cmd_create_tis(sh->ctx, &tis_attr); - if (!sh->tis) { + if (mlx5_setup_tis(sh)) { DRV_LOG(ERR, "TIS allocation failure"); - err = ENOMEM; + rte_errno = ENOMEM; goto error; } - err = mlx5_alloc_rxtx_uars(sh, config); + err = mlx5_rxtx_uars_prepare(sh); if (err) goto error; - MLX5_ASSERT(sh->tx_uar); - MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->tx_uar)); - - MLX5_ASSERT(sh->devx_rx_uar); - MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar)); - } #ifndef RTE_ARCH_64 - /* Initialize UAR access locks for 32bit implementations. 
*/ - rte_spinlock_init(&sh->uar_lock_cq); - for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++) - rte_spinlock_init(&sh->uar_lock[i]); + } else { + /* Initialize UAR access locks for 32bit implementations. */ + rte_spinlock_init(&sh->uar_lock_cq); + for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++) + rte_spinlock_init(&sh->uar_lock[i]); #endif - /* - * Once the device is added to the list of memory event - * callback, its global MR cache table cannot be expanded - * on the fly because of deadlock. If it overflows, lookup - * should be done by searching MR list linearly, which is slow. - * - * At this point the device is not added to the memory - * event list yet, context is just being created. - */ - err = mlx5_mr_btree_init(&sh->share_cache.cache, - MLX5_MR_BTREE_CACHE_N * 2, - spawn->pci_dev->device.numa_node); - if (err) { - err = rte_errno; - goto error; } - mlx5_os_set_reg_mr_cb(&sh->share_cache.reg_mr_cb, - &sh->share_cache.dereg_mr_cb); mlx5_os_dev_shared_handler_install(sh); - sh->cnt_id_tbl = mlx5_l3t_create(MLX5_L3T_TYPE_DWORD); - if (!sh->cnt_id_tbl) { - err = rte_errno; - goto error; - } if (LIST_EMPTY(&mlx5_dev_ctx_list)) { err = mlx5_flow_os_init_workspace_once(); if (err) @@ -1000,12 +1489,7 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn, } mlx5_flow_aging_init(sh); mlx5_flow_counters_mng_init(sh); - mlx5_flow_ipool_create(sh, config); - /* Add device to memory callback list. */ - rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock); - LIST_INSERT_HEAD(&mlx5_shared_data->mem_event_cb_list, - sh, mem_event_cb); - rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock); + mlx5_flow_ipool_create(sh); /* Add context to the global device list. */ LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next); rte_spinlock_init(&sh->geneve_tlv_opt_sl); @@ -1013,25 +1497,19 @@ exit: pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex); return sh; error: + err = rte_errno; pthread_mutex_destroy(&sh->txpp.mutex); pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex); MLX5_ASSERT(sh); - if (sh->cnt_id_tbl) - mlx5_l3t_destroy(sh->cnt_id_tbl); - if (sh->tis) - claim_zero(mlx5_devx_cmd_destroy(sh->tis)); + mlx5_rxtx_uars_release(sh); + i = 0; + do { + if (sh->tis[i]) + claim_zero(mlx5_devx_cmd_destroy(sh->tis[i])); + } while (++i < (uint32_t)sh->bond.n_port); if (sh->td) claim_zero(mlx5_devx_cmd_destroy(sh->td)); - if (sh->devx_rx_uar) - mlx5_glue->devx_free_uar(sh->devx_rx_uar); - if (sh->tx_uar) - mlx5_glue->devx_free_uar(sh->tx_uar); - if (sh->pd) - claim_zero(mlx5_os_dealloc_pd(sh->pd)); - if (sh->ctx) - claim_zero(mlx5_glue->close_device(sh->ctx)); mlx5_free(sh); - MLX5_ASSERT(err > 0); rte_errno = err; return NULL; } @@ -1046,6 +1524,9 @@ error: void mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh) { + int ret; + int i = 0; + pthread_mutex_lock(&mlx5_dev_ctx_list_mutex); #ifdef RTE_LIBRTE_MLX5_DEBUG /* Check the object presence in the list. */ @@ -1066,47 +1547,48 @@ mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh) MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); if (--sh->refcnt) goto exit; - /* Remove from memory callback device list. */ - rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock); - LIST_REMOVE(sh, mem_event_cb); - rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock); - /* Release created Memory Regions. */ - mlx5_mr_release_cache(&sh->share_cache); + /* Stop watching for mempool events and unregister all mempools. 
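+	 * This mirrors mlx5_dev_ctx_shared_mempool_subscribe(): the event
+	 * callback was registered only when mr_mempool_reg_en is off, so
+	 * the same check guards the unregister path.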
*/ + if (!sh->cdev->config.mr_mempool_reg_en) { + ret = rte_mempool_event_callback_unregister + (mlx5_dev_ctx_shared_rx_mempool_event_cb, sh); + if (ret == 0) + rte_mempool_walk + (mlx5_dev_ctx_shared_rx_mempool_unregister_cb, sh); + } /* Remove context from the global device list. */ LIST_REMOVE(sh, next); - /* Release flow workspaces objects on the last device. */ - if (LIST_EMPTY(&mlx5_dev_ctx_list)) + /* Release resources on the last device removal. */ + if (LIST_EMPTY(&mlx5_dev_ctx_list)) { + mlx5_os_net_cleanup(); mlx5_flow_os_release_workspace(); + } pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex); + if (sh->flex_parsers_dv) { + mlx5_list_destroy(sh->flex_parsers_dv); + sh->flex_parsers_dv = NULL; + } /* * Ensure there is no async event handler installed. * Only primary process handles async device events. **/ mlx5_flow_counters_mng_close(sh); + if (sh->ct_mng) + mlx5_flow_aso_ct_mng_close(sh); if (sh->aso_age_mng) { mlx5_flow_aso_age_mng_close(sh); sh->aso_age_mng = NULL; } + if (sh->mtrmng) + mlx5_aso_flow_mtrs_mng_close(sh); mlx5_flow_ipool_destroy(sh); mlx5_os_dev_shared_handler_uninstall(sh); - if (sh->cnt_id_tbl) { - mlx5_l3t_destroy(sh->cnt_id_tbl); - sh->cnt_id_tbl = NULL; - } - if (sh->tx_uar) { - mlx5_glue->devx_free_uar(sh->tx_uar); - sh->tx_uar = NULL; - } - if (sh->pd) - claim_zero(mlx5_os_dealloc_pd(sh->pd)); - if (sh->tis) - claim_zero(mlx5_devx_cmd_destroy(sh->tis)); + mlx5_rxtx_uars_release(sh); + do { + if (sh->tis[i]) + claim_zero(mlx5_devx_cmd_destroy(sh->tis[i])); + } while (++i < sh->bond.n_port); if (sh->td) claim_zero(mlx5_devx_cmd_destroy(sh->td)); - if (sh->devx_rx_uar) - mlx5_glue->devx_free_uar(sh->devx_rx_uar); - if (sh->ctx) - claim_zero(mlx5_glue->close_device(sh->ctx)); MLX5_ASSERT(sh->geneve_tlv_option_resource == NULL); pthread_mutex_destroy(&sh->txpp.mutex); mlx5_free(sh); @@ -1129,6 +1611,7 @@ mlx5_free_table_hash_list(struct mlx5_priv *priv) if (!sh->flow_tbls) return; mlx5_hlist_destroy(sh->flow_tbls); + sh->flow_tbls = NULL; } /** @@ -1148,20 +1631,22 @@ mlx5_alloc_table_hash_list(struct mlx5_priv *priv __rte_unused) /* Tables are only used in DV and DR modes. */ #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H) struct mlx5_dev_ctx_shared *sh = priv->sh; - char s[MLX5_HLIST_NAMESIZE]; + char s[MLX5_NAME_SIZE]; MLX5_ASSERT(sh); snprintf(s, sizeof(s), "%s_flow_table", priv->sh->ibdev_name); sh->flow_tbls = mlx5_hlist_create(s, MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE, - 0, 0, flow_dv_tbl_create_cb, + false, true, sh, + flow_dv_tbl_create_cb, flow_dv_tbl_match_cb, - flow_dv_tbl_remove_cb); + flow_dv_tbl_remove_cb, + flow_dv_tbl_clone_cb, + flow_dv_tbl_clone_free_cb); if (!sh->flow_tbls) { DRV_LOG(ERR, "flow tables with hash creation failed."); err = ENOMEM; return err; } - sh->flow_tbls->ctx = sh; #ifndef HAVE_MLX5DV_DR struct rte_flow_error error; struct rte_eth_dev *dev = &rte_eth_devices[priv->dev_data->port_id]; @@ -1171,9 +1656,12 @@ mlx5_alloc_table_hash_list(struct mlx5_priv *priv __rte_unused) * because DV expect to see them even if they cannot be created by * RDMA-CORE. 
*/ - if (!flow_dv_tbl_resource_get(dev, 0, 0, 0, 0, NULL, 0, 1, &error) || - !flow_dv_tbl_resource_get(dev, 0, 1, 0, 0, NULL, 0, 1, &error) || - !flow_dv_tbl_resource_get(dev, 0, 0, 1, 0, NULL, 0, 1, &error)) { + if (!flow_dv_tbl_resource_get(dev, 0, 0, 0, 0, + NULL, 0, 1, 0, &error) || + !flow_dv_tbl_resource_get(dev, 0, 1, 0, 0, + NULL, 0, 1, 0, &error) || + !flow_dv_tbl_resource_get(dev, 0, 0, 1, 0, + NULL, 0, 1, 0, &error)) { err = ENOMEM; goto error; } @@ -1220,10 +1708,10 @@ mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev __rte_unused, struct rte_eth_udp_tunnel *udp_tunnel) { MLX5_ASSERT(udp_tunnel != NULL); - if (udp_tunnel->prot_type == RTE_TUNNEL_TYPE_VXLAN && + if (udp_tunnel->prot_type == RTE_ETH_TUNNEL_TYPE_VXLAN && udp_tunnel->udp_port == 4789) return 0; - if (udp_tunnel->prot_type == RTE_TUNNEL_TYPE_VXLAN_GPE && + if (udp_tunnel->prot_type == RTE_ETH_TUNNEL_TYPE_VXLAN_GPE && udp_tunnel->udp_port == 4790) return 0; return -ENOTSUP; @@ -1245,12 +1733,13 @@ mlx5_proc_priv_init(struct rte_eth_dev *dev) struct mlx5_proc_priv *ppriv; size_t ppriv_size; + mlx5_proc_priv_uninit(dev); /* * UAR register table follows the process private structure. BlueFlame * registers for Tx queues are stored in the table. */ - ppriv_size = - sizeof(struct mlx5_proc_priv) + priv->txqs_n * sizeof(void *); + ppriv_size = sizeof(struct mlx5_proc_priv) + + priv->txqs_n * sizeof(struct mlx5_uar_data); ppriv = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, ppriv_size, RTE_CACHE_LINE_SIZE, dev->device->numa_node); if (!ppriv) { @@ -1259,6 +1748,8 @@ mlx5_proc_priv_init(struct rte_eth_dev *dev) } ppriv->uar_table_sz = priv->txqs_n; dev->process_private = ppriv; + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + priv->sh->pppriv = ppriv; return 0; } @@ -1305,8 +1796,8 @@ mlx5_dev_close(struct rte_eth_dev *dev) return 0; DRV_LOG(DEBUG, "port %u closing device \"%s\"", dev->data->port_id, - ((priv->sh->ctx != NULL) ? - mlx5_os_get_ctx_device_name(priv->sh->ctx) : "")); + ((priv->sh->cdev->ctx != NULL) ? + mlx5_os_get_ctx_device_name(priv->sh->cdev->ctx) : "")); /* * If default mreg copy action is removed at the stop stage, * the search will return none and nothing will be done anymore. @@ -1317,24 +1808,26 @@ mlx5_dev_close(struct rte_eth_dev *dev) * If all the flows are already flushed in the device stop stage, * then this will return directly without any action. */ - mlx5_flow_list_flush(dev, &priv->flows, true); - mlx5_shared_action_flush(dev); + mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true); + mlx5_action_handle_flush(dev); mlx5_flow_meter_flush(dev, NULL); /* Prevent crashes when queues are still in use. */ - dev->rx_pkt_burst = removed_rx_burst; - dev->tx_pkt_burst = removed_tx_burst; + dev->rx_pkt_burst = rte_eth_pkt_burst_dummy; + dev->tx_pkt_burst = rte_eth_pkt_burst_dummy; rte_wmb(); /* Disable datapath on secondary process. */ mlx5_mp_os_req_stop_rxtx(dev); /* Free the eCPRI flex parser resource. */ mlx5_flex_parser_ecpri_release(dev); - if (priv->rxqs != NULL) { + mlx5_flex_item_port_cleanup(dev); + if (priv->rxq_privs != NULL) { /* XXX race condition if mlx5_rx_burst() is still running. */ rte_delay_us_sleep(1000); for (i = 0; (i != priv->rxqs_n); ++i) mlx5_rxq_release(dev, i); priv->rxqs_n = 0; - priv->rxqs = NULL; + mlx5_free(priv->rxq_privs); + priv->rxq_privs = NULL; } if (priv->txqs != NULL) { /* XXX race condition if mlx5_tx_burst() is still running. 
*/ @@ -1359,7 +1852,7 @@ mlx5_dev_close(struct rte_eth_dev *dev) mlx5_free(priv->rss_conf.rss_key); if (priv->reta_idx != NULL) mlx5_free(priv->reta_idx); - if (priv->config.vf) + if (priv->sh->dev_cap.vf) mlx5_os_mac_addr_flush(dev); if (priv->nl_socket_route >= 0) close(priv->nl_socket_route); @@ -1395,11 +1888,12 @@ mlx5_dev_close(struct rte_eth_dev *dev) if (ret) DRV_LOG(WARNING, "port %u some flows still remain", dev->data->port_id); - mlx5_cache_list_destroy(&priv->hrxqs); + if (priv->hrxqs) + mlx5_list_destroy(priv->hrxqs); /* * Free the shared context in last turn, because the cleanup * routines above may use some shared fields, like - * mlx5_os_mac_addr_flush() uses ibdev_path for retrieveing + * mlx5_os_mac_addr_flush() uses ibdev_path for retrieving * ifindex if Netlink fails. */ mlx5_free_shared_dev_ctx(priv->sh); @@ -1407,7 +1901,7 @@ mlx5_dev_close(struct rte_eth_dev *dev) unsigned int c = 0; uint16_t port_id; - MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { + MLX5_ETH_FOREACH_DEV(port_id, dev->device) { struct mlx5_priv *opriv = rte_eth_devices[port_id].data->dev_private; @@ -1451,6 +1945,7 @@ const struct eth_dev_ops mlx5_dev_ops = { .xstats_get_names = mlx5_xstats_get_names, .fw_version_get = mlx5_fw_version_get, .dev_infos_get = mlx5_dev_infos_get, + .representor_info_get = mlx5_representor_info_get, .read_clock = mlx5_txpp_read_clock, .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, .vlan_filter_set = mlx5_vlan_filter_set, @@ -1477,7 +1972,7 @@ const struct eth_dev_ops mlx5_dev_ops = { .reta_query = mlx5_dev_rss_reta_query, .rss_hash_update = mlx5_rss_hash_update, .rss_hash_conf_get = mlx5_rss_hash_conf_get, - .filter_ctrl = mlx5_dev_filter_ctrl, + .flow_ops_get = mlx5_flow_ops_get, .rxq_info_get = mlx5_rxq_info_get, .txq_info_get = mlx5_txq_info_get, .rx_burst_mode_get = mlx5_rx_burst_mode_get, @@ -1496,6 +1991,7 @@ const struct eth_dev_ops mlx5_dev_ops = { .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update, .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, + .get_monitor_addr = mlx5_get_monitor_addr, }; /* Available operations from secondary process. 
*/ @@ -1507,6 +2003,7 @@ const struct eth_dev_ops mlx5_dev_sec_ops = { .xstats_get_names = mlx5_xstats_get_names, .fw_version_get = mlx5_fw_version_get, .dev_infos_get = mlx5_dev_infos_get, + .representor_info_get = mlx5_representor_info_get, .read_clock = mlx5_txpp_read_clock, .rx_queue_start = mlx5_rx_queue_start, .rx_queue_stop = mlx5_rx_queue_stop, @@ -1540,6 +2037,7 @@ const struct eth_dev_ops mlx5_dev_ops_isolate = { .xstats_get_names = mlx5_xstats_get_names, .fw_version_get = mlx5_fw_version_get, .dev_infos_get = mlx5_dev_infos_get, + .representor_info_get = mlx5_representor_info_get, .read_clock = mlx5_txpp_read_clock, .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, .vlan_filter_set = mlx5_vlan_filter_set, @@ -1562,7 +2060,7 @@ const struct eth_dev_ops mlx5_dev_ops_isolate = { .mtu_set = mlx5_dev_set_mtu, .vlan_strip_queue_set = mlx5_vlan_strip_queue_set, .vlan_offload_set = mlx5_vlan_offload_set, - .filter_ctrl = mlx5_dev_filter_ctrl, + .flow_ops_get = mlx5_flow_ops_get, .rxq_info_get = mlx5_rxq_info_get, .txq_info_get = mlx5_txq_info_get, .rx_burst_mode_get = mlx5_rx_burst_mode_get, @@ -1580,6 +2078,7 @@ const struct eth_dev_ops mlx5_dev_ops_isolate = { .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update, .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, + .get_monitor_addr = mlx5_get_monitor_addr, }; /** @@ -1596,10 +2095,9 @@ const struct eth_dev_ops mlx5_dev_ops_isolate = { * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx5_args_check(const char *key, const char *val, void *opaque) +mlx5_port_args_check_handler(const char *key, const char *val, void *opaque) { - struct mlx5_dev_config *config = opaque; - unsigned long mod; + struct mlx5_port_config *config = opaque; signed long tmp; /* No-op, port representors are processed in mlx5_dev_spawn(). */ @@ -1612,13 +2110,12 @@ mlx5_args_check(const char *key, const char *val, void *opaque) DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val); return -rte_errno; } - if (tmp < 0 && strcmp(MLX5_TX_PP, key) && strcmp(MLX5_TX_SKEW, key)) { + if (tmp < 0) { /* Negative values are acceptable for some keys only. */ rte_errno = EINVAL; DRV_LOG(WARNING, "%s: invalid negative value \"%s\"", key, val); return -rte_errno; } - mod = tmp >= 0 ? 
tmp : -tmp; if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { if (tmp > MLX5_CQE_RESP_FORMAT_L34H_STRIDX) { DRV_LOG(ERR, "invalid CQE compression " @@ -1633,9 +2130,9 @@ mlx5_args_check(const char *key, const char *val, void *opaque) } else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) { config->mprq.enabled = !!tmp; } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) { - config->mprq.stride_num_n = tmp; + config->mprq.log_stride_num = tmp; } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_SIZE, key) == 0) { - config->mprq.stride_size_n = tmp; + config->mprq.log_stride_size = tmp; } else if (strcmp(MLX5_RX_MPRQ_MAX_MEMCPY_LEN, key) == 0) { config->mprq.max_memcpy_len = tmp; } else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) { @@ -1656,16 +2153,6 @@ mlx5_args_check(const char *key, const char *val, void *opaque) DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) { config->mps = !!tmp; - } else if (strcmp(MLX5_TX_DB_NC, key) == 0) { - if (tmp != MLX5_TXDB_CACHED && - tmp != MLX5_TXDB_NCACHED && - tmp != MLX5_TXDB_HEURISTIC) { - DRV_LOG(ERR, "invalid Tx doorbell " - "mapping parameter"); - rte_errno = EINVAL; - return -rte_errno; - } - config->dbnc = tmp; } else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) { DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); } else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) { @@ -1674,86 +2161,41 @@ mlx5_args_check(const char *key, const char *val, void *opaque) config->txq_inline_mpw = tmp; } else if (strcmp(MLX5_TX_VEC_EN, key) == 0) { DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); - } else if (strcmp(MLX5_TX_PP, key) == 0) { - if (!mod) { - DRV_LOG(ERR, "Zero Tx packet pacing parameter"); - rte_errno = EINVAL; - return -rte_errno; - } - config->tx_pp = tmp; - } else if (strcmp(MLX5_TX_SKEW, key) == 0) { - config->tx_skew = tmp; } else if (strcmp(MLX5_RX_VEC_EN, key) == 0) { config->rx_vec_en = !!tmp; - } else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) { - config->l3_vxlan_en = !!tmp; - } else if (strcmp(MLX5_VF_NL_EN, key) == 0) { - config->vf_nl_en = !!tmp; - } else if (strcmp(MLX5_DV_ESW_EN, key) == 0) { - config->dv_esw_en = !!tmp; - } else if (strcmp(MLX5_DV_FLOW_EN, key) == 0) { - config->dv_flow_en = !!tmp; - } else if (strcmp(MLX5_DV_XMETA_EN, key) == 0) { - if (tmp != MLX5_XMETA_MODE_LEGACY && - tmp != MLX5_XMETA_MODE_META16 && - tmp != MLX5_XMETA_MODE_META32 && - tmp != MLX5_XMETA_MODE_MISS_INFO) { - DRV_LOG(ERR, "invalid extensive " - "metadata parameter"); - rte_errno = EINVAL; - return -rte_errno; - } - if (tmp != MLX5_XMETA_MODE_MISS_INFO) - config->dv_xmeta_en = tmp; - else - config->dv_miss_info = 1; - } else if (strcmp(MLX5_LACP_BY_USER, key) == 0) { - config->lacp_by_user = !!tmp; - } else if (strcmp(MLX5_MR_EXT_MEMSEG_EN, key) == 0) { - config->mr_ext_memseg_en = !!tmp; } else if (strcmp(MLX5_MAX_DUMP_FILES_NUM, key) == 0) { config->max_dump_files_num = tmp; } else if (strcmp(MLX5_LRO_TIMEOUT_USEC, key) == 0) { - config->lro.timeout = tmp; - } else if (strcmp(MLX5_CLASS_ARG_NAME, key) == 0) { - DRV_LOG(DEBUG, "class argument is %s.", val); + config->lro_timeout = tmp; } else if (strcmp(MLX5_HP_BUF_SIZE, key) == 0) { config->log_hp_size = tmp; - } else if (strcmp(MLX5_RECLAIM_MEM, key) == 0) { - if (tmp != MLX5_RCM_NONE && - tmp != MLX5_RCM_LIGHT && - tmp != MLX5_RCM_AGGR) { - DRV_LOG(ERR, "Unrecognize %s: \"%s\"", key, val); - rte_errno = EINVAL; - return -rte_errno; - } - config->reclaim_mode = tmp; - } else if (strcmp(MLX5_SYS_MEM_EN, key) == 0) { - config->sys_mem_en = 
!!tmp; - } else if (strcmp(MLX5_DECAP_EN, key) == 0) { - config->decap_en = !!tmp; - } else { - DRV_LOG(WARNING, "%s: unknown parameter", key); - rte_errno = EINVAL; - return -rte_errno; + } else if (strcmp(MLX5_DELAY_DROP, key) == 0) { + config->std_delay_drop = !!(tmp & MLX5_DELAY_DROP_STANDARD); + config->hp_delay_drop = !!(tmp & MLX5_DELAY_DROP_HAIRPIN); } return 0; } /** - * Parse device parameters. + * Parse user port parameters and adjust them according to device capabilities. * + * @param priv + * Pointer to shared device context. + * @param mkvlist + * Pointer to mlx5 kvargs control, can be NULL if there is no devargs. * @param config - * Pointer to device configuration structure. - * @param devargs - * Device arguments structure. + * Pointer to port configuration structure. * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs) +mlx5_port_args_config(struct mlx5_priv *priv, struct mlx5_kvargs_ctrl *mkvlist, + struct mlx5_port_config *config) { + struct mlx5_hca_attr *hca_attr = &priv->sh->cdev->config.hca_attr; + struct mlx5_dev_cap *dev_cap = &priv->sh->dev_cap; + bool devx = priv->sh->cdev->config.devx; const char **params = (const char *[]){ MLX5_RXQ_CQE_COMP_EN, MLX5_RXQ_PKT_PAD_EN, @@ -1771,54 +2213,341 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs) MLX5_TXQ_MPW_EN, MLX5_TXQ_MPW_HDR_DSEG_EN, MLX5_TXQ_MAX_INLINE_LEN, - MLX5_TX_DB_NC, - MLX5_TX_PP, - MLX5_TX_SKEW, MLX5_TX_VEC_EN, MLX5_RX_VEC_EN, - MLX5_L3_VXLAN_EN, - MLX5_VF_NL_EN, - MLX5_DV_ESW_EN, - MLX5_DV_FLOW_EN, - MLX5_DV_XMETA_EN, - MLX5_LACP_BY_USER, - MLX5_MR_EXT_MEMSEG_EN, MLX5_REPRESENTOR, MLX5_MAX_DUMP_FILES_NUM, MLX5_LRO_TIMEOUT_USEC, - MLX5_CLASS_ARG_NAME, MLX5_HP_BUF_SIZE, - MLX5_RECLAIM_MEM, - MLX5_SYS_MEM_EN, - MLX5_DECAP_EN, + MLX5_DELAY_DROP, NULL, }; - struct rte_kvargs *kvlist; int ret = 0; - int i; - if (devargs == NULL) + /* Default configuration. */ + memset(config, 0, sizeof(*config)); + config->mps = MLX5_ARG_UNSET; + config->cqe_comp = 1; + config->rx_vec_en = 1; + config->txq_inline_max = MLX5_ARG_UNSET; + config->txq_inline_min = MLX5_ARG_UNSET; + config->txq_inline_mpw = MLX5_ARG_UNSET; + config->txqs_inline = MLX5_ARG_UNSET; + config->mprq.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN; + config->mprq.min_rxqs_num = MLX5_MPRQ_MIN_RXQS; + config->mprq.log_stride_num = MLX5_MPRQ_DEFAULT_LOG_STRIDE_NUM; + config->log_hp_size = MLX5_ARG_UNSET; + config->std_delay_drop = 0; + config->hp_delay_drop = 0; + if (mkvlist != NULL) { + /* Process parameters. */ + ret = mlx5_kvargs_process(mkvlist, params, + mlx5_port_args_check_handler, config); + if (ret) { + DRV_LOG(ERR, "Failed to process port arguments: %s", + strerror(rte_errno)); + return -rte_errno; + } + } + /* Adjust parameters according to device capabilities. */ + if (config->hw_padding && !dev_cap->hw_padding) { + DRV_LOG(DEBUG, "Rx end alignment padding isn't supported."); + config->hw_padding = 0; + } else if (config->hw_padding) { + DRV_LOG(DEBUG, "Rx end alignment padding is enabled."); + } + /* + * MPW is disabled by default, while the Enhanced MPW is enabled + * by default. + */ + if (config->mps == MLX5_ARG_UNSET) + config->mps = (dev_cap->mps == MLX5_MPW_ENHANCED) ? + MLX5_MPW_ENHANCED : MLX5_MPW_DISABLED; + else + config->mps = config->mps ? dev_cap->mps : MLX5_MPW_DISABLED; + DRV_LOG(INFO, "%sMPS is %s", + config->mps == MLX5_MPW_ENHANCED ? "enhanced " : + config->mps == MLX5_MPW ? 
"legacy " : "", + config->mps != MLX5_MPW_DISABLED ? "enabled" : "disabled"); + /* LRO is supported only when DV flow enabled. */ + if (dev_cap->lro_supported && !priv->sh->config.dv_flow_en) + dev_cap->lro_supported = 0; + if (dev_cap->lro_supported) { + /* + * If LRO timeout is not configured by application, + * use the minimal supported value. + */ + if (!config->lro_timeout) + config->lro_timeout = + hca_attr->lro_timer_supported_periods[0]; + DRV_LOG(DEBUG, "LRO session timeout set to %d usec.", + config->lro_timeout); + } + if (config->cqe_comp && !dev_cap->cqe_comp) { + DRV_LOG(WARNING, "Rx CQE 128B compression is not supported."); + config->cqe_comp = 0; + } + if (config->cqe_comp_fmt == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX && + (!devx || !hca_attr->mini_cqe_resp_flow_tag)) { + DRV_LOG(WARNING, + "Flow Tag CQE compression format isn't supported."); + config->cqe_comp = 0; + } + if (config->cqe_comp_fmt == MLX5_CQE_RESP_FORMAT_L34H_STRIDX && + (!devx || !hca_attr->mini_cqe_resp_l3_l4_tag)) { + DRV_LOG(WARNING, + "L3/L4 Header CQE compression format isn't supported."); + config->cqe_comp = 0; + } + DRV_LOG(DEBUG, "Rx CQE compression is %ssupported.", + config->cqe_comp ? "" : "not "); + if ((config->std_delay_drop || config->hp_delay_drop) && + !dev_cap->rq_delay_drop_en) { + config->std_delay_drop = 0; + config->hp_delay_drop = 0; + DRV_LOG(WARNING, "dev_port-%u: Rxq delay drop isn't supported.", + priv->dev_port); + } + if (config->mprq.enabled && !priv->sh->dev_cap.mprq.enabled) { + DRV_LOG(WARNING, "Multi-Packet RQ isn't supported."); + config->mprq.enabled = 0; + } + if (config->max_dump_files_num == 0) + config->max_dump_files_num = 128; + /* Detect minimal data bytes to inline. */ + mlx5_set_min_inline(priv); + DRV_LOG(DEBUG, "VLAN insertion in WQE is %ssupported.", + config->hw_vlan_insert ? "" : "not "); + DRV_LOG(DEBUG, "\"rxq_pkt_pad_en\" is %u.", config->hw_padding); + DRV_LOG(DEBUG, "\"rxq_cqe_comp_en\" is %u.", config->cqe_comp); + DRV_LOG(DEBUG, "\"cqe_comp_fmt\" is %u.", config->cqe_comp_fmt); + DRV_LOG(DEBUG, "\"rx_vec_en\" is %u.", config->rx_vec_en); + DRV_LOG(DEBUG, "Standard \"delay_drop\" is %u.", + config->std_delay_drop); + DRV_LOG(DEBUG, "Hairpin \"delay_drop\" is %u.", config->hp_delay_drop); + DRV_LOG(DEBUG, "\"max_dump_files_num\" is %u.", + config->max_dump_files_num); + DRV_LOG(DEBUG, "\"log_hp_size\" is %u.", config->log_hp_size); + DRV_LOG(DEBUG, "\"mprq_en\" is %u.", config->mprq.enabled); + DRV_LOG(DEBUG, "\"mprq_log_stride_num\" is %u.", + config->mprq.log_stride_num); + DRV_LOG(DEBUG, "\"mprq_log_stride_size\" is %u.", + config->mprq.log_stride_size); + DRV_LOG(DEBUG, "\"mprq_max_memcpy_len\" is %u.", + config->mprq.max_memcpy_len); + DRV_LOG(DEBUG, "\"rxqs_min_mprq\" is %u.", config->mprq.min_rxqs_num); + DRV_LOG(DEBUG, "\"lro_timeout_usec\" is %u.", config->lro_timeout); + DRV_LOG(DEBUG, "\"txq_mpw_en\" is %d.", config->mps); + DRV_LOG(DEBUG, "\"txqs_min_inline\" is %d.", config->txqs_inline); + DRV_LOG(DEBUG, "\"txq_inline_min\" is %d.", config->txq_inline_min); + DRV_LOG(DEBUG, "\"txq_inline_max\" is %d.", config->txq_inline_max); + DRV_LOG(DEBUG, "\"txq_inline_mpw\" is %d.", config->txq_inline_mpw); + return 0; +} + +/** + * Print the key for device argument. + * + * It is "dummy" handler whose whole purpose is to enable using + * mlx5_kvargs_process() function which set devargs as used. + * + * @param key + * Key argument. + * @param val + * Value associated with key, unused. + * @param opaque + * Unused, can be NULL. 

+/**
+ * Print the key for device argument.
+ *
+ * It is a "dummy" handler whose whole purpose is to enable using the
+ * mlx5_kvargs_process() function, which marks devargs as used.
+ *
+ * @param key
+ * Key argument.
+ * @param val
+ * Value associated with key, unused.
+ * @param opaque
+ * Unused, can be NULL.
+ *
+ * @return
+ * 0 on success, function cannot fail.
+ */
+static int
+mlx5_dummy_handler(const char *key, const char *val, void *opaque)
+{
+ DRV_LOG(DEBUG, "\tKey: \"%s\" is set as used.", key);
+ RTE_SET_USED(opaque);
+ RTE_SET_USED(val);
+ return 0;
+}
+
+/**
+ * Set requested devargs as used when device is already spawned.
+ *
+ * It is necessary since it is valid to probe again an existing device:
+ * if its devargs are not marked as used, mlx5_kvargs_validate() will fail.
+ *
+ * @param name
+ * Name of the existing device.
+ * @param port_id
+ * Port identifier of the device.
+ * @param mkvlist
+ * Pointer to mlx5 kvargs control to mark as used.
+ */
+void
+mlx5_port_args_set_used(const char *name, uint16_t port_id,
+ struct mlx5_kvargs_ctrl *mkvlist)
+{
+ const char **params = (const char *[]){
+ MLX5_RXQ_CQE_COMP_EN,
+ MLX5_RXQ_PKT_PAD_EN,
+ MLX5_RX_MPRQ_EN,
+ MLX5_RX_MPRQ_LOG_STRIDE_NUM,
+ MLX5_RX_MPRQ_LOG_STRIDE_SIZE,
+ MLX5_RX_MPRQ_MAX_MEMCPY_LEN,
+ MLX5_RXQS_MIN_MPRQ,
+ MLX5_TXQ_INLINE,
+ MLX5_TXQ_INLINE_MIN,
+ MLX5_TXQ_INLINE_MAX,
+ MLX5_TXQ_INLINE_MPW,
+ MLX5_TXQS_MIN_INLINE,
+ MLX5_TXQS_MAX_VEC,
+ MLX5_TXQ_MPW_EN,
+ MLX5_TXQ_MPW_HDR_DSEG_EN,
+ MLX5_TXQ_MAX_INLINE_LEN,
+ MLX5_TX_VEC_EN,
+ MLX5_RX_VEC_EN,
+ MLX5_REPRESENTOR,
+ MLX5_MAX_DUMP_FILES_NUM,
+ MLX5_LRO_TIMEOUT_USEC,
+ MLX5_HP_BUF_SIZE,
+ MLX5_DELAY_DROP,
+ NULL,
+ };
+
+ /* Secondary process should not handle devargs. */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return;
+ MLX5_ASSERT(mkvlist != NULL);
+ DRV_LOG(DEBUG, "Ethernet device \"%s\" for port %u "
+ "already exists, set devargs as used:", name, port_id);
+ /* This function cannot fail with this handler. */
+ mlx5_kvargs_process(mkvlist, params, mlx5_dummy_handler, NULL);
+}
+
+/**
+ * Check sibling device configurations when probing again.
+ *
+ * Sibling devices sharing an infiniband device context should have
+ * compatible configurations. This applies to representors and bonding
+ * devices.
+ *
+ * @param cdev
+ * Pointer to mlx5 device structure.
+ * @param mkvlist
+ * Pointer to mlx5 kvargs control, can be NULL if there is no devargs.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_probe_again_args_validate(struct mlx5_common_device *cdev,
+ struct mlx5_kvargs_ctrl *mkvlist)
+{
+ struct mlx5_dev_ctx_shared *sh = NULL;
+ struct mlx5_sh_config *config;
+ int ret;
+
+ /* Secondary process should not handle devargs. */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
- /* Following UGLY cast is done to pass checkpatch. */
- kvlist = rte_kvargs_parse(devargs->args, params);
- if (kvlist == NULL) {
- rte_errno = EINVAL;
+ pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
+ /* Search for IB context by common device pointer. */
+ LIST_FOREACH(sh, &mlx5_dev_ctx_list, next)
+ if (sh->cdev == cdev)
+ break;
+ pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
+ /* No sh for this device -> it isn't a probe again. */
+ if (sh == NULL)
+ return 0;
+ config = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE,
+ sizeof(struct mlx5_sh_config),
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ if (config == NULL) {
+ rte_errno = ENOMEM;
 return -rte_errno;
 }
- /* Process parameters. */
- for (i = 0; (params[i] != NULL); ++i) {
- if (rte_kvargs_count(kvlist, params[i])) {
- ret = rte_kvargs_process(kvlist, params[i],
- mlx5_args_check, config);
- if (ret) {
- rte_errno = EINVAL;
- rte_kvargs_free(kvlist);
- return -rte_errno;
- }
- }
+ /*
+ * Create a temporary IB context configuration structure according to
+ * the new devargs attached in probing again.
+ */
+ ret = mlx5_shared_dev_ctx_args_config(sh, mkvlist, config);
+ if (ret) {
+ DRV_LOG(ERR, "Failed to process device configure: %s",
+ strerror(rte_errno));
+ mlx5_free(config);
+ return ret;
+ }
+ /*
+ * Check the match between the temporary structure and the existing
+ * IB context structure.
+ */
+ if (sh->config.dv_flow_en ^ config->dv_flow_en) {
+ DRV_LOG(ERR, "\"dv_flow_en\" "
+ "configuration mismatch for shared %s context.",
+ sh->ibdev_name);
+ goto error;
+ }
+ if ((sh->config.dv_xmeta_en ^ config->dv_xmeta_en) ||
+ (sh->config.dv_miss_info ^ config->dv_miss_info)) {
+ DRV_LOG(ERR, "\"dv_xmeta_en\" "
+ "configuration mismatch for shared %s context.",
+ sh->ibdev_name);
+ goto error;
+ }
+ if (sh->config.dv_esw_en ^ config->dv_esw_en) {
+ DRV_LOG(ERR, "\"dv_esw_en\" "
+ "configuration mismatch for shared %s context.",
+ sh->ibdev_name);
+ goto error;
+ }
+ if (sh->config.reclaim_mode ^ config->reclaim_mode) {
+ DRV_LOG(ERR, "\"reclaim_mode\" "
+ "configuration mismatch for shared %s context.",
+ sh->ibdev_name);
+ goto error;
+ }
+ if (sh->config.allow_duplicate_pattern ^
+ config->allow_duplicate_pattern) {
+ DRV_LOG(ERR, "\"allow_duplicate_pattern\" "
+ "configuration mismatch for shared %s context.",
+ sh->ibdev_name);
+ goto error;
 }
- rte_kvargs_free(kvlist);
+ if (sh->config.l3_vxlan_en ^ config->l3_vxlan_en) {
+ DRV_LOG(ERR, "\"l3_vxlan_en\" "
+ "configuration mismatch for shared %s context.",
+ sh->ibdev_name);
+ goto error;
+ }
+ if (sh->config.decap_en ^ config->decap_en) {
+ DRV_LOG(ERR, "\"decap_en\" "
+ "configuration mismatch for shared %s context.",
+ sh->ibdev_name);
+ goto error;
+ }
+ if (sh->config.lacp_by_user ^ config->lacp_by_user) {
+ DRV_LOG(ERR, "\"lacp_by_user\" "
+ "configuration mismatch for shared %s context.",
+ sh->ibdev_name);
+ goto error;
+ }
+ if (sh->config.tx_pp ^ config->tx_pp) {
+ DRV_LOG(ERR, "\"tx_pp\" "
+ "configuration mismatch for shared %s context.",
+ sh->ibdev_name);
+ goto error;
+ }
+ if (sh->config.tx_skew ^ config->tx_skew) {
+ DRV_LOG(ERR, "\"tx_skew\" "
+ "configuration mismatch for shared %s context.",
+ sh->ibdev_name);
+ goto error;
+ }
+ mlx5_free(config);
 return 0;
+error:
+ mlx5_free(config);
+ rte_errno = EINVAL;
+ return -rte_errno;
 }
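[Editor's note, between the two hunks above and below: each `a ^ b` test treats the two configurations as bit patterns, so it is a boolean inequality test for one-bit flags and flags any differing bit for wider fields. A minimal standalone sketch of the same pattern, with hypothetical field names:]

/* Hypothetical subset of a shared-context configuration. */
struct sibling_cfg {
	unsigned int dv_flow_en:1; /* one-bit flag */
	unsigned int tx_skew;      /* multi-bit value */
};

/* Return 0 when the re-probed config matches the existing one, -1 otherwise.
 * XOR is non-zero iff at least one bit differs, which covers both cases.
 */
static int
sibling_cfg_matches(const struct sibling_cfg *existing,
		    const struct sibling_cfg *fresh)
{
	if (existing->dv_flow_en ^ fresh->dv_flow_en)
		return -1;
	if (existing->tx_skew ^ fresh->tx_skew)
		return -1;
	return 0;
}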
 /**
@@ -1832,35 +2561,37 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 * - otherwise L2 mode (18 bytes) is assumed for ConnectX-4/4 Lx
 * and none (0 bytes) for other NICs
 *
- * @param spawn
- * Verbs device parameters (name, port, switch_info) to spawn.
- * @param config
- * Device configuration parameters.
+ * @param priv
+ * Pointer to the private device data structure.
 */
 void
-mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn,
- struct mlx5_dev_config *config)
+mlx5_set_min_inline(struct mlx5_priv *priv)
 {
+ struct mlx5_hca_attr *hca_attr = &priv->sh->cdev->config.hca_attr;
+ struct mlx5_port_config *config = &priv->config;
+
 if (config->txq_inline_min != MLX5_ARG_UNSET) {
 /* Application defines size of inlined data explicitly. */
- switch (spawn->pci_dev->id.device_id) {
- case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
- case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
- if (config->txq_inline_min <
- (int)MLX5_INLINE_HSIZE_L2) {
- DRV_LOG(DEBUG,
- "txq_inline_mix aligned to minimal"
- " ConnectX-4 required value %d",
- (int)MLX5_INLINE_HSIZE_L2);
- config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
+ if (priv->pci_dev != NULL) {
+ switch (priv->pci_dev->id.device_id) {
+ case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
+ case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
+ if (config->txq_inline_min <
+ (int)MLX5_INLINE_HSIZE_L2) {
+ DRV_LOG(DEBUG,
+ "txq_inline_min aligned to minimal ConnectX-4 required value %d",
+ (int)MLX5_INLINE_HSIZE_L2);
+ config->txq_inline_min =
+ MLX5_INLINE_HSIZE_L2;
+ }
+ break;
 }
- break;
 }
 goto exit;
 }
- if (config->hca_attr.eth_net_offloads) {
+ if (hca_attr->eth_net_offloads) {
 /* We have DevX enabled, inline mode queried successfully. */
- switch (config->hca_attr.wqe_inline_mode) {
+ switch (hca_attr->wqe_inline_mode) {
 case MLX5_CAP_INLINE_MODE_L2:
 /* outer L2 header must be inlined. */
 config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
@@ -1869,14 +2600,14 @@ mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn,
 /* No inline data are required by NIC. */
 config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
 config->hw_vlan_insert =
- config->hca_attr.wqe_vlan_insert;
+ hca_attr->wqe_vlan_insert;
 DRV_LOG(DEBUG, "Tx VLAN insertion is supported");
 goto exit;
 case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
 /* inline mode is defined by NIC vport context. */
- if (!config->hca_attr.eth_virt)
+ if (!hca_attr->eth_virt)
 break;
- switch (config->hca_attr.vport_inline_mode) {
+ switch (hca_attr->vport_inline_mode) {
 case MLX5_INLINE_MODE_NONE:
 config->txq_inline_min =
 MLX5_INLINE_HSIZE_NONE;
@@ -1908,12 +2639,16 @@ mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn,
 }
 }
 }
+ if (priv->pci_dev == NULL) {
+ config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
+ goto exit;
+ }
 /*
 * We get here if we are unable to deduce
 * inline data size with DevX. Try PCI ID
 * to determine old NICs.
 */
- switch (spawn->pci_dev->id.device_id) {
+ switch (priv->pci_dev->id.device_id) {
 case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
 case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
 case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
@@ -1955,7 +2690,7 @@ mlx5_set_metadata_mask(struct rte_eth_dev *dev)
 uint32_t meta, mark, reg_c0;

 reg_c0 = ~priv->vport_meta_mask;
- switch (priv->config.dv_xmeta_en) {
+ switch (sh->config.dv_xmeta_en) {
 case MLX5_XMETA_MODE_LEGACY:
 meta = UINT32_MAX;
 mark = MLX5_FLOW_MARK_MASK;
@@ -1975,21 +2710,21 @@ mlx5_set_metadata_mask(struct rte_eth_dev *dev)
 break;
 }
 if (sh->dv_mark_mask && sh->dv_mark_mask != mark)
- DRV_LOG(WARNING, "metadata MARK mask mismatche %08X:%08X",
+ DRV_LOG(WARNING, "metadata MARK mask mismatch %08X:%08X",
 sh->dv_mark_mask, mark);
 else
 sh->dv_mark_mask = mark;
 if (sh->dv_meta_mask && sh->dv_meta_mask != meta)
- DRV_LOG(WARNING, "metadata META mask mismatche %08X:%08X",
+ DRV_LOG(WARNING, "metadata META mask mismatch %08X:%08X",
 sh->dv_meta_mask, meta);
 else
 sh->dv_meta_mask = meta;
 if (sh->dv_regc0_mask && sh->dv_regc0_mask != reg_c0)
- DRV_LOG(WARNING, "metadata reg_c0 mask mismatche %08X:%08X",
+ DRV_LOG(WARNING, "metadata reg_c0 mask mismatch %08X:%08X",
 sh->dv_meta_mask, reg_c0);
 else
 sh->dv_regc0_mask = reg_c0;
- DRV_LOG(DEBUG, "metadata mode %u", priv->config.dv_xmeta_en);
+ DRV_LOG(DEBUG, "metadata mode %u", sh->config.dv_xmeta_en);
 DRV_LOG(DEBUG, "metadata MARK mask %08X", sh->dv_mark_mask);
 DRV_LOG(DEBUG, "metadata META mask %08X", sh->dv_meta_mask);
 DRV_LOG(DEBUG, "metadata reg_c0 mask %08X", sh->dv_regc0_mask);
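[Editor's note on why these masks matter downstream: a flow rule's MARK value must fit in the mark mask resolved above (the full MLX5_FLOW_MARK_MASK in legacy mode, narrower once dv_xmeta_en remaps metadata through reg_c0). A sketch using the generic rte_flow API, with an arbitrary placeholder mark value:]

#include <rte_flow.h>

/* Tag every ingress Ethernet packet with MARK 0xCAFE (placeholder value);
 * the PMD may reject a mark that does not fit the active metadata mask.
 */
static struct rte_flow *
mark_all_traffic(uint16_t port_id, struct rte_flow_error *err)
{
	static const struct rte_flow_attr attr = { .ingress = 1 };
	static const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	static const struct rte_flow_action_mark mark = { .id = 0xCAFE };
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	return rte_flow_create(port_id, &attr, pattern, actions, err);
}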
@@ -2015,86 +2750,36 @@ rte_pmd_mlx5_get_dyn_flag_names(char *names[], unsigned int n)
 return RTE_DIM(dynf_names);
 }

-/**
- * Comparison callback to sort device data.
- *
- * This is meant to be used with qsort().
- *
- * @param a[in]
- * Pointer to pointer to first data object.
- * @param b[in]
- * Pointer to pointer to second data object.
- *
- * @return
- * 0 if both objects are equal, less than 0 if the first argument is less
- * than the second, greater than 0 otherwise.
- */
-int
-mlx5_dev_check_sibling_config(struct mlx5_priv *priv,
- struct mlx5_dev_config *config)
-{
- struct mlx5_dev_ctx_shared *sh = priv->sh;
- struct mlx5_dev_config *sh_conf = NULL;
- uint16_t port_id;
-
- MLX5_ASSERT(sh);
- /* Nothing to compare for the single/first device. */
- if (sh->refcnt == 1)
- return 0;
- /* Find the device with shared context. */
- MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
- struct mlx5_priv *opriv =
- rte_eth_devices[port_id].data->dev_private;
-
- if (opriv && opriv != priv && opriv->sh == sh) {
- sh_conf = &opriv->config;
- break;
- }
- }
- if (!sh_conf)
- return 0;
- if (sh_conf->dv_flow_en ^ config->dv_flow_en) {
- DRV_LOG(ERR, "\"dv_flow_en\" configuration mismatch"
- " for shared %s context", sh->ibdev_name);
- rte_errno = EINVAL;
- return rte_errno;
- }
- if (sh_conf->dv_xmeta_en ^ config->dv_xmeta_en) {
- DRV_LOG(ERR, "\"dv_xmeta_en\" configuration mismatch"
- " for shared %s context", sh->ibdev_name);
- rte_errno = EINVAL;
- return rte_errno;
- }
- return 0;
-}
-
 /**
 * Look for the ethernet device belonging to mlx5 driver.
 *
 * @param[in] port_id
 * port_id to start looking for device.
- * @param[in] pci_dev
- * Pointer to the hint PCI device. When device is being probed
+ * @param[in] odev
+ * Pointer to the hint device. When device is being probed
 * its siblings (master and preceding representors) might
 * not have an assigned driver yet (because the mlx5_os_pci_probe()
- * is not completed yet, for this case match on hint PCI
+ * is not completed yet); for this case match on the hint
 * device may be used to detect sibling device.
 *
 * @return
 * port_id of found device, RTE_MAX_ETHPORTS if not found.
 */
 uint16_t
-mlx5_eth_find_next(uint16_t port_id, struct rte_pci_device *pci_dev)
+mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev)
 {
 while (port_id < RTE_MAX_ETHPORTS) {
 struct rte_eth_dev *dev = &rte_eth_devices[port_id];

 if (dev->state != RTE_ETH_DEV_UNUSED &&
 dev->device &&
- (dev->device == &pci_dev->device ||
+ (dev->device == odev ||
 (dev->device->driver &&
 dev->device->driver->name &&
- !strcmp(dev->device->driver->name, MLX5_DRIVER_NAME))))
+ ((strcmp(dev->device->driver->name,
+ MLX5_PCI_DRIVER_NAME) == 0) ||
+ (strcmp(dev->device->driver->name,
+ MLX5_AUXILIARY_DRIVER_NAME) == 0)))))
 break;
 port_id++;
 }
@@ -2104,23 +2789,23 @@ mlx5_eth_find_next(uint16_t port_id, struct rte_pci_device *pci_dev)
 }

 /**
- * DPDK callback to remove a PCI device.
+ * Callback to remove a device.
 *
- * This function removes all Ethernet devices belong to a given PCI device.
+ * This function removes all Ethernet devices belonging to a given device.
 *
- * @param[in] pci_dev
- * Pointer to the PCI device.
+ * @param[in] cdev
+ * Pointer to the generic device.
 *
 * @return
 * 0 on success, the function cannot fail.
 */
-static int
-mlx5_pci_remove(struct rte_pci_device *pci_dev)
+int
+mlx5_net_remove(struct mlx5_common_device *cdev)
 {
 uint16_t port_id;
 int ret = 0;

- RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) {
+ RTE_ETH_FOREACH_DEV_OF(port_id, cdev->dev) {
 /*
 * mlx5_dev_close() is not registered to secondary process,
 * call the close function explicitly for secondary process.
@@ -2211,23 +2896,19 @@ static const struct rte_pci_id mlx5_pci_id_map[] = {
 }
 };

-static struct mlx5_pci_driver mlx5_driver = {
- .driver_class = MLX5_CLASS_NET,
- .pci_driver = {
- .driver = {
- .name = MLX5_DRIVER_NAME,
- },
- .id_table = mlx5_pci_id_map,
- .probe = mlx5_os_pci_probe,
- .remove = mlx5_pci_remove,
- .dma_map = mlx5_dma_map,
- .dma_unmap = mlx5_dma_unmap,
- .drv_flags = PCI_DRV_FLAGS,
- },
+static struct mlx5_class_driver mlx5_net_driver = {
+ .drv_class = MLX5_CLASS_ETH,
+ .name = RTE_STR(MLX5_ETH_DRIVER_NAME),
+ .id_table = mlx5_pci_id_map,
+ .probe = mlx5_os_net_probe,
+ .remove = mlx5_net_remove,
+ .probe_again = 1,
+ .intr_lsc = 1,
+ .intr_rmv = 1,
 };

 /* Initialize driver log type. */
-RTE_LOG_REGISTER(mlx5_logtype, pmd.net.mlx5, NOTICE)
+RTE_LOG_REGISTER_DEFAULT(mlx5_logtype, NOTICE)

 /**
 * Driver initialization routine.
@@ -2241,9 +2922,9 @@ RTE_INIT(rte_mlx5_pmd_init)
 mlx5_set_cksum_table();
 mlx5_set_swp_types_table();
 if (mlx5_glue)
- mlx5_pci_driver_register(&mlx5_driver);
+ mlx5_class_driver_register(&mlx5_net_driver);
 }

-RTE_PMD_EXPORT_NAME(net_mlx5, __COUNTER__);
-RTE_PMD_REGISTER_PCI_TABLE(net_mlx5, mlx5_pci_id_map);
-RTE_PMD_REGISTER_KMOD_DEP(net_mlx5, "* ib_uverbs & mlx5_core & mlx5_ib");
+RTE_PMD_EXPORT_NAME(MLX5_ETH_DRIVER_NAME, __COUNTER__);
+RTE_PMD_REGISTER_PCI_TABLE(MLX5_ETH_DRIVER_NAME, mlx5_pci_id_map);
+RTE_PMD_REGISTER_KMOD_DEP(MLX5_ETH_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");
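[Editor's closing usage sketch, not part of the patch: mlx5_net_remove() is reached through the common-class remove path when the underlying device detaches, e.g. on an EAL hotplug remove request. The PCI address is the same hypothetical placeholder used earlier.]

#include <rte_dev.h>

/* Detach the device; the common mlx5 layer then invokes the .remove
 * callback of each registered class driver, including mlx5_net_remove()
 * shown above, which closes every Ethernet port of that device.
 */
static int
detach_mlx5_device(void)
{
	return rte_eal_hotplug_remove("pci", "0000:03:00.0");
}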