#include <ethdev_pci.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
+#include <rte_bus_auxiliary.h>
#include <rte_common.h>
#include <rte_kvargs.h>
#include <rte_rwlock.h>
#include "mlx5_common_os.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
+#include "mlx5_rx.h"
+#include "mlx5_tx.h"
#include "mlx5_autoconf.h"
#include "mlx5_mr.h"
#include "mlx5_flow.h"
#include "mlx5_nl.h"
#include "mlx5_devx.h"
-#define MLX5_TAGS_HLIST_ARRAY_SIZE 8192
-
#ifndef HAVE_IBV_MLX5_MOD_MPW
#define MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED (1 << 2)
#define MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW (1 << 3)
/* Process local data for secondary processes. */
static struct mlx5_local_data mlx5_local_data;
+/* rte_flow indexed pool configuration. */
+static struct mlx5_indexed_pool_config icfg[] = {
+ {
+ .size = sizeof(struct rte_flow),
+ .trunk_size = 64,
+ .need_lock = 1,
+ .release_mem_en = 0,
+ .malloc = mlx5_malloc,
+ .free = mlx5_free,
+ .per_core_cache = 0,
+ .type = "ctl_flow_ipool",
+ },
+ {
+ .size = sizeof(struct rte_flow),
+ .trunk_size = 64,
+ .grow_trunk = 3,
+ .grow_shift = 2,
+ .need_lock = 1,
+ .release_mem_en = 0,
+ .malloc = mlx5_malloc,
+ .free = mlx5_free,
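+ /* 16K-entry per-core cache to avoid the global pool lock on flow allocation. */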
+ .per_core_cache = 1 << 14,
+ .type = "rte_flow_ipool",
+ },
+ {
+ .size = sizeof(struct rte_flow),
+ .trunk_size = 64,
+ .grow_trunk = 3,
+ .grow_shift = 2,
+ .need_lock = 1,
+ .release_mem_en = 0,
+ .malloc = mlx5_malloc,
+ .free = mlx5_free,
+ .per_core_cache = 0,
+ .type = "mcp_flow_ipool",
+ },
+};
+
/**
* Set the completion channel file descriptor interrupt as non-blocking.
*
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
device_attr->tunnel_offloads_caps = dv_attr.tunnel_offloads_caps;
#endif
+ strlcpy(device_attr->fw_ver, attr_ex.orig_attr.fw_ver,
+ sizeof(device_attr->fw_ver));
return err;
}
return ret;
}
+/**
+ * Detect whether misc5 matching is supported.
+ *
+ * @param[in] priv
+ * Device private data pointer.
+ */
+#ifdef HAVE_MLX5DV_DR
+static void
+__mlx5_discovery_misc5_cap(struct mlx5_priv *priv)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ /* Dummy VxLAN matcher to detect rdma-core misc5 cap
+ * Case: IPv4--->UDP--->VxLAN--->vni
+ */
+ void *tbl;
+ struct mlx5_flow_dv_match_params matcher_mask;
+ void *match_m;
+ void *matcher;
+ void *headers_m;
+ void *misc5_m;
+ uint32_t *tunnel_header_m;
+ struct mlx5dv_flow_matcher_attr dv_attr;
+
+ memset(&matcher_mask, 0, sizeof(matcher_mask));
+ matcher_mask.size = sizeof(matcher_mask.buf);
+ match_m = matcher_mask.buf;
+ headers_m = MLX5_ADDR_OF(fte_match_param, match_m, outer_headers);
+ misc5_m = MLX5_ADDR_OF(fte_match_param,
+ match_m, misc_parameters_5);
+ tunnel_header_m = (uint32_t *)
+ MLX5_ADDR_OF(fte_match_set_misc5,
+ misc5_m, tunnel_header_1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol, 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 4);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_dport, 0xffff);
+ *tunnel_header_m = 0xffffff;
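+ /* 24-bit mask covering the VxLAN VNI expected in tunnel_header_1. */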
+
+ tbl = mlx5_glue->dr_create_flow_tbl(priv->sh->rx_domain, 1);
+ if (!tbl) {
+ DRV_LOG(INFO, "No SW steering support");
+ return;
+ }
+ dv_attr.type = IBV_FLOW_ATTR_NORMAL;
+ dv_attr.match_mask = (void *)&matcher_mask;
+ dv_attr.match_criteria_enable =
+ (1 << MLX5_MATCH_CRITERIA_ENABLE_OUTER_BIT) |
+ (1 << MLX5_MATCH_CRITERIA_ENABLE_MISC5_BIT);
+ dv_attr.priority = 3;
+#ifdef HAVE_MLX5DV_DR_ESWITCH
+ void *misc2_m;
+ if (priv->config.dv_esw_en) {
+ /* FDB enabled: match on reg_c_0 as well. */
+ dv_attr.match_criteria_enable |=
+ (1 << MLX5_MATCH_CRITERIA_ENABLE_MISC2_BIT);
+ misc2_m = MLX5_ADDR_OF(fte_match_param,
+ match_m, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2_m,
+ metadata_reg_c_0, 0xffff);
+ }
+#endif
+ matcher = mlx5_glue->dv_create_flow_matcher(priv->sh->ctx,
+ &dv_attr, tbl);
+ if (matcher) {
+ priv->sh->misc5_cap = 1;
+ mlx5_glue->dv_destroy_flow_matcher(matcher);
+ }
+ mlx5_glue->dr_destroy_flow_tbl(tbl);
+#else
+ RTE_SET_USED(priv);
+#endif
+}
+#endif
+
/**
* Verbs callback to free a memory.
*
mlx5_alloc_shared_dr(struct mlx5_priv *priv)
{
struct mlx5_dev_ctx_shared *sh = priv->sh;
- char s[MLX5_HLIST_NAMESIZE] __rte_unused;
+ char s[MLX5_NAME_SIZE] __rte_unused;
int err;
MLX5_ASSERT(sh && sh->refcnt);
goto error;
/* The resources below are only valid with DV support. */
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
- /* Init port id action cache list. */
- snprintf(s, sizeof(s), "%s_port_id_action_cache", sh->ibdev_name);
- mlx5_cache_list_init(&sh->port_id_action_list, s, 0, sh,
- flow_dv_port_id_create_cb,
- flow_dv_port_id_match_cb,
- flow_dv_port_id_remove_cb);
- /* Init push vlan action cache list. */
- snprintf(s, sizeof(s), "%s_push_vlan_action_cache", sh->ibdev_name);
- mlx5_cache_list_init(&sh->push_vlan_action_list, s, 0, sh,
- flow_dv_push_vlan_create_cb,
- flow_dv_push_vlan_match_cb,
- flow_dv_push_vlan_remove_cb);
- /* Init sample action cache list. */
- snprintf(s, sizeof(s), "%s_sample_action_cache", sh->ibdev_name);
- mlx5_cache_list_init(&sh->sample_action_list, s, 0, sh,
- flow_dv_sample_create_cb,
- flow_dv_sample_match_cb,
- flow_dv_sample_remove_cb);
- /* Init dest array action cache list. */
- snprintf(s, sizeof(s), "%s_dest_array_cache", sh->ibdev_name);
- mlx5_cache_list_init(&sh->dest_array_list, s, 0, sh,
- flow_dv_dest_array_create_cb,
- flow_dv_dest_array_match_cb,
- flow_dv_dest_array_remove_cb);
- /* Create tags hash list table. */
- snprintf(s, sizeof(s), "%s_tags", sh->ibdev_name);
- sh->tag_table = mlx5_hlist_create(s, MLX5_TAGS_HLIST_ARRAY_SIZE, 0,
- MLX5_HLIST_WRITE_MOST,
- flow_dv_tag_create_cb,
- flow_dv_tag_match_cb,
- flow_dv_tag_remove_cb);
- if (!sh->tag_table) {
- DRV_LOG(ERR, "tags with hash creation failed.");
- err = ENOMEM;
+ /* Init port id action list. */
+ snprintf(s, sizeof(s), "%s_port_id_action_list", sh->ibdev_name);
+ sh->port_id_action_list = mlx5_list_create(s, sh, true,
+ flow_dv_port_id_create_cb,
+ flow_dv_port_id_match_cb,
+ flow_dv_port_id_remove_cb,
+ flow_dv_port_id_clone_cb,
+ flow_dv_port_id_clone_free_cb);
+ if (!sh->port_id_action_list)
goto error;
- }
- sh->tag_table->ctx = sh;
- snprintf(s, sizeof(s), "%s_hdr_modify", sh->ibdev_name);
- sh->modify_cmds = mlx5_hlist_create(s, MLX5_FLOW_HDR_MODIFY_HTABLE_SZ,
- 0, MLX5_HLIST_WRITE_MOST |
- MLX5_HLIST_DIRECT_KEY,
- flow_dv_modify_create_cb,
- flow_dv_modify_match_cb,
- flow_dv_modify_remove_cb);
- if (!sh->modify_cmds) {
- DRV_LOG(ERR, "hdr modify hash creation failed");
- err = ENOMEM;
+ /* Init push vlan action list. */
+ snprintf(s, sizeof(s), "%s_push_vlan_action_list", sh->ibdev_name);
+ sh->push_vlan_action_list = mlx5_list_create(s, sh, true,
+ flow_dv_push_vlan_create_cb,
+ flow_dv_push_vlan_match_cb,
+ flow_dv_push_vlan_remove_cb,
+ flow_dv_push_vlan_clone_cb,
+ flow_dv_push_vlan_clone_free_cb);
+ if (!sh->push_vlan_action_list)
goto error;
- }
- sh->modify_cmds->ctx = sh;
- snprintf(s, sizeof(s), "%s_encaps_decaps", sh->ibdev_name);
- sh->encaps_decaps = mlx5_hlist_create(s,
- MLX5_FLOW_ENCAP_DECAP_HTABLE_SZ,
- 0, MLX5_HLIST_DIRECT_KEY |
- MLX5_HLIST_WRITE_MOST,
- flow_dv_encap_decap_create_cb,
- flow_dv_encap_decap_match_cb,
- flow_dv_encap_decap_remove_cb);
- if (!sh->encaps_decaps) {
- DRV_LOG(ERR, "encap decap hash creation failed");
- err = ENOMEM;
+ /* Init sample action list. */
+ snprintf(s, sizeof(s), "%s_sample_action_list", sh->ibdev_name);
+ sh->sample_action_list = mlx5_list_create(s, sh, true,
+ flow_dv_sample_create_cb,
+ flow_dv_sample_match_cb,
+ flow_dv_sample_remove_cb,
+ flow_dv_sample_clone_cb,
+ flow_dv_sample_clone_free_cb);
+ if (!sh->sample_action_list)
+ goto error;
+ /* Init dest array action list. */
+ snprintf(s, sizeof(s), "%s_dest_array_list", sh->ibdev_name);
+ sh->dest_array_list = mlx5_list_create(s, sh, true,
+ flow_dv_dest_array_create_cb,
+ flow_dv_dest_array_match_cb,
+ flow_dv_dest_array_remove_cb,
+ flow_dv_dest_array_clone_cb,
+ flow_dv_dest_array_clone_free_cb);
+ if (!sh->dest_array_list)
goto error;
- }
- sh->encaps_decaps->ctx = sh;
#endif
#ifdef HAVE_MLX5DV_DR
void *domain;
goto error;
}
sh->fdb_domain = domain;
- sh->esw_drop_action = mlx5_glue->dr_create_flow_action_drop();
+ }
+ /*
+ * The drop action is just a dummy placeholder in rdma-core. It
+ * does not belong to any domain, has no attributes, and can be
+ * shared by the entire device.
+ */
+ sh->dr_drop_action = mlx5_glue->dr_create_flow_action_drop();
+ if (!sh->dr_drop_action) {
+ DRV_LOG(ERR, "FDB mlx5dv_dr_create_flow_action_drop");
+ err = errno;
+ goto error;
}
#endif
- if (!sh->tunnel_hub)
+ if (!sh->tunnel_hub && priv->config.dv_miss_info)
err = mlx5_alloc_tunnel_hub(sh);
if (err) {
DRV_LOG(ERR, "mlx5_alloc_tunnel_hub failed err=%d", err);
mlx5_glue->dr_reclaim_domain_memory(sh->fdb_domain, 1);
}
sh->pop_vlan_action = mlx5_glue->dr_create_flow_action_pop_vlan();
+ if (!priv->config.allow_duplicate_pattern) {
+#ifndef HAVE_MLX5_DR_ALLOW_DUPLICATE
+ DRV_LOG(WARNING, "Disallowing duplicate patterns is not supported - maybe old rdma-core version?");
+#endif
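+ /* A zero argument disallows inserting duplicate rules into the domain. */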
+ mlx5_glue->dr_allow_duplicate_rules(sh->rx_domain, 0);
+ mlx5_glue->dr_allow_duplicate_rules(sh->tx_domain, 0);
+ if (sh->fdb_domain)
+ mlx5_glue->dr_allow_duplicate_rules(sh->fdb_domain, 0);
+ }
+
+ __mlx5_discovery_misc5_cap(priv);
#endif /* HAVE_MLX5DV_DR */
sh->default_miss_action =
mlx5_glue->dr_create_flow_action_default_miss();
mlx5_glue->dr_destroy_domain(sh->fdb_domain);
sh->fdb_domain = NULL;
}
- if (sh->esw_drop_action) {
- mlx5_glue->destroy_flow_action(sh->esw_drop_action);
- sh->esw_drop_action = NULL;
+ if (sh->dr_drop_action) {
+ mlx5_glue->destroy_flow_action(sh->dr_drop_action);
+ sh->dr_drop_action = NULL;
}
if (sh->pop_vlan_action) {
mlx5_glue->destroy_flow_action(sh->pop_vlan_action);
sh->tunnel_hub = NULL;
}
mlx5_free_table_hash_list(priv);
+ if (sh->port_id_action_list) {
+ mlx5_list_destroy(sh->port_id_action_list);
+ sh->port_id_action_list = NULL;
+ }
+ if (sh->push_vlan_action_list) {
+ mlx5_list_destroy(sh->push_vlan_action_list);
+ sh->push_vlan_action_list = NULL;
+ }
+ if (sh->sample_action_list) {
+ mlx5_list_destroy(sh->sample_action_list);
+ sh->sample_action_list = NULL;
+ }
+ if (sh->dest_array_list) {
+ mlx5_list_destroy(sh->dest_array_list);
+ sh->dest_array_list = NULL;
+ }
return err;
}
mlx5_glue->dr_destroy_domain(sh->fdb_domain);
sh->fdb_domain = NULL;
}
- if (sh->esw_drop_action) {
- mlx5_glue->destroy_flow_action(sh->esw_drop_action);
- sh->esw_drop_action = NULL;
+ if (sh->dr_drop_action) {
+ mlx5_glue->destroy_flow_action(sh->dr_drop_action);
+ sh->dr_drop_action = NULL;
}
#endif
if (sh->pop_vlan_action) {
mlx5_release_tunnel_hub(sh, priv->dev_port);
sh->tunnel_hub = NULL;
}
- mlx5_cache_list_destroy(&sh->port_id_action_list);
- mlx5_cache_list_destroy(&sh->push_vlan_action_list);
mlx5_free_table_hash_list(priv);
+ if (sh->port_id_action_list) {
+ mlx5_list_destroy(sh->port_id_action_list);
+ sh->port_id_action_list = NULL;
+ }
+ if (sh->push_vlan_action_list) {
+ mlx5_list_destroy(sh->push_vlan_action_list);
+ sh->push_vlan_action_list = NULL;
+ }
+ if (sh->sample_action_list) {
+ mlx5_list_destroy(sh->sample_action_list);
+ sh->sample_action_list = NULL;
+ }
+ if (sh->dest_array_list) {
+ mlx5_list_destroy(sh->dest_array_list);
+ sh->dest_array_list = NULL;
+ }
}
/**
#endif
}
+/**
+ * Detect DR flow drop action support.
+ *
+ * @param dev
+ * Pointer to rte_eth_dev structure.
+ */
+static void
+mlx5_flow_drop_action_config(struct rte_eth_dev *dev __rte_unused)
+{
+#ifdef HAVE_MLX5DV_DR
+ struct mlx5_priv *priv = dev->data->dev_private;
+
+ if (!priv->config.dv_flow_en || !priv->sh->dr_drop_action)
+ return;
+ /*
+ * Use the DR drop action placeholder when the root table supports
+ * it; otherwise, fall back to the drop queue action.
+ */
+ if (mlx5_flow_discover_dr_action_support(dev))
+ priv->root_drop_action = priv->drop_queue.hrxq->action;
+ else
+ priv->root_drop_action = priv->sh->dr_drop_action;
+#endif
+}
+
static void
mlx5_queue_counter_id_prepare(struct rte_eth_dev *dev)
{
char name[RTE_ETH_NAME_MAX_LEN];
int own_domain_id = 0;
uint16_t port_id;
-#ifdef HAVE_MLX5DV_DR_DEVX_PORT
- struct mlx5dv_devx_port devx_port = { .comp_mask = 0 };
-#endif
+ struct mlx5_port_info vport_info = { .query_flags = 0 };
+ int i;
/* Determine if this port representor is supposed to be spawned. */
if (switch_info->representor && dpdk_dev->devargs &&
err = mlx5_proc_priv_init(eth_dev);
if (err)
return NULL;
- mp_id.port_id = eth_dev->data->port_id;
- strlcpy(mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN);
+ mlx5_mp_id_init(&mp_id, eth_dev->data->port_id);
/* Receive command fd from primary process */
err = mlx5_mp_req_verbs_cmd_fd(&mp_id);
if (err < 0)
priv->vport_meta_tag = 0;
priv->vport_meta_mask = 0;
priv->pf_bond = spawn->pf_bond;
-#ifdef HAVE_MLX5DV_DR_DEVX_PORT
+
+ DRV_LOG(DEBUG,
+ "dev_port=%u bus=%s pci=%s master=%d representor=%d pf_bond=%d\n",
+ priv->dev_port, dpdk_dev->bus->name,
+ priv->pci_dev ? priv->pci_dev->name : "NONE",
+ priv->master, priv->representor, priv->pf_bond);
+
/*
- * The DevX port query API is implemented. E-Switch may use
- * either vport or reg_c[0] metadata register to match on
- * vport index. The engaged part of metadata register is
- * defined by mask.
+ * If we have E-Switch, we should determine the vport attributes.
+ * E-Switch may use either source vport field or reg_c[0] metadata
+ * register to match on vport index. The engaged part of metadata
+ * register is defined by mask.
*/
if (switch_info->representor || switch_info->master) {
- devx_port.comp_mask = MLX5DV_DEVX_PORT_VPORT |
- MLX5DV_DEVX_PORT_MATCH_REG_C_0;
- err = mlx5_glue->devx_port_query(sh->ctx, spawn->phys_port,
- &devx_port);
+ err = mlx5_glue->devx_port_query(sh->ctx,
+ spawn->phys_port,
+ &vport_info);
if (err) {
DRV_LOG(WARNING,
"can't query devx port %d on device %s",
spawn->phys_port,
mlx5_os_get_dev_device_name(spawn->phys_dev));
- devx_port.comp_mask = 0;
+ vport_info.query_flags = 0;
}
}
- if (devx_port.comp_mask & MLX5DV_DEVX_PORT_MATCH_REG_C_0) {
- priv->vport_meta_tag = devx_port.reg_c_0.value;
- priv->vport_meta_mask = devx_port.reg_c_0.mask;
+ if (vport_info.query_flags & MLX5_PORT_QUERY_REG_C0) {
+ priv->vport_meta_tag = vport_info.vport_meta_tag;
+ priv->vport_meta_mask = vport_info.vport_meta_mask;
if (!priv->vport_meta_mask) {
DRV_LOG(ERR, "vport zero mask for port %d"
" on bonding device %s",
goto error;
}
}
- if (devx_port.comp_mask & MLX5DV_DEVX_PORT_VPORT) {
- priv->vport_id = devx_port.vport_num;
- } else if (spawn->pf_bond >= 0) {
+ if (vport_info.query_flags & MLX5_PORT_QUERY_VPORT) {
+ priv->vport_id = vport_info.vport_id;
+ } else if (spawn->pf_bond >= 0 &&
+ (switch_info->representor || switch_info->master)) {
DRV_LOG(ERR, "can't deduce vport index for port %d"
" on bonding device %s",
spawn->phys_port,
err = ENOTSUP;
goto error;
} else {
- /* Suppose vport index in compatible way. */
+ /*
+ * Deduce the vport index in the compatible way: kernel/rdma_core
+ * supports single E-Switch per PF configurations only, and the
+ * vport_id field contains the vport index for the associated VF,
+ * which is deduced from the representor port name.
+ * For example, for IB device port 10 with the attached network
+ * device eth0, whose port name attribute is pf0vf2, we deduce
+ * the VF number as 2 and set the vport index to 3 (2 + 1). This
+ * assignment schema should be changed if multiple E-Switch
+ * instances per PF or/and PCI subfunctions are added.
+ */
priv->vport_id = switch_info->representor ?
switch_info->port_name + 1 : -1;
}
-#else
- /*
- * Kernel/rdma_core support single E-Switch per PF configurations
- * only and vport_id field contains the vport index for
- * associated VF, which is deduced from representor port name.
- * For example, let's have the IB device port 10, it has
- * attached network device eth0, which has port name attribute
- * pf0vf2, we can deduce the VF number as 2, and set vport index
- * as 3 (2+1). This assigning schema should be changed if the
- * multiple E-Switch instances per PF configurations or/and PCI
- * subfunctions are added.
- */
- priv->vport_id = switch_info->representor ?
- switch_info->port_name + 1 : -1;
-#endif
priv->representor_id = mlx5_representor_id_encode(switch_info,
eth_da->type);
/*
* Look for sibling devices in order to reuse their switch domain
* if any, otherwise allocate one.
*/
- MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
+ MLX5_ETH_FOREACH_DEV(port_id, dpdk_dev) {
const struct mlx5_priv *opriv =
rte_eth_devices[port_id].data->dev_private;
RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
continue;
priv->domain_id = opriv->domain_id;
+ DRV_LOG(DEBUG, "dev_port-%u inherit domain_id=%u\n",
+ priv->dev_port, priv->domain_id);
break;
}
if (priv->domain_id == RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) {
goto error;
}
own_domain_id = 1;
+ DRV_LOG(DEBUG, "dev_port-%u new domain_id=%u\n",
+ priv->dev_port, priv->domain_id);
}
/* Override some values set by hardware configuration. */
mlx5_args(config, dpdk_dev->devargs);
- err = mlx5_dev_check_sibling_config(priv, config);
+ err = mlx5_dev_check_sibling_config(priv, config, dpdk_dev);
if (err)
goto error;
config->hw_csum = !!(sh->device_attr.device_cap_flags_ex &
config->dv_flow_en = 0;
}
#endif
+ if (spawn->max_port > UINT8_MAX) {
+ /* Verbs can't support ports larger than 255 by design. */
+ DRV_LOG(ERR, "can't support IB ports > UINT8_MAX");
+ err = EINVAL;
+ goto error;
+ }
config->ind_table_max_size =
sh->device_attr.max_rwq_indirection_table_size;
/*
}
sh->rq_ts_format = config->hca_attr.rq_ts_format;
sh->sq_ts_format = config->hca_attr.sq_ts_format;
+ sh->steering_format_version =
+ config->hca_attr.steering_format_version;
sh->qp_ts_format = config->hca_attr.qp_ts_format;
/* Check for LRO support. */
if (config->dest_tir && config->hca_attr.lro_cap &&
"required for coalescing is %d bytes",
config->hca_attr.lro_min_mss_size);
}
-#if defined(HAVE_MLX5DV_DR) && defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_METER)
+#if defined(HAVE_MLX5DV_DR) && \
+ (defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_METER) || \
+ defined(HAVE_MLX5_DR_CREATE_ACTION_ASO))
if (config->hca_attr.qos.sup &&
config->hca_attr.qos.flow_meter_old &&
config->dv_flow_en) {
priv->mtr_color_reg);
}
}
+ if (config->hca_attr.qos.sup &&
+ config->hca_attr.qos.flow_meter_aso_sup) {
+ uint32_t log_obj_size =
+ rte_log2_u32(MLX5_ASO_MTRS_PER_POOL >> 1);
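+ /*
+ * One ASO object holds a pair of meters, hence the >> 1; enable
+ * ASO metering only if the pool size fits the HCA allocation
+ * granularity window.
+ */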
+ if (log_obj_size >=
+ config->hca_attr.qos.log_meter_aso_granularity &&
+ log_obj_size <=
+ config->hca_attr.qos.log_meter_aso_max_alloc)
+ sh->meter_aso_en = 1;
+ }
+ if (priv->mtr_en) {
+ err = mlx5_aso_flow_mtrs_mng_init(priv->sh);
+ if (err) {
+ err = -err;
+ goto error;
+ }
+ }
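+ /* HCA supports matching on tunnel_header_0/1 (misc5), e.g. the VxLAN VNI. */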
+ if (config->hca_attr.flow.tunnel_header_0_1)
+ sh->tunnel_header_0_1 = 1;
#endif
#ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO
if (config->hca_attr.flow_hit_aso &&
DRV_LOG(DEBUG, "Flow Hit ASO is supported.");
}
#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
+#if defined(HAVE_MLX5_DR_CREATE_ACTION_ASO) && \
+ defined(HAVE_MLX5_DR_ACTION_ASO_CT)
+ if (config->hca_attr.ct_offload &&
+ priv->mtr_color_reg == REG_C_3) {
+ err = mlx5_flow_aso_ct_mng_init(sh);
+ if (err) {
+ err = -err;
+ goto error;
+ }
+ DRV_LOG(DEBUG, "CT ASO is supported.");
+ sh->ct_aso_en = 1;
+ }
+#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO && HAVE_MLX5_DR_ACTION_ASO_CT */
#if defined(HAVE_MLX5DV_DR) && defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_SAMPLE)
if (config->hca_attr.log_max_ft_sampler_num > 0 &&
config->dv_flow_en) {
if (priv->representor) {
eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR;
eth_dev->data->representor_id = priv->representor_id;
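+ /* Record the master port sharing this domain and context as the backing port. */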
+ MLX5_ETH_FOREACH_DEV(port_id, dpdk_dev) {
+ struct mlx5_priv *opriv =
+ rte_eth_devices[port_id].data->dev_private;
+ if (opriv &&
+ opriv->master &&
+ opriv->domain_id == priv->domain_id &&
+ opriv->sh == priv->sh) {
+ eth_dev->data->backer_port_id = port_id;
+ break;
+ }
+ }
+ if (port_id >= RTE_MAX_ETHPORTS)
+ eth_dev->data->backer_port_id = eth_dev->data->port_id;
}
priv->mp_id.port_id = eth_dev->data->port_id;
strlcpy(priv->mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN);
goto error;
}
DRV_LOG(INFO,
- "port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
- eth_dev->data->port_id,
- mac.addr_bytes[0], mac.addr_bytes[1],
- mac.addr_bytes[2], mac.addr_bytes[3],
- mac.addr_bytes[4], mac.addr_bytes[5]);
+ "port %u MAC address is " RTE_ETHER_ADDR_PRT_FMT,
+ eth_dev->data->port_id, RTE_ETHER_ADDR_BYTES(&mac));
#ifdef RTE_LIBRTE_MLX5_DEBUG
{
char ifname[MLX5_NAMESIZE];
mlx5_ifindex(eth_dev),
eth_dev->data->mac_addrs,
MLX5_MAX_MAC_ADDRESSES);
- priv->flows = 0;
priv->ctrl_flows = 0;
rte_spinlock_init(&priv->flow_list_lock);
TAILQ_INIT(&priv->flow_meters);
- TAILQ_INIT(&priv->flow_meter_profiles);
+ priv->mtr_profile_tbl = mlx5_l3t_create(MLX5_L3T_TYPE_PTR);
+ if (!priv->mtr_profile_tbl)
+ goto error;
/* Hint libmlx5 to use PMD allocator for data plane resources */
mlx5_glue->dv_set_context_attr(sh->ctx,
MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
mlx5_set_min_inline(spawn, config);
/* Store device configuration on private structure. */
priv->config = *config;
+ for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
+ icfg[i].release_mem_en = !!config->reclaim_mode;
+ if (config->reclaim_mode)
+ icfg[i].per_core_cache = 0;
+ priv->flows[i] = mlx5_ipool_create(&icfg[i]);
+ if (!priv->flows[i])
+ goto error;
+ }
/* Create context for virtual machine VLAN workaround. */
priv->vmwa_context = mlx5_vlan_vmwa_init(eth_dev, spawn->ifindex);
if (config->dv_flow_en) {
priv->obj_ops.txq_obj_new = mlx5_os_txq_obj_new;
priv->obj_ops.txq_obj_release = mlx5_os_txq_obj_release;
mlx5_queue_counter_id_prepare(eth_dev);
-
+ priv->obj_ops.lb_dummy_queue_create =
+ mlx5_rxq_ibv_obj_dummy_lb_create;
+ priv->obj_ops.lb_dummy_queue_release =
+ mlx5_rxq_ibv_obj_dummy_lb_release;
} else {
priv->obj_ops = ibv_obj_ops;
}
+ if (config->tx_pp &&
+ (priv->config.dv_esw_en ||
+ priv->obj_ops.txq_obj_new != mlx5_os_txq_obj_new)) {
+ /*
+ * HAVE_MLX5DV_DEVX_UAR_OFFSET is required to support
+ * packet pacing and already checked above.
+ * Hence, we should only make sure the SQs will be created
+ * with DevX, not with Verbs.
+ * Verbs allocates the SQ UAR on its own and it can't be shared
+ * with Clock Queue UAR as required for Tx scheduling.
+ */
+ DRV_LOG(ERR, "Verbs SQs, UAR can't be shared as required for packet pacing");
+ err = ENODEV;
+ goto error;
+ }
priv->drop_queue.hrxq = mlx5_drop_action_create(eth_dev);
if (!priv->drop_queue.hrxq)
goto error;
err = ENOTSUP;
goto error;
}
- mlx5_cache_list_init(&priv->hrxqs, "hrxq", 0, eth_dev,
- mlx5_hrxq_create_cb,
- mlx5_hrxq_match_cb,
- mlx5_hrxq_remove_cb);
+ priv->hrxqs = mlx5_list_create("hrxq", eth_dev, true,
+ mlx5_hrxq_create_cb,
+ mlx5_hrxq_match_cb,
+ mlx5_hrxq_remove_cb,
+ mlx5_hrxq_clone_cb,
+ mlx5_hrxq_clone_free_cb);
+ if (!priv->hrxqs)
+ goto error;
+ rte_rwlock_init(&priv->ind_tbls_lock);
/* Query availability of metadata reg_c's. */
err = mlx5_flow_discover_mreg_c(eth_dev);
if (err < 0) {
priv->sh->dv_regc0_mask) {
priv->mreg_cp_tbl = mlx5_hlist_create(MLX5_FLOW_MREG_HNAME,
MLX5_FLOW_MREG_HTABLE_SZ,
- 0, 0,
+ false, true, eth_dev,
flow_dv_mreg_create_cb,
flow_dv_mreg_match_cb,
- flow_dv_mreg_remove_cb);
+ flow_dv_mreg_remove_cb,
+ flow_dv_mreg_clone_cb,
+ flow_dv_mreg_clone_free_cb);
if (!priv->mreg_cp_tbl) {
err = ENOMEM;
goto error;
}
- priv->mreg_cp_tbl->ctx = eth_dev;
}
rte_spinlock_init(&priv->shared_act_sl);
mlx5_flow_counter_mode_config(eth_dev);
+ mlx5_flow_drop_action_config(eth_dev);
if (priv->config.dv_flow_en)
eth_dev->data->dev_flags |= RTE_ETH_DEV_FLOW_OPS_THREAD_SAFE;
return eth_dev;
mlx5_vlan_vmwa_exit(priv->vmwa_context);
if (eth_dev && priv->drop_queue.hrxq)
mlx5_drop_action_destroy(eth_dev);
+ if (priv->mtr_profile_tbl)
+ mlx5_l3t_destroy(priv->mtr_profile_tbl);
if (own_domain_id)
claim_zero(rte_eth_switch_domain_free(priv->domain_id));
- mlx5_cache_list_destroy(&priv->hrxqs);
+ if (priv->hrxqs)
+ mlx5_list_destroy(priv->hrxqs);
mlx5_free(priv);
if (eth_dev != NULL)
eth_dev->data->dev_private = NULL;
/* Process slave interface names in the loop. */
snprintf(tmp_str, sizeof(tmp_str),
"/sys/class/net/%s", ifname);
- if (mlx5_dev_to_pci_addr(tmp_str, &pci_addr)) {
+ if (mlx5_get_pci_addr(tmp_str, &pci_addr)) {
DRV_LOG(WARNING, "can not get PCI address"
" for netdev \"%s\"", ifname);
continue;
tmp_str);
break;
}
- /* Match PCI address. */
+ /* Match PCI address: either function 0 plus the owner offset, or the owner function itself. */
if (pci_dev->domain == pci_addr.domain &&
pci_dev->bus == pci_addr.bus &&
pci_dev->devid == pci_addr.devid &&
- pci_dev->function + owner == pci_addr.function)
+ ((pci_dev->function == 0 &&
+ pci_dev->function + owner == pci_addr.function) ||
+ (pci_dev->function == owner &&
+ pci_addr.function == owner)))
pf = info.port_name;
/* Get ifindex. */
snprintf(tmp_str, sizeof(tmp_str),
return pf;
}
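+/**
+ * Set default ethdev configuration values.
+ *
+ * @param[out] config
+ *   Pointer to the device configuration structure to fill.
+ */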
+static void
+mlx5_os_config_default(struct mlx5_dev_config *config)
+{
+ memset(config, 0, sizeof(*config));
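+ /* MLX5_ARG_UNSET values are resolved later from devargs and HCA capabilities. */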
+ config->mps = MLX5_ARG_UNSET;
+ config->dbnc = MLX5_ARG_UNSET;
+ config->rx_vec_en = 1;
+ config->txq_inline_max = MLX5_ARG_UNSET;
+ config->txq_inline_min = MLX5_ARG_UNSET;
+ config->txq_inline_mpw = MLX5_ARG_UNSET;
+ config->txqs_inline = MLX5_ARG_UNSET;
+ config->vf_nl_en = 1;
+ config->mr_ext_memseg_en = 1;
+ config->mr_mempool_reg_en = 1;
+ config->mprq.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN;
+ config->mprq.min_rxqs_num = MLX5_MPRQ_MIN_RXQS;
+ config->dv_esw_en = 1;
+ config->dv_flow_en = 1;
+ config->decap_en = 1;
+ config->log_hp_size = MLX5_ARG_UNSET;
+ config->allow_duplicate_pattern = 1;
+}
+
/**
* Register a PCI device within bonding.
*
struct mlx5_bond_info bond_info;
int ret = -1;
- if (rte_eal_process_type() == RTE_PROC_PRIMARY)
- mlx5_pmd_socket_init();
- ret = mlx5_init_once();
- if (ret) {
- DRV_LOG(ERR, "unable to init PMD global data: %s",
- strerror(rte_errno));
- return -rte_errno;
- }
errno = 0;
ibv_list = mlx5_glue->get_device_list(&ret);
if (!ibv_list) {
break;
} else {
/* Bonding device not found. */
- if (mlx5_dev_to_pci_addr
- (ibv_list[ret]->ibdev_path, &pci_addr))
+ if (mlx5_get_pci_addr(ibv_list[ret]->ibdev_path,
+ &pci_addr))
continue;
if (owner_pci.domain != pci_addr.domain ||
owner_pci.bus != pci_addr.bus ||
goto exit;
}
}
-#ifndef HAVE_MLX5DV_DR_DEVX_PORT
- if (bd >= 0) {
- /*
- * This may happen if there is VF LAG kernel support and
- * application is compiled with older rdma_core library.
- */
- DRV_LOG(ERR,
- "No kernel/verbs support for VF LAG bonding found.");
- rte_errno = ENOTSUP;
- ret = -rte_errno;
- goto exit;
- }
-#endif
/*
* Now we can determine the maximal
* amount of devices to be spawned.
(list[ns].ifindex,
&list[ns].info);
}
-#ifdef HAVE_MLX5DV_DR_DEVX_PORT
if (!ret && bd >= 0) {
switch (list[ns].info.name_type) {
case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+ if (np == 1) {
+ /*
+ * Force standalone bonding
+ * device for ROCE LAG
+ * configurations.
+ */
+ list[ns].info.master = 0;
+ list[ns].info.representor = 0;
+ }
if (list[ns].info.port_name == bd)
ns++;
break;
}
continue;
}
-#endif
if (!ret && (list[ns].info.representor ^
list[ns].info.master))
ns++;
ret = -rte_errno;
goto exit;
}
+ /*
+ * New kernels may add the switch_id attribute for the case
+ * there is no E-Switch and we wrongly recognized the
+ * only device as master. Override this if there is a
+ * single device with a single port and the new device
+ * name format is present.
+ */
+ if (nd == 1 &&
+ list[0].info.name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK) {
+ list[0].info.master = 0;
+ list[0].info.representor = 0;
+ }
}
MLX5_ASSERT(ns);
/*
uint32_t restore;
/* Default configuration. */
- memset(&dev_config, 0, sizeof(struct mlx5_dev_config));
+ mlx5_os_config_default(&dev_config);
dev_config.vf = dev_config_vf;
- dev_config.mps = MLX5_ARG_UNSET;
- dev_config.dbnc = MLX5_ARG_UNSET;
- dev_config.rx_vec_en = 1;
- dev_config.txq_inline_max = MLX5_ARG_UNSET;
- dev_config.txq_inline_min = MLX5_ARG_UNSET;
- dev_config.txq_inline_mpw = MLX5_ARG_UNSET;
- dev_config.txqs_inline = MLX5_ARG_UNSET;
- dev_config.vf_nl_en = 1;
- dev_config.mr_ext_memseg_en = 1;
- dev_config.mprq.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN;
- dev_config.mprq.min_rxqs_num = MLX5_MPRQ_MIN_RXQS;
- dev_config.dv_esw_en = 1;
- dev_config.dv_flow_en = 1;
- dev_config.decap_en = 1;
- dev_config.log_hp_size = MLX5_ARG_UNSET;
+ list[i].numa_node = pci_dev->device.numa_node;
list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,
&list[i],
&dev_config,
}
restore = list[i].eth_dev->data->dev_flags;
rte_eth_copy_pci_info(list[i].eth_dev, pci_dev);
+ /*
+ * Each representor has a dedicated interrupts vector.
+ * rte_eth_copy_pci_info() assigns PF interrupts handle to
+ * representor eth_dev object because representor and PF
+ * share the same PCI address.
+ * Override representor device with a dedicated
+ * interrupts handle here.
+ * Representor interrupts handle is released in mlx5_dev_stop().
+ */
+ if (list[i].info.representor) {
+ struct rte_intr_handle *intr_handle;
+ intr_handle = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
+ sizeof(*intr_handle), 0,
+ SOCKET_ID_ANY);
+ if (!intr_handle) {
+ DRV_LOG(ERR,
+ "port %u failed to allocate memory for interrupt handler "
+ "Rx interrupts will not be supported",
+ i);
+ rte_errno = ENOMEM;
+ ret = -rte_errno;
+ goto exit;
+ }
+ list[i].eth_dev->intr_handle = intr_handle;
+ }
/* Restore non-PCI flags cleared by the above call. */
list[i].eth_dev->data->dev_flags |= restore;
rte_eth_dev_probing_finish(list[i].eth_dev);
return ret;
}
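+/**
+ * Parse ethdev representor devargs of a device.
+ *
+ * @param dev
+ *   Pointer to the generic device.
+ * @param[out] eth_da
+ *   Devargs parsing output.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */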
+static int
+mlx5_os_parse_eth_devargs(struct rte_device *dev,
+ struct rte_eth_devargs *eth_da)
+{
+ int ret = 0;
+
+ if (dev->devargs == NULL)
+ return 0;
+ memset(eth_da, 0, sizeof(*eth_da));
+ /* Parse representor information first from class argument. */
+ if (dev->devargs->cls_str)
+ ret = rte_eth_devargs_parse(dev->devargs->cls_str, eth_da);
+ if (ret != 0) {
+ DRV_LOG(ERR, "failed to parse device arguments: %s",
+ dev->devargs->cls_str);
+ return -rte_errno;
+ }
+ if (eth_da->type == RTE_ETH_REPRESENTOR_NONE) {
+ /* Parse legacy device argument */
+ ret = rte_eth_devargs_parse(dev->devargs->args, eth_da);
+ if (ret) {
+ DRV_LOG(ERR, "failed to parse device arguments: %s",
+ dev->devargs->args);
+ return -rte_errno;
+ }
+ }
+ return 0;
+}
+
/**
- * DPDK callback to register a PCI device.
+ * Callback to register a PCI device.
*
* This function spawns Ethernet devices out of a given PCI device.
*
- * @param[in] pci_drv
- * PCI driver structure (mlx5_driver).
* @param[in] pci_dev
* PCI device information.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-int
-mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
- struct rte_pci_device *pci_dev)
+static int
+mlx5_os_pci_probe(struct rte_pci_device *pci_dev)
{
- struct rte_eth_devargs eth_da = { .type = RTE_ETH_REPRESENTOR_NONE };
+ struct rte_eth_devargs eth_da = { .nb_ports = 0 };
int ret = 0;
uint16_t p;
- if (pci_dev->device.devargs) {
- /* Parse representor information from device argument. */
- if (pci_dev->device.devargs->cls_str)
- ret = rte_eth_devargs_parse
- (pci_dev->device.devargs->cls_str, &eth_da);
- if (ret) {
- DRV_LOG(ERR, "failed to parse device arguments: %s",
- pci_dev->device.devargs->cls_str);
- return -rte_errno;
- }
- if (eth_da.type == RTE_ETH_REPRESENTOR_NONE) {
- /* Support legacy device argument */
- ret = rte_eth_devargs_parse
- (pci_dev->device.devargs->args, &eth_da);
- if (ret) {
- DRV_LOG(ERR, "failed to parse device arguments: %s",
- pci_dev->device.devargs->args);
- return -rte_errno;
- }
- }
- }
+ ret = mlx5_os_parse_eth_devargs(&pci_dev->device, &eth_da);
+ if (ret != 0)
+ return ret;
if (eth_da.nb_ports > 0) {
/* Iterate all port if devargs pf is range: "pf[0-1]vf[...]". */
- for (p = 0; p < eth_da.nb_ports; p++)
+ for (p = 0; p < eth_da.nb_ports; p++) {
ret = mlx5_os_pci_probe_pf(pci_dev, &eth_da,
eth_da.ports[p]);
+ if (ret)
+ break;
+ }
+ if (ret) {
+ DRV_LOG(ERR, "Probe of PCI device " PCI_PRI_FMT " "
+ "aborted due to proding failure of PF %u",
+ pci_dev->addr.domain, pci_dev->addr.bus,
+ pci_dev->addr.devid, pci_dev->addr.function,
+ eth_da.ports[p]);
+ mlx5_net_remove(&pci_dev->device);
+ }
} else {
ret = mlx5_os_pci_probe_pf(pci_dev, &eth_da, 0);
}
return ret;
}
+/* Probe a single SF device on auxiliary bus, no representor support. */
+static int
+mlx5_os_auxiliary_probe(struct rte_device *dev)
+{
+ struct rte_eth_devargs eth_da = { .nb_ports = 0 };
+ struct mlx5_dev_config config;
+ struct mlx5_dev_spawn_data spawn = { .pf_bond = -1 };
+ struct rte_auxiliary_device *adev = RTE_DEV_TO_AUXILIARY(dev);
+ struct rte_eth_dev *eth_dev;
+ int ret = 0;
+
+ /* Parse ethdev devargs. */
+ ret = mlx5_os_parse_eth_devargs(dev, ð_da);
+ if (ret != 0)
+ return ret;
+ /* Set default config data. */
+ mlx5_os_config_default(&config);
+ config.sf = 1;
+ /* Init spawn data. */
+ spawn.max_port = 1;
+ spawn.phys_port = 1;
+ spawn.phys_dev = mlx5_os_get_ibv_dev(dev);
+ if (spawn.phys_dev == NULL)
+ return -rte_errno;
+ ret = mlx5_auxiliary_get_ifindex(dev->name);
+ if (ret < 0) {
+ DRV_LOG(ERR, "failed to get ethdev ifindex: %s", dev->name);
+ return ret;
+ }
+ spawn.ifindex = ret;
+ spawn.numa_node = dev->numa_node;
+ /* Spawn device. */
+ eth_dev = mlx5_dev_spawn(dev, &spawn, &config, &eth_da);
+ if (eth_dev == NULL)
+ return -rte_errno;
+ /* Post create. */
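+ /* An SF reuses the auxiliary device interrupt handle; no dedicated allocation. */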
+ eth_dev->intr_handle = &adev->intr_handle;
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+ eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_RMV;
+ eth_dev->data->numa_node = dev->numa_node;
+ }
+ rte_eth_dev_probing_finish(eth_dev);
+ return 0;
+}
+
+/**
+ * Net class driver callback to probe a device.
+ *
+ * This function probes PCI bus device(s) or a single SF on the auxiliary bus.
+ *
+ * @param[in] dev
+ * Pointer to the generic device.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_os_net_probe(struct rte_device *dev)
+{
+ int ret;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ mlx5_pmd_socket_init();
+ ret = mlx5_init_once();
+ if (ret) {
+ DRV_LOG(ERR, "unable to init PMD global data: %s",
+ strerror(rte_errno));
+ return -rte_errno;
+ }
+ if (mlx5_dev_is_pci(dev))
+ return mlx5_os_pci_probe(RTE_DEV_TO_PCI(dev));
+ else
+ return mlx5_os_auxiliary_probe(dev);
+}
+
static int
mlx5_config_doorbell_mapping_env(const struct mlx5_dev_config *config)
{
int dbmap_env;
int err = 0;
- sh->numa_node = spawn->pci_dev->device.numa_node;
pthread_mutex_init(&sh->txpp.mutex, NULL);
/*
* Configure environment variable "MLX5_BF_SHUT_UP"
if (priv->sh) {
if (priv->q_counters != NULL &&
strcmp(ctr_name, "out_of_buffer") == 0)
- return mlx5_devx_cmd_queue_counter_query(priv->sh->ctx,
- 0, (uint32_t *)stat);
+ return mlx5_devx_cmd_queue_counter_query
+ (priv->q_counters, 0, (uint32_t *)stat);
MKSTR(path, "%s/ports/%d/hw_counters/%s",
priv->sh->ibdev_path,
priv->dev_port,