#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_autoconf.h"
-#include "mlx5_mr.h"
#include "mlx5_flow.h"
#include "rte_pmd_mlx5.h"
#include "mlx5_verbs.h"
* Pointer to RQ channel object, which includes the channel fd
*
* @param[out] fd
- * The file descriptor (representing the intetrrupt) used in this channel.
+ * The file descriptor (representing the interrupt) used in this channel.
*
* @return
* 0 on successfully setting the fd to non-blocking, non-zero otherwise.
* with out parameter of type 'struct ibv_device_attr_ex *'. Then fill in mlx5
* device attributes from the glue out parameter.
*
- * @param dev
- * Pointer to ibv context.
+ * @param cdev
+ * Pointer to mlx5 device.
*
* @param device_attr
* Pointer to mlx5 device attributes.
* 0 on success, non zero error number otherwise
*/
int
-mlx5_os_get_dev_attr(void *ctx, struct mlx5_dev_attr *device_attr)
+mlx5_os_get_dev_attr(struct mlx5_common_device *cdev,
+ struct mlx5_dev_attr *device_attr)
{
int err;
+ struct ibv_context *ctx = cdev->ctx;
struct ibv_device_attr_ex attr_ex;
+
memset(device_attr, 0, sizeof(*device_attr));
err = mlx5_glue->query_device_ex(ctx, NULL, &attr_ex);
if (err)
return err;
-
device_attr->device_cap_flags_ex = attr_ex.device_cap_flags_ex;
device_attr->max_qp_wr = attr_ex.orig_attr.max_qp_wr;
device_attr->max_sge = attr_ex.orig_attr.max_sge;
metadata_reg_c_0, 0xffff);
}
#endif
- matcher = mlx5_glue->dv_create_flow_matcher(priv->sh->ctx,
+ matcher = mlx5_glue->dv_create_flow_matcher(priv->sh->cdev->ctx,
&dv_attr, tbl);
if (matcher) {
priv->sh->misc5_cap = 1;
flow_dv_dest_array_clone_free_cb);
if (!sh->dest_array_list)
goto error;
+ /* Init shared flex parsers list, no need lcore_share */
+ snprintf(s, sizeof(s), "%s_flex_parsers_list", sh->ibdev_name);
+ sh->flex_parsers_dv = mlx5_list_create(s, sh, false,
+ mlx5_flex_parser_create_cb,
+ mlx5_flex_parser_match_cb,
+ mlx5_flex_parser_remove_cb,
+ mlx5_flex_parser_clone_cb,
+ mlx5_flex_parser_clone_free_cb);
+ if (!sh->flex_parsers_dv)
+ goto error;
#endif
#ifdef HAVE_MLX5DV_DR
void *domain;
/* Reference counter is zero, we should initialize structures. */
- domain = mlx5_glue->dr_create_domain(sh->ctx,
+ domain = mlx5_glue->dr_create_domain(sh->cdev->ctx,
MLX5DV_DR_DOMAIN_TYPE_NIC_RX);
if (!domain) {
DRV_LOG(ERR, "ingress mlx5dv_dr_create_domain failed");
goto error;
}
sh->rx_domain = domain;
- domain = mlx5_glue->dr_create_domain(sh->ctx,
+ domain = mlx5_glue->dr_create_domain(sh->cdev->ctx,
MLX5DV_DR_DOMAIN_TYPE_NIC_TX);
if (!domain) {
DRV_LOG(ERR, "egress mlx5dv_dr_create_domain failed");
sh->tx_domain = domain;
#ifdef HAVE_MLX5DV_DR_ESWITCH
if (priv->config.dv_esw_en) {
- domain = mlx5_glue->dr_create_domain
- (sh->ctx, MLX5DV_DR_DOMAIN_TYPE_FDB);
+ domain = mlx5_glue->dr_create_domain(sh->cdev->ctx,
+ MLX5DV_DR_DOMAIN_TYPE_FDB);
if (!domain) {
DRV_LOG(ERR, "FDB mlx5dv_dr_create_domain failed");
err = errno;
mlx5_glue->dr_create_flow_action_default_miss();
if (!sh->default_miss_action)
DRV_LOG(WARNING, "Default miss action is not supported.");
+ LIST_INIT(&sh->shared_rxqs);
return 0;
error:
/* Rollback the created objects. */
MLX5_ASSERT(sh && sh->refcnt);
if (sh->refcnt > 1)
return;
+ MLX5_ASSERT(LIST_EMPTY(&sh->shared_rxqs));
#ifdef HAVE_MLX5DV_DR
if (sh->rx_domain) {
mlx5_glue->dr_destroy_domain(sh->rx_domain);
case RTE_PROC_PRIMARY:
if (sd->init_done)
break;
- LIST_INIT(&sd->mem_event_cb_list);
- rte_rwlock_init(&sd->mem_event_rwlock);
- rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
- mlx5_mr_mem_event_cb, NULL);
ret = mlx5_mp_init_primary(MLX5_MP_NAME,
mlx5_mp_os_primary_handle);
if (ret)
return ret;
}
-/**
- * Create the Tx queue DevX/Verbs object.
- *
- * @param dev
- * Pointer to Ethernet device.
- * @param idx
- * Queue index in DPDK Tx queue array.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_os_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx)
-{
- struct mlx5_priv *priv = dev->data->dev_private;
- struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
- struct mlx5_txq_ctrl *txq_ctrl =
- container_of(txq_data, struct mlx5_txq_ctrl, txq);
-
- if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN)
- return mlx5_txq_devx_obj_new(dev, idx);
-#ifdef HAVE_MLX5DV_DEVX_UAR_OFFSET
- if (!priv->config.dv_esw_en)
- return mlx5_txq_devx_obj_new(dev, idx);
-#endif
- return mlx5_txq_ibv_obj_new(dev, idx);
-}
-
-/**
- * Release an Tx DevX/verbs queue object.
- *
- * @param txq_obj
- * DevX/Verbs Tx queue object.
- */
-static void
-mlx5_os_txq_obj_release(struct mlx5_txq_obj *txq_obj)
-{
- if (txq_obj->txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {
- mlx5_txq_devx_obj_release(txq_obj);
- return;
- }
-#ifdef HAVE_MLX5DV_DEVX_UAR_OFFSET
- if (!txq_obj->txq_ctrl->priv->config.dv_esw_en) {
- mlx5_txq_devx_obj_release(txq_obj);
- return;
- }
-#endif
- mlx5_txq_ibv_obj_release(txq_obj);
-}
-
/**
* DV flow counter mode detect and config.
*
fallback = true;
#else
fallback = false;
- if (!priv->config.devx || !priv->config.dv_flow_en ||
+ if (!sh->devx || !priv->config.dv_flow_en ||
!priv->config.hca_attr.flow_counters_dump ||
!(priv->config.hca_attr.flow_counter_bulk_alloc_bitmap & 0x4) ||
(mlx5_flow_dv_discover_counter_offset_support(dev) == -ENOTSUP))
* DR supports drop action placeholder when it is supported;
* otherwise, use the queue drop action.
*/
- if (mlx5_flow_discover_dr_action_support(dev))
- priv->root_drop_action = priv->drop_queue.hrxq->action;
- else
+ if (!priv->sh->drop_action_check_flag) {
+ if (!mlx5_flow_discover_dr_action_support(dev))
+ priv->sh->dr_drop_action_en = 1;
+ priv->sh->drop_action_check_flag = 1;
+ }
+ if (priv->sh->dr_drop_action_en)
priv->root_drop_action = priv->sh->dr_drop_action;
+ else
+ priv->root_drop_action = priv->drop_queue.hrxq->action;
#endif
}
mlx5_queue_counter_id_prepare(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
- void *ctx = priv->sh->ctx;
+ void *ctx = priv->sh->cdev->ctx;
priv->q_counters = mlx5_devx_cmd_queue_counter_alloc(ctx);
if (!priv->q_counters) {
.wq_type = IBV_WQT_RQ,
.max_wr = 1,
.max_sge = 1,
- .pd = priv->sh->pd,
+ .pd = priv->sh->cdev->pd,
.cq = cq,
});
if (wq) {
return false;
}
-
/**
* Spawn an Ethernet device from Verbs information.
*
{
const struct mlx5_switch_info *switch_info = &spawn->info;
struct mlx5_dev_ctx_shared *sh = NULL;
- struct ibv_port_attr port_attr;
+ struct ibv_port_attr port_attr = { .state = IBV_PORT_NOP };
struct mlx5dv_context dv_attr = { .comp_mask = 0 };
struct rte_eth_dev *eth_dev = NULL;
struct mlx5_priv *priv = NULL;
int own_domain_id = 0;
uint16_t port_id;
struct mlx5_port_info vport_info = { .query_flags = 0 };
+ int nl_rdma = -1;
int i;
/* Determine if this port representor is supposed to be spawned. */
sh = mlx5_alloc_shared_dev_ctx(spawn, config);
if (!sh)
return NULL;
- config->devx = sh->devx;
#ifdef HAVE_MLX5DV_DR_ACTION_DEST_DEVX_TIR
config->dest_tir = 1;
#endif
#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_STRIDING_RQ;
#endif
- mlx5_glue->dv_query_device(sh->ctx, &dv_attr);
+ mlx5_glue->dv_query_device(sh->cdev->ctx, &dv_attr);
if (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) {
if (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW) {
DRV_LOG(DEBUG, "enhanced MPW is supported");
" old OFED/rdma-core version or firmware configuration");
#endif
config->mpls_en = mpls_en;
+ nl_rdma = mlx5_nl_init(NETLINK_RDMA);
/* Check port status. */
- err = mlx5_glue->query_port(sh->ctx, spawn->phys_port, &port_attr);
- if (err) {
- DRV_LOG(ERR, "port query failed: %s", strerror(err));
- goto error;
- }
- if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
- DRV_LOG(ERR, "port is not configured in Ethernet mode");
- err = EINVAL;
- goto error;
+ if (spawn->phys_port <= UINT8_MAX) {
+ /* Legacy Verbs API only supports u8 port numbers. */
+ err = mlx5_glue->query_port(sh->cdev->ctx, spawn->phys_port,
+ &port_attr);
+ if (err) {
+ DRV_LOG(ERR, "port query failed: %s", strerror(err));
+ goto error;
+ }
+ if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
+ DRV_LOG(ERR, "port is not configured in Ethernet mode");
+ err = EINVAL;
+ goto error;
+ }
+ } else if (nl_rdma >= 0) {
+ /* IB doesn't allow more than 255 ports, must be Ethernet. */
+ err = mlx5_nl_port_state(nl_rdma,
+ spawn->phys_dev_name,
+ spawn->phys_port);
+ if (err < 0) {
+ DRV_LOG(INFO, "Failed to get netlink port state: %s",
+ strerror(rte_errno));
+ /* The error path expects a positive errno value. */
+ err = rte_errno;
+ goto error;
+ }
+ port_attr.state = (enum ibv_port_state)err;
}
if (port_attr.state != IBV_PORT_ACTIVE)
- DRV_LOG(DEBUG, "port is not active: \"%s\" (%d)",
+ DRV_LOG(INFO, "port is not active: \"%s\" (%d)",
mlx5_glue->port_state_str(port_attr.state),
port_attr.state);
/* Allocate private eth device data. */
priv->pci_dev = spawn->pci_dev;
priv->mtu = RTE_ETHER_MTU;
/* Some internal functions rely on Netlink sockets, open them now. */
- priv->nl_socket_rdma = mlx5_nl_init(NETLINK_RDMA);
+ priv->nl_socket_rdma = nl_rdma;
priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE);
priv->representor = !!switch_info->representor;
priv->master = !!switch_info->master;
* register is defined by mask.
*/
if (switch_info->representor || switch_info->master) {
- err = mlx5_glue->devx_port_query(sh->ctx,
+ err = mlx5_glue->devx_port_query(sh->cdev->ctx,
spawn->phys_port,
&vport_info);
if (err) {
config->dv_flow_en = 0;
}
#endif
- if (spawn->max_port > UINT8_MAX) {
- /* Verbs can't support ports larger than 255 by design. */
- DRV_LOG(ERR, "can't support IB ports > UINT8_MAX");
- err = EINVAL;
- goto error;
- }
config->ind_table_max_size =
sh->device_attr.max_rwq_indirection_table_size;
/*
* Remove this check once DPDK supports larger/variable
* indirection tables.
*/
- if (config->ind_table_max_size > (unsigned int)ETH_RSS_RETA_SIZE_512)
- config->ind_table_max_size = ETH_RSS_RETA_SIZE_512;
+ if (config->ind_table_max_size > (unsigned int)RTE_ETH_RSS_RETA_SIZE_512)
+ config->ind_table_max_size = RTE_ETH_RSS_RETA_SIZE_512;
DRV_LOG(DEBUG, "maximum Rx indirection table size is %u",
config->ind_table_max_size);
config->hw_vlan_strip = !!(sh->device_attr.raw_packet_caps &
config->mps == MLX5_MPW_ENHANCED ? "enhanced " :
config->mps == MLX5_MPW ? "legacy " : "",
config->mps != MLX5_MPW_DISABLED ? "enabled" : "disabled");
- if (config->devx) {
- err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config->hca_attr);
- if (err) {
- err = -err;
- goto error;
- }
- /* Check relax ordering support. */
- if (!haswell_broadwell_cpu) {
- sh->cmng.relaxed_ordering_write =
- config->hca_attr.relaxed_ordering_write;
- sh->cmng.relaxed_ordering_read =
- config->hca_attr.relaxed_ordering_read;
- } else {
- sh->cmng.relaxed_ordering_read = 0;
- sh->cmng.relaxed_ordering_write = 0;
- }
- sh->rq_ts_format = config->hca_attr.rq_ts_format;
- sh->sq_ts_format = config->hca_attr.sq_ts_format;
+ if (sh->devx) {
+ config->hca_attr = sh->cdev->config.hca_attr;
sh->steering_format_version =
config->hca_attr.steering_format_version;
- sh->qp_ts_format = config->hca_attr.qp_ts_format;
/* Check for LRO support. */
if (config->dest_tir && config->hca_attr.lro_cap &&
config->dv_flow_en) {
config->cqe_comp = 0;
}
if (config->cqe_comp_fmt == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX &&
- (!config->devx || !config->hca_attr.mini_cqe_resp_flow_tag)) {
+ (!sh->devx || !config->hca_attr.mini_cqe_resp_flow_tag)) {
DRV_LOG(WARNING, "Flow Tag CQE compression"
" format isn't supported.");
config->cqe_comp = 0;
}
if (config->cqe_comp_fmt == MLX5_CQE_RESP_FORMAT_L34H_STRIDX &&
- (!config->devx || !config->hca_attr.mini_cqe_resp_l3_l4_tag)) {
+ (!sh->devx || !config->hca_attr.mini_cqe_resp_l3_l4_tag)) {
DRV_LOG(WARNING, "L3/L4 Header CQE compression"
" format isn't supported.");
config->cqe_comp = 0;
config->hca_attr.log_max_static_sq_wq);
DRV_LOG(DEBUG, "WQE rate PP mode is %ssupported",
config->hca_attr.qos.wqe_rate_pp ? "" : "not ");
- if (!config->devx) {
+ if (!sh->devx) {
DRV_LOG(ERR, "DevX is required for packet pacing");
err = ENODEV;
goto error;
goto error;
#endif
}
- if (config->devx) {
+ if (config->std_delay_drop || config->hp_delay_drop) {
+ if (!config->hca_attr.rq_delay_drop) {
+ config->std_delay_drop = 0;
+ config->hp_delay_drop = 0;
+ DRV_LOG(WARNING,
+ "dev_port-%u: Rxq delay drop is not supported",
+ priv->dev_port);
+ }
+ }
+ if (sh->devx) {
uint32_t reg[MLX5_ST_SZ_DW(register_mtutc)];
err = config->hca_attr.access_register_user ?
mlx5_devx_cmd_register_read
- (sh->ctx, MLX5_REGISTER_ID_MTUTC, 0,
+ (sh->cdev->ctx, MLX5_REGISTER_ID_MTUTC, 0,
reg, MLX5_ST_SZ_DW(register_mtutc)) : ENOTSUP;
if (!err) {
uint32_t ts_mode;
/*
* If HW has bug working with tunnel packet decapsulation and
* scatter FCS, and decapsulation is needed, clear the hw_fcs_strip
- * bit. Then DEV_RX_OFFLOAD_KEEP_CRC bit will not be set anymore.
+ * bit. Then RTE_ETH_RX_OFFLOAD_KEEP_CRC bit will not be set anymore.
*/
if (config->hca_attr.scatter_fcs_w_decap_disable && config->decap_en)
config->hw_fcs_strip = 0;
DRV_LOG(DEBUG, "FCS stripping configuration is %ssupported",
(config->hw_fcs_strip ? "" : "not "));
if (config->mprq.enabled && mprq) {
- if (config->mprq.stride_num_n &&
- (config->mprq.stride_num_n > mprq_max_stride_num_n ||
- config->mprq.stride_num_n < mprq_min_stride_num_n)) {
- config->mprq.stride_num_n =
- RTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_NUM_N,
- mprq_min_stride_num_n),
- mprq_max_stride_num_n);
+ if (config->mprq.log_stride_num &&
+ (config->mprq.log_stride_num > mprq_max_stride_num_n ||
+ config->mprq.log_stride_num < mprq_min_stride_num_n)) {
+ config->mprq.log_stride_num =
+ RTE_MIN(RTE_MAX(MLX5_MPRQ_DEFAULT_LOG_STRIDE_NUM,
+ mprq_min_stride_num_n),
+ mprq_max_stride_num_n);
DRV_LOG(WARNING,
"the number of strides"
" for Multi-Packet RQ is out of range,"
" setting default value (%u)",
- 1 << config->mprq.stride_num_n);
- }
- if (config->mprq.stride_size_n &&
- (config->mprq.stride_size_n > mprq_max_stride_size_n ||
- config->mprq.stride_size_n < mprq_min_stride_size_n)) {
- config->mprq.stride_size_n =
- RTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_SIZE_N,
- mprq_min_stride_size_n),
- mprq_max_stride_size_n);
+ 1 << config->mprq.log_stride_num);
+ }
+ if (config->mprq.log_stride_size &&
+ (config->mprq.log_stride_size > mprq_max_stride_size_n ||
+ config->mprq.log_stride_size < mprq_min_stride_size_n)) {
+ config->mprq.log_stride_size =
+ RTE_MIN(RTE_MAX(MLX5_MPRQ_DEFAULT_LOG_STRIDE_SIZE,
+ mprq_min_stride_size_n),
+ mprq_max_stride_size_n);
DRV_LOG(WARNING,
"the size of a stride"
" for Multi-Packet RQ is out of range,"
" setting default value (%u)",
- 1 << config->mprq.stride_size_n);
+ 1 << config->mprq.log_stride_size);
}
- config->mprq.min_stride_size_n = mprq_min_stride_size_n;
- config->mprq.max_stride_size_n = mprq_max_stride_size_n;
+ config->mprq.log_min_stride_size = mprq_min_stride_size_n;
+ config->mprq.log_max_stride_size = mprq_max_stride_size_n;
} else if (config->mprq.enabled && !mprq) {
DRV_LOG(WARNING, "Multi-Packet RQ isn't supported");
config->mprq.enabled = 0;
*/
MLX5_ASSERT(spawn->ifindex);
priv->if_index = spawn->ifindex;
+ priv->lag_affinity_idx = sh->refcnt - 1;
eth_dev->data->dev_private = priv;
priv->dev_data = eth_dev->data;
eth_dev->data->mac_addrs = priv->mac;
err = mlx5_alloc_shared_dr(priv);
if (err)
goto error;
+ if (mlx5_flex_item_port_init(eth_dev) < 0)
+ goto error;
}
- if (config->devx && config->dv_flow_en && config->dest_tir) {
+ if (sh->devx && config->dv_flow_en && config->dest_tir) {
priv->obj_ops = devx_obj_ops;
- priv->obj_ops.drop_action_create =
- ibv_obj_ops.drop_action_create;
- priv->obj_ops.drop_action_destroy =
- ibv_obj_ops.drop_action_destroy;
-#ifndef HAVE_MLX5DV_DEVX_UAR_OFFSET
- priv->obj_ops.txq_obj_modify = ibv_obj_ops.txq_obj_modify;
-#else
- if (config->dv_esw_en)
- priv->obj_ops.txq_obj_modify =
- ibv_obj_ops.txq_obj_modify;
-#endif
- /* Use specific wrappers for Tx object. */
- priv->obj_ops.txq_obj_new = mlx5_os_txq_obj_new;
- priv->obj_ops.txq_obj_release = mlx5_os_txq_obj_release;
mlx5_queue_counter_id_prepare(eth_dev);
priv->obj_ops.lb_dummy_queue_create =
mlx5_rxq_ibv_obj_dummy_lb_create;
priv->obj_ops.lb_dummy_queue_release =
mlx5_rxq_ibv_obj_dummy_lb_release;
+ } else if (spawn->max_port > UINT8_MAX) {
+ /* Verbs can't support ports larger than 255 by design. */
+ DRV_LOG(ERR, "must enable DV and ESW when RDMA link ports > 255");
+ err = ENOTSUP;
+ goto error;
} else {
priv->obj_ops = ibv_obj_ops;
}
if (config->tx_pp &&
- (priv->config.dv_esw_en ||
- priv->obj_ops.txq_obj_new != mlx5_os_txq_obj_new)) {
+ priv->obj_ops.txq_obj_new != mlx5_txq_devx_obj_new) {
/*
* HAVE_MLX5DV_DEVX_UAR_OFFSET is required to support
* packet pacing and already checked above.
priv->drop_queue.hrxq = mlx5_drop_action_create(eth_dev);
if (!priv->drop_queue.hrxq)
goto error;
- /* Supported Verbs flow priority number detection. */
- err = mlx5_flow_discover_priorities(eth_dev);
+ /* Port representor shares the same max priority with the PF port. */
+ if (!priv->sh->flow_priority_check_flag) {
+ /* Supported Verbs flow priority number detection. */
+ err = mlx5_flow_discover_priorities(eth_dev);
+ priv->sh->flow_max_priority = err;
+ priv->sh->flow_priority_check_flag = 1;
+ } else {
+ err = priv->sh->flow_max_priority;
+ }
if (err < 0) {
err = -err;
goto error;
}
- priv->config.flow_prio = err;
if (!priv->config.dv_esw_en &&
priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {
DRV_LOG(WARNING, "metadata mode %u is not supported "
goto error;
rte_rwlock_init(&priv->ind_tbls_lock);
/* Query availability of metadata reg_c's. */
- err = mlx5_flow_discover_mreg_c(eth_dev);
- if (err < 0) {
- err = -err;
- goto error;
+ if (!priv->sh->metadata_regc_check_flag) {
+ err = mlx5_flow_discover_mreg_c(eth_dev);
+ if (err < 0) {
+ err = -err;
+ goto error;
+ }
}
if (!mlx5_flow_ext_mreg_supported(eth_dev)) {
DRV_LOG(DEBUG,
mlx5_os_free_shared_dr(priv);
if (priv->nl_socket_route >= 0)
close(priv->nl_socket_route);
- if (priv->nl_socket_rdma >= 0)
- close(priv->nl_socket_rdma);
if (priv->vmwa_context)
mlx5_vlan_vmwa_exit(priv->vmwa_context);
if (eth_dev && priv->drop_queue.hrxq)
claim_zero(rte_eth_switch_domain_free(priv->domain_id));
if (priv->hrxqs)
mlx5_list_destroy(priv->hrxqs);
+ if (eth_dev && priv->flex_item_map)
+ mlx5_flex_item_port_cleanup(eth_dev);
mlx5_free(priv);
if (eth_dev != NULL)
eth_dev->data->dev_private = NULL;
}
if (sh)
mlx5_free_shared_dev_ctx(sh);
+ if (nl_rdma >= 0)
+ close(nl_rdma);
MLX5_ASSERT(err > 0);
rte_errno = err;
return NULL;
/**
* Match PCI information for possible slaves of bonding device.
*
- * @param[in] ibv_dev
- * Pointer to Infiniband device structure.
+ * @param[in] ibdev_name
+ * Name of Infiniband device.
* @param[in] pci_dev
* Pointer to primary PCI address structure to match.
* @param[in] nl_rdma
* Netlink RDMA group socket handle.
* @param[in] owner
- * Rerepsentor owner PF index.
+ * Representor owner PF index.
* @param[out] bond_info
* Pointer to bonding information.
*
* positive index of slave PF in bonding.
*/
static int
-mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev,
+mlx5_device_bond_pci_match(const char *ibdev_name,
const struct rte_pci_addr *pci_dev,
int nl_rdma, uint16_t owner,
struct mlx5_bond_info *bond_info)
FILE *bond_file = NULL, *file;
int pf = -1;
int ret;
+ uint8_t cur_guid[32] = {0};
+ uint8_t guid[32] = {0};
/*
- * Try to get master device name. If something goes
- * wrong suppose the lack of kernel support and no
- * bonding devices.
+ * Try to get master device name. If something goes wrong suppose
+ * the lack of kernel support and no bonding devices.
*/
memset(bond_info, 0, sizeof(*bond_info));
if (nl_rdma < 0)
return -1;
- if (!strstr(ibv_dev->name, "bond"))
+ if (!strstr(ibdev_name, "bond"))
return -1;
- np = mlx5_nl_portnum(nl_rdma, ibv_dev->name);
+ np = mlx5_nl_portnum(nl_rdma, ibdev_name);
if (!np)
return -1;
+ if (mlx5_get_device_guid(pci_dev, cur_guid, sizeof(cur_guid)) < 0)
+ return -1;
/*
- * The Master device might not be on the predefined
- * port (not on port index 1, it is not garanted),
- * we have to scan all Infiniband device port and
- * find master.
+ * The master device might not be on the predefined port (not on port
+ * index 1, it is not guaranteed), we have to scan all Infiniband
+ * device ports and find master.
*/
for (i = 1; i <= np; ++i) {
/* Check whether Infiniband port is populated. */
- ifindex = mlx5_nl_ifindex(nl_rdma, ibv_dev->name, i);
+ ifindex = mlx5_nl_ifindex(nl_rdma, ibdev_name, i);
if (!ifindex)
continue;
if (!if_indextoname(ifindex, ifname))
char tmp_str[IF_NAMESIZE + 32];
struct rte_pci_addr pci_addr;
struct mlx5_switch_info info;
+ int ret;
/* Process slave interface names in the loop. */
snprintf(tmp_str, sizeof(tmp_str),
"/sys/class/net/%s", ifname);
if (mlx5_get_pci_addr(tmp_str, &pci_addr)) {
- DRV_LOG(WARNING, "can not get PCI address"
- " for netdev \"%s\"", ifname);
+ DRV_LOG(WARNING,
+ "Cannot get PCI address for netdev \"%s\".",
+ ifname);
continue;
}
/* Slave interface PCI address match found. */
tmp_str);
break;
}
- /* Match PCI address, allows BDF0+pfx or BDFx+pfx. */
- if (pci_dev->domain == pci_addr.domain &&
- pci_dev->bus == pci_addr.bus &&
- pci_dev->devid == pci_addr.devid &&
- ((pci_dev->function == 0 &&
- pci_dev->function + owner == pci_addr.function) ||
- (pci_dev->function == owner &&
- pci_addr.function == owner)))
- pf = info.port_name;
/* Get ifindex. */
snprintf(tmp_str, sizeof(tmp_str),
"/sys/class/net/%s/ifindex", ifname);
bond_info->ports[info.port_name].pci_addr = pci_addr;
bond_info->ports[info.port_name].ifindex = ifindex;
bond_info->n_port++;
+ /*
+ * Under socket direct mode, bonding will use
+ * system_image_guid as identification.
+ * After OFED 5.4, guid is readable (ret >= 0) under sysfs.
+ * All bonding members should have the same guid even if driver
+ * is using PCIe BDF.
+ */
+ ret = mlx5_get_device_guid(&pci_addr, guid, sizeof(guid));
+ if (ret < 0)
+ break;
+ else if (ret > 0) {
+ if (!memcmp(guid, cur_guid, sizeof(guid)) &&
+ owner == info.port_name &&
+ (owner != 0 || (owner == 0 &&
+ !rte_pci_addr_cmp(pci_dev, &pci_addr))))
+ pf = info.port_name;
+ } else if (pci_dev->domain == pci_addr.domain &&
+ pci_dev->bus == pci_addr.bus &&
+ pci_dev->devid == pci_addr.devid &&
+ ((pci_dev->function == 0 &&
+ pci_dev->function + owner == pci_addr.function) ||
+ (pci_dev->function == owner &&
+ pci_addr.function == owner)))
+ pf = info.port_name;
}
if (pf >= 0) {
/* Get bond interface info */
DRV_LOG(INFO, "PF device %u, bond device %u(%s)",
ifindex, bond_info->ifindex, bond_info->ifname);
}
+ if (owner == 0 && pf != 0) {
+ DRV_LOG(INFO, "PCIe instance %04x:%02x:%02x.%x isn't bonding owner",
+ pci_dev->domain, pci_dev->bus, pci_dev->devid,
+ pci_dev->function);
+ }
return pf;
}
config->decap_en = 1;
config->log_hp_size = MLX5_ARG_UNSET;
config->allow_duplicate_pattern = 1;
+ config->std_delay_drop = 0;
+ config->hp_delay_drop = 0;
}
/**
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev, void *ctx,
+mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
struct rte_eth_devargs *req_eth_da,
uint16_t owner_id)
{
struct rte_pci_addr pci_addr;
DRV_LOG(DEBUG, "Checking device \"%s\"", ibv_list[ret]->name);
- bd = mlx5_device_bond_pci_match
- (ibv_list[ret], &owner_pci, nl_rdma, owner_id,
- &bond_info);
+ bd = mlx5_device_bond_pci_match(ibv_list[ret]->name, &owner_pci,
+ nl_rdma, owner_id, &bond_info);
if (bd >= 0) {
/*
* Bonding device detected. Only one match is allowed,
/* Amend owner pci address if owner PF ID specified. */
if (eth_da.nb_representor_ports)
owner_pci.function += owner_id;
- DRV_LOG(INFO, "PCI information matches for"
- " slave %d bonding device \"%s\"",
- bd, ibv_list[ret]->name);
+ DRV_LOG(INFO,
+ "PCI information matches for slave %d bonding device \"%s\"",
+ bd, ibv_list[ret]->name);
ibv_match[nd++] = ibv_list[ret];
break;
} else {
list[ns].max_port = np;
list[ns].phys_port = i;
list[ns].phys_dev_name = ibv_match[0]->name;
- list[ns].ctx = ctx;
list[ns].eth_dev = NULL;
list[ns].pci_dev = pci_dev;
list[ns].cdev = cdev;
/*
* Force standalone bonding
* device for ROCE LAG
- * confgiurations.
+ * configurations.
*/
list[ns].info.master = 0;
list[ns].info.representor = 0;
list[ns].max_port = 1;
list[ns].phys_port = 1;
list[ns].phys_dev_name = ibv_match[i]->name;
- list[ns].ctx = ctx;
list[ns].eth_dev = NULL;
list[ns].pci_dev = pci_dev;
list[ns].cdev = cdev;
}
ret = -1;
if (nl_route >= 0)
- ret = mlx5_nl_switch_info
- (nl_route,
- list[ns].ifindex,
- &list[ns].info);
+ ret = mlx5_nl_switch_info(nl_route,
+ list[ns].ifindex,
+ &list[ns].info);
if (ret || (!list[ns].info.representor &&
!list[ns].info.master)) {
/*
}
/*
* New kernels may add the switch_id attribute for the case
- * there is no E-Switch and we wrongly recognized the
- * only device as master. Override this if there is the
- * single device with single port and new device name
- * format present.
+ * there is no E-Switch and we wrongly recognized the only
+ * device as master. Override this if there is the single
+ * device with single port and new device name format present.
*/
if (nd == 1 &&
list[0].info.name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK) {
* Representor interrupts handle is released in mlx5_dev_stop().
*/
if (list[i].info.representor) {
- struct rte_intr_handle *intr_handle;
- intr_handle = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
- sizeof(*intr_handle), 0,
- SOCKET_ID_ANY);
- if (!intr_handle) {
+ struct rte_intr_handle *intr_handle =
+ rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
+ if (intr_handle == NULL) {
DRV_LOG(ERR,
"port %u failed to allocate memory for interrupt handler "
"Rx interrupts will not be supported",
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-mlx5_os_pci_probe(struct mlx5_common_device *cdev, void *ctx)
+mlx5_os_pci_probe(struct mlx5_common_device *cdev)
{
struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(cdev->dev);
struct rte_eth_devargs eth_da = { .nb_ports = 0 };
if (eth_da.nb_ports > 0) {
/* Iterate all port if devargs pf is range: "pf[0-1]vf[...]". */
for (p = 0; p < eth_da.nb_ports; p++) {
- ret = mlx5_os_pci_probe_pf(cdev, ctx, &eth_da,
+ ret = mlx5_os_pci_probe_pf(cdev, &eth_da,
eth_da.ports[p]);
if (ret)
break;
}
if (ret) {
DRV_LOG(ERR, "Probe of PCI device " PCI_PRI_FMT " "
- "aborted due to proding failure of PF %u",
+ "aborted due to probing failure of PF %u",
pci_dev->addr.domain, pci_dev->addr.bus,
pci_dev->addr.devid, pci_dev->addr.function,
eth_da.ports[p]);
mlx5_net_remove(cdev);
}
} else {
- ret = mlx5_os_pci_probe_pf(cdev, ctx, &eth_da, 0);
+ ret = mlx5_os_pci_probe_pf(cdev, &eth_da, 0);
}
return ret;
}
/* Probe a single SF device on auxiliary bus, no representor support. */
static int
-mlx5_os_auxiliary_probe(struct mlx5_common_device *cdev, void *ctx)
+mlx5_os_auxiliary_probe(struct mlx5_common_device *cdev)
{
struct rte_eth_devargs eth_da = { .nb_ports = 0 };
struct mlx5_dev_config config;
/* Init spawn data. */
spawn.max_port = 1;
spawn.phys_port = 1;
- spawn.ctx = ctx;
- spawn.phys_dev_name = mlx5_os_get_ctx_device_name(ctx);
+ spawn.phys_dev_name = mlx5_os_get_ctx_device_name(cdev->ctx);
ret = mlx5_auxiliary_get_ifindex(dev->name);
if (ret < 0) {
DRV_LOG(ERR, "failed to get ethdev ifindex: %s", dev->name);
if (eth_dev == NULL)
return -rte_errno;
/* Post create. */
- eth_dev->intr_handle = &adev->intr_handle;
+ eth_dev->intr_handle = adev->intr_handle;
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_RMV;
mlx5_os_net_probe(struct mlx5_common_device *cdev)
{
int ret;
- void *ctx = NULL;
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- ret = mlx5_os_open_device(cdev, &ctx);
- if (ret) {
- DRV_LOG(ERR, "Fail to open device %s", cdev->dev->name);
- return -rte_errno;
- }
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
mlx5_pmd_socket_init();
- }
ret = mlx5_init_once();
if (ret) {
DRV_LOG(ERR, "Unable to init PMD global data: %s",
strerror(rte_errno));
- if (ctx != NULL)
- claim_zero(mlx5_glue->close_device(ctx));
return -rte_errno;
}
if (mlx5_dev_is_pci(cdev->dev))
- return mlx5_os_pci_probe(cdev, ctx);
+ return mlx5_os_pci_probe(cdev);
else
- return mlx5_os_auxiliary_probe(cdev, ctx);
+ return mlx5_os_auxiliary_probe(cdev);
}
/**
- * Extract pdn of PD object using DV API.
- *
- * @param[in] pd
- * Pointer to the verbs PD object.
- * @param[out] pdn
- * Pointer to the PD object number variable.
- *
- * @return
- * 0 on success, error value otherwise.
+ * Clean up resources when the last device is closed.
*/
-int
-mlx5_os_get_pdn(void *pd, uint32_t *pdn)
+void
+mlx5_os_net_cleanup(void)
{
-#ifdef HAVE_IBV_FLOW_DV_SUPPORT
- struct mlx5dv_obj obj;
- struct mlx5dv_pd pd_info;
- int ret = 0;
-
- obj.pd.in = pd;
- obj.pd.out = &pd_info;
- ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD);
- if (ret) {
- DRV_LOG(DEBUG, "Fail to get PD object info");
- return ret;
- }
- *pdn = pd_info.pdn;
- return 0;
-#else
- (void)pd;
- (void)pdn;
- return -ENOTSUP;
-#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
+ mlx5_pmd_socket_uninit();
}
/**
{
int ret;
int flags;
+ struct ibv_context *ctx = sh->cdev->ctx;
+
+ sh->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
+ if (sh->intr_handle == NULL) {
+ DRV_LOG(ERR, "Fail to allocate intr_handle");
+ rte_errno = ENOMEM;
+ return;
+ }
+ rte_intr_fd_set(sh->intr_handle, -1);
- sh->intr_handle.fd = -1;
- flags = fcntl(((struct ibv_context *)sh->ctx)->async_fd, F_GETFL);
- ret = fcntl(((struct ibv_context *)sh->ctx)->async_fd,
- F_SETFL, flags | O_NONBLOCK);
+ flags = fcntl(ctx->async_fd, F_GETFL);
+ ret = fcntl(ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
if (ret) {
DRV_LOG(INFO, "failed to change file descriptor async event"
" queue");
} else {
- sh->intr_handle.fd = ((struct ibv_context *)sh->ctx)->async_fd;
- sh->intr_handle.type = RTE_INTR_HANDLE_EXT;
- if (rte_intr_callback_register(&sh->intr_handle,
+ rte_intr_fd_set(sh->intr_handle, ctx->async_fd);
+ rte_intr_type_set(sh->intr_handle, RTE_INTR_HANDLE_EXT);
+ if (rte_intr_callback_register(sh->intr_handle,
mlx5_dev_interrupt_handler, sh)) {
DRV_LOG(INFO, "Fail to install the shared interrupt.");
- sh->intr_handle.fd = -1;
+ rte_intr_fd_set(sh->intr_handle, -1);
}
}
if (sh->devx) {
#ifdef HAVE_IBV_DEVX_ASYNC
- sh->intr_handle_devx.fd = -1;
- sh->devx_comp =
- (void *)mlx5_glue->devx_create_cmd_comp(sh->ctx);
+ sh->intr_handle_devx =
+ rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
+ if (!sh->intr_handle_devx) {
+ DRV_LOG(ERR, "Fail to allocate intr_handle");
+ rte_errno = ENOMEM;
+ return;
+ }
+ rte_intr_fd_set(sh->intr_handle_devx, -1);
+ sh->devx_comp = (void *)mlx5_glue->devx_create_cmd_comp(ctx);
struct mlx5dv_devx_cmd_comp *devx_comp = sh->devx_comp;
if (!devx_comp) {
DRV_LOG(INFO, "failed to allocate devx_comp.");
" devx comp");
return;
}
- sh->intr_handle_devx.fd = devx_comp->fd;
- sh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT;
- if (rte_intr_callback_register(&sh->intr_handle_devx,
+ rte_intr_fd_set(sh->intr_handle_devx, devx_comp->fd);
+ rte_intr_type_set(sh->intr_handle_devx,
+ RTE_INTR_HANDLE_EXT);
+ if (rte_intr_callback_register(sh->intr_handle_devx,
mlx5_dev_interrupt_handler_devx, sh)) {
DRV_LOG(INFO, "Fail to install the devx shared"
" interrupt.");
- sh->intr_handle_devx.fd = -1;
+ rte_intr_fd_set(sh->intr_handle_devx, -1);
}
#endif /* HAVE_IBV_DEVX_ASYNC */
}
void
mlx5_os_dev_shared_handler_uninstall(struct mlx5_dev_ctx_shared *sh)
{
- if (sh->intr_handle.fd >= 0)
- mlx5_intr_callback_unregister(&sh->intr_handle,
+ if (rte_intr_fd_get(sh->intr_handle) >= 0)
+ mlx5_intr_callback_unregister(sh->intr_handle,
mlx5_dev_interrupt_handler, sh);
+ rte_intr_instance_free(sh->intr_handle);
#ifdef HAVE_IBV_DEVX_ASYNC
- if (sh->intr_handle_devx.fd >= 0)
- rte_intr_callback_unregister(&sh->intr_handle_devx,
+ if (rte_intr_fd_get(sh->intr_handle_devx) >= 0)
+ rte_intr_callback_unregister(sh->intr_handle_devx,
mlx5_dev_interrupt_handler_devx, sh);
+ rte_intr_instance_free(sh->intr_handle_devx);
if (sh->devx_comp)
mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp);
#endif
return 1;
}
-/**
- * Set the reg_mr and dereg_mr call backs
- *
- * @param reg_mr_cb[out]
- * Pointer to reg_mr func
- * @param dereg_mr_cb[out]
- * Pointer to dereg_mr func
- *
- */
-void
-mlx5_os_set_reg_mr_cb(mlx5_reg_mr_t *reg_mr_cb,
- mlx5_dereg_mr_t *dereg_mr_cb)
-{
- *reg_mr_cb = mlx5_mr_verbs_ops.reg_mr;
- *dereg_mr_cb = mlx5_mr_verbs_ops.dereg_mr;
-}
-
/**
* Remove a MAC address from device
*