X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxq.c;h=ebbe186d96babdb5a97693a9c7f2c08b80f9c02e;hb=693f715da45c48ec1ec0fe4ba2f3b5ffd11ba53e;hp=f2f773e7446019373dcef1d1185d49da2b3acd87;hpb=aa7f63ab35ccac3ef696eb289a98fa851eca21fa;p=dpdk.git

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index f2f773e744..ebbe186d96 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -60,11 +60,531 @@
 #endif
 
 #include "mlx5.h"
-#include "mlx5_autoconf.h"
 #include "mlx5_rxtx.h"
 #include "mlx5_utils.h"
 #include "mlx5_defs.h"
 
+/* Initialization data for hash RX queues. */
+const struct hash_rxq_init hash_rxq_init[] = {
+	[HASH_RXQ_TCPV4] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
+				IBV_EXP_RX_HASH_DST_IPV4 |
+				IBV_EXP_RX_HASH_SRC_PORT_TCP |
+				IBV_EXP_RX_HASH_DST_PORT_TCP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
+		.flow_priority = 0,
+		.flow_spec.tcp_udp = {
+			.type = IBV_EXP_FLOW_SPEC_TCP,
+			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
+		},
+		.underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
+	},
+	[HASH_RXQ_UDPV4] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
+				IBV_EXP_RX_HASH_DST_IPV4 |
+				IBV_EXP_RX_HASH_SRC_PORT_UDP |
+				IBV_EXP_RX_HASH_DST_PORT_UDP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
+		.flow_priority = 0,
+		.flow_spec.tcp_udp = {
+			.type = IBV_EXP_FLOW_SPEC_UDP,
+			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
+		},
+		.underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
+	},
+	[HASH_RXQ_IPV4] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
+				IBV_EXP_RX_HASH_DST_IPV4),
+		.dpdk_rss_hf = (ETH_RSS_IPV4 |
+				ETH_RSS_FRAG_IPV4),
+		.flow_priority = 1,
+		.flow_spec.ipv4 = {
+			.type = IBV_EXP_FLOW_SPEC_IPV4,
+			.size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
+		},
+		.underlayer = &hash_rxq_init[HASH_RXQ_ETH],
+	},
+#ifdef HAVE_FLOW_SPEC_IPV6
+	[HASH_RXQ_TCPV6] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
+				IBV_EXP_RX_HASH_DST_IPV6 |
+				IBV_EXP_RX_HASH_SRC_PORT_TCP |
+				IBV_EXP_RX_HASH_DST_PORT_TCP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
+		.flow_priority = 0,
+		.flow_spec.tcp_udp = {
+			.type = IBV_EXP_FLOW_SPEC_TCP,
+			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
+		},
+		.underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
+	},
+	[HASH_RXQ_UDPV6] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
+				IBV_EXP_RX_HASH_DST_IPV6 |
+				IBV_EXP_RX_HASH_SRC_PORT_UDP |
+				IBV_EXP_RX_HASH_DST_PORT_UDP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
+		.flow_priority = 0,
+		.flow_spec.tcp_udp = {
+			.type = IBV_EXP_FLOW_SPEC_UDP,
+			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
+		},
+		.underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
+	},
+	[HASH_RXQ_IPV6] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
+				IBV_EXP_RX_HASH_DST_IPV6),
+		.dpdk_rss_hf = (ETH_RSS_IPV6 |
+				ETH_RSS_FRAG_IPV6),
+		.flow_priority = 1,
+		.flow_spec.ipv6 = {
+			.type = IBV_EXP_FLOW_SPEC_IPV6,
+			.size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
+		},
+		.underlayer = &hash_rxq_init[HASH_RXQ_ETH],
+	},
+#endif /* HAVE_FLOW_SPEC_IPV6 */
+	[HASH_RXQ_ETH] = {
+		.hash_fields = 0,
+		.dpdk_rss_hf = 0,
+		.flow_priority = 2,
+		.flow_spec.eth = {
+			.type = IBV_EXP_FLOW_SPEC_ETH,
+			.size = sizeof(hash_rxq_init[0].flow_spec.eth),
+		},
+		.underlayer = NULL,
+	},
+};
+
+/* Number of entries in hash_rxq_init[]. */
+const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
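Each hash_rxq_init[] entry describes one hash RX queue type: hash_fields selects the packet fields hashed by the device, dpdk_rss_hf ties the entry to the DPDK RSS flags requested by the application, and underlayer chains entries so that a complete flow specification stack (e.g. TCPv4 over IPv4 over ETH) can be rebuilt by walking the chain. A minimal sketch of the flag-to-type mapping, mirroring the loop used by priv_make_ind_table_init() later in this patch (the helper itself is hypothetical, not part of the patch):

	/* Hypothetical helper: collect the hash RX queue types enabled by a
	 * DPDK rss_hf mask by scanning hash_rxq_init[]. */
	static uint32_t
	rss_hf_to_hash_types(uint64_t rss_hf)
	{
		uint32_t types = 0;
		unsigned int i;

		for (i = 0; (i != hash_rxq_init_n); ++i)
			if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
				types |= 1 << i;
		return types;
	}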
+
+/* Initialization data for hash RX queue indirection tables. */
+static const struct ind_table_init ind_table_init[] = {
+	{
+		.max_size = -1u, /* Superseded by HW limitations. */
+		.hash_types =
+			1 << HASH_RXQ_TCPV4 |
+			1 << HASH_RXQ_UDPV4 |
+			1 << HASH_RXQ_IPV4 |
+#ifdef HAVE_FLOW_SPEC_IPV6
+			1 << HASH_RXQ_TCPV6 |
+			1 << HASH_RXQ_UDPV6 |
+			1 << HASH_RXQ_IPV6 |
+#endif /* HAVE_FLOW_SPEC_IPV6 */
+			0,
+#ifdef HAVE_FLOW_SPEC_IPV6
+		.hash_types_n = 6,
+#else /* HAVE_FLOW_SPEC_IPV6 */
+		.hash_types_n = 3,
+#endif /* HAVE_FLOW_SPEC_IPV6 */
+	},
+	{
+		.max_size = 1,
+		.hash_types = 1 << HASH_RXQ_ETH,
+		.hash_types_n = 1,
+	},
+};
+
+#define IND_TABLE_INIT_N RTE_DIM(ind_table_init)
+
+/* Default RSS hash key also used for ConnectX-3. */
+uint8_t rss_hash_default_key[] = {
+	0x2c, 0xc6, 0x81, 0xd1,
+	0x5b, 0xdb, 0xf4, 0xf7,
+	0xfc, 0xa2, 0x83, 0x19,
+	0xdb, 0x1a, 0x3e, 0x94,
+	0x6b, 0x9e, 0x38, 0xd9,
+	0x2c, 0x9c, 0x03, 0xd1,
+	0xad, 0x99, 0x44, 0xa7,
+	0xd9, 0x56, 0x3d, 0x59,
+	0x06, 0x3c, 0x25, 0xf3,
+	0xfc, 0x1f, 0xdc, 0x2a,
+};
+
+/* Length of the default RSS hash key. */
+const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
+
+/**
+ * Populate flow steering rule for a given hash RX queue type using
+ * information from hash_rxq_init[]. Nothing is written to flow_attr when
+ * flow_attr_size is not large enough, but the required size is still returned.
+ *
+ * @param[in] hash_rxq
+ *   Pointer to hash RX queue.
+ * @param[out] flow_attr
+ *   Pointer to flow attribute structure to fill. Note that the allocated
+ *   area must be large enough to hold all flow specifications.
+ * @param flow_attr_size
+ *   Entire size of flow_attr and trailing room for flow specifications.
+ *
+ * @return
+ *   Total size of the flow attribute buffer. No errors are defined.
+ */
+size_t
+hash_rxq_flow_attr(const struct hash_rxq *hash_rxq,
+		   struct ibv_exp_flow_attr *flow_attr,
+		   size_t flow_attr_size)
+{
+	size_t offset = sizeof(*flow_attr);
+	enum hash_rxq_type type = hash_rxq->type;
+	const struct hash_rxq_init *init = &hash_rxq_init[type];
+
+	assert(hash_rxq->priv != NULL);
+	assert((size_t)type < RTE_DIM(hash_rxq_init));
+	do {
+		offset += init->flow_spec.hdr.size;
+		init = init->underlayer;
+	} while (init != NULL);
+	if (offset > flow_attr_size)
+		return offset;
+	flow_attr_size = offset;
+	init = &hash_rxq_init[type];
+	*flow_attr = (struct ibv_exp_flow_attr){
+		.type = IBV_EXP_FLOW_ATTR_NORMAL,
+		.priority = init->flow_priority,
+		.num_of_specs = 0,
+		.port = hash_rxq->priv->port,
+		.flags = 0,
+	};
+	do {
+		offset -= init->flow_spec.hdr.size;
+		memcpy((void *)((uintptr_t)flow_attr + offset),
+		       &init->flow_spec,
+		       init->flow_spec.hdr.size);
+		++flow_attr->num_of_specs;
+		init = init->underlayer;
+	} while (init != NULL);
+	return flow_attr_size;
+}
+
+/**
+ * Convert hash type position in indirection table initializer to
+ * hash RX queue type.
+ *
+ * @param table
+ *   Indirection table initializer.
+ * @param pos
+ *   Hash type position.
+ *
+ * @return
+ *   Hash RX queue type.
+ */
+static enum hash_rxq_type
+hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos)
+{
+	enum hash_rxq_type type = 0;
+
+	assert(pos < table->hash_types_n);
+	do {
+		if ((table->hash_types & (1 << type)) && (pos-- == 0))
+			break;
+		++type;
+	} while (1);
+	return type;
+}
+
+/**
+ * Filter out disabled hash RX queue types from ind_table_init[].
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[out] table
+ *   Output table.
+ *
+ * @return
+ *   Number of table entries.
+ */
+static unsigned int
+priv_make_ind_table_init(struct priv *priv,
+			 struct ind_table_init (*table)[IND_TABLE_INIT_N])
+{
+	uint64_t rss_hf;
+	unsigned int i;
+	unsigned int j;
+	unsigned int table_n = 0;
+	/* Mandatory to receive frames not handled by normal hash RX queues. */
+	unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;
+
+	rss_hf = priv->dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+	/* Process other protocols only if more than one queue. */
+	if (priv->rxqs_n > 1)
+		for (i = 0; (i != hash_rxq_init_n); ++i)
+			if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
+				hash_types_sup |= (1 << i);
+
+	/* Filter out entries whose protocols are not in the set. */
+	for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
+		unsigned int nb;
+		unsigned int h;
+
+		/* j is increased only if the table has valid protocols. */
+		assert(j <= i);
+		(*table)[j] = ind_table_init[i];
+		(*table)[j].hash_types &= hash_types_sup;
+		for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
+			if (((*table)[j].hash_types >> h) & 0x1)
+				++nb;
+		(*table)[j].hash_types_n = nb;
+		if (nb) {
+			++table_n;
+			++j;
+		}
+	}
+	return table_n;
+}
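hash_rxq_flow_attr() earlier in this hunk follows a two-pass convention: when the buffer is too small (including a NULL buffer with size 0), nothing is written and only the required size is returned, so callers can size the allocation first. A sketch of that calling convention (the wrapper below is hypothetical, not part of this patch):

	/* Hypothetical caller of hash_rxq_flow_attr(): query the required
	 * size, allocate, then fill the flow attribute structure. */
	static struct ibv_exp_flow_attr *
	alloc_flow_attr(const struct hash_rxq *hash_rxq)
	{
		size_t size = hash_rxq_flow_attr(hash_rxq, NULL, 0);
		struct ibv_exp_flow_attr *flow_attr =
			rte_malloc(__func__, size, 0);

		if (flow_attr != NULL)
			hash_rxq_flow_attr(hash_rxq, flow_attr, size);
		return flow_attr;
	}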
+
+/**
+ * Initialize hash RX queues and indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+priv_create_hash_rxqs(struct priv *priv)
+{
+	struct ibv_exp_wq *wqs[priv->reta_idx_n];
+	struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
+	unsigned int ind_tables_n =
+		priv_make_ind_table_init(priv, &ind_table_init);
+	unsigned int hash_rxqs_n = 0;
+	struct hash_rxq (*hash_rxqs)[] = NULL;
+	struct ibv_exp_rwq_ind_table *(*ind_tables)[] = NULL;
+	unsigned int i;
+	unsigned int j;
+	unsigned int k;
+	int err = 0;
+
+	assert(priv->ind_tables == NULL);
+	assert(priv->ind_tables_n == 0);
+	assert(priv->hash_rxqs == NULL);
+	assert(priv->hash_rxqs_n == 0);
+	assert(priv->pd != NULL);
+	assert(priv->ctx != NULL);
+	if (priv->rxqs_n == 0)
+		return EINVAL;
+	assert(priv->rxqs != NULL);
+	if (ind_tables_n == 0) {
+		ERROR("all hash RX queue types have been filtered out,"
+		      " indirection table cannot be created");
+		return EINVAL;
+	}
+	if (priv->rxqs_n & (priv->rxqs_n - 1)) {
+		INFO("%u RX queues are configured, consider rounding this"
+		     " number to the next power of two for better balancing",
+		     priv->rxqs_n);
+		DEBUG("indirection table extended to assume %u WQs",
+		      priv->reta_idx_n);
+	}
+	for (i = 0; (i != priv->reta_idx_n); ++i)
+		wqs[i] = (*priv->rxqs)[(*priv->reta_idx)[i]]->wq;
+	/* Get number of hash RX queues to configure. */
+	for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
+		hash_rxqs_n += ind_table_init[i].hash_types_n;
+	DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
+	      hash_rxqs_n, priv->rxqs_n, ind_tables_n);
+	/* Create indirection tables. */
+	ind_tables = rte_calloc(__func__, ind_tables_n,
+				sizeof((*ind_tables)[0]), 0);
+	if (ind_tables == NULL) {
+		err = ENOMEM;
+		ERROR("cannot allocate indirection tables container: %s",
+		      strerror(err));
+		goto error;
+	}
+	for (i = 0; (i != ind_tables_n); ++i) {
+		struct ibv_exp_rwq_ind_table_init_attr ind_init_attr = {
+			.pd = priv->pd,
+			.log_ind_tbl_size = 0, /* Set below. */
+			.ind_tbl = wqs,
+			.comp_mask = 0,
+		};
+		unsigned int ind_tbl_size = ind_table_init[i].max_size;
+		struct ibv_exp_rwq_ind_table *ind_table;
+
+		if (priv->reta_idx_n < ind_tbl_size)
+			ind_tbl_size = priv->reta_idx_n;
+		ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
+		errno = 0;
+		ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
+							 &ind_init_attr);
+		if (ind_table != NULL) {
+			(*ind_tables)[i] = ind_table;
+			continue;
+		}
+		/* Not clear whether errno is set. */
+		err = (errno ? errno : EINVAL);
+		ERROR("RX indirection table creation failed with error %d: %s",
+		      err, strerror(err));
+		goto error;
+	}
+	/* Allocate array that holds hash RX queues and related data. */
+	hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
+			       sizeof((*hash_rxqs)[0]), 0);
+	if (hash_rxqs == NULL) {
+		err = ENOMEM;
+		ERROR("cannot allocate hash RX queues container: %s",
+		      strerror(err));
+		goto error;
+	}
+	for (i = 0, j = 0, k = 0;
+	     ((i != hash_rxqs_n) && (j != ind_tables_n));
+	     ++i) {
+		struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
+		enum hash_rxq_type type =
+			hash_rxq_type_from_pos(&ind_table_init[j], k);
+		struct rte_eth_rss_conf *priv_rss_conf =
+			(*priv->rss_conf)[type];
+		struct ibv_exp_rx_hash_conf hash_conf = {
+			.rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
+			.rx_hash_key_len = (priv_rss_conf ?
+					    priv_rss_conf->rss_key_len :
+					    rss_hash_default_key_len),
+			.rx_hash_key = (priv_rss_conf ?
+					priv_rss_conf->rss_key :
+					rss_hash_default_key),
+			.rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
+			.rwq_ind_tbl = (*ind_tables)[j],
+		};
+		struct ibv_exp_qp_init_attr qp_init_attr = {
+			.max_inl_recv = 0, /* Currently not supported. */
+			.qp_type = IBV_QPT_RAW_PACKET,
+			.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
+				      IBV_EXP_QP_INIT_ATTR_RX_HASH),
+			.pd = priv->pd,
+			.rx_hash_conf = &hash_conf,
+			.port_num = priv->port,
+		};
+
+		DEBUG("using indirection table %u for hash RX queue %u type %d",
+		      j, i, type);
+		*hash_rxq = (struct hash_rxq){
+			.priv = priv,
+			.qp = ibv_exp_create_qp(priv->ctx, &qp_init_attr),
+			.type = type,
+		};
+		if (hash_rxq->qp == NULL) {
+			err = (errno ? errno : EINVAL);
+			ERROR("Hash RX QP creation failure: %s",
+			      strerror(err));
+			goto error;
+		}
+		if (++k < ind_table_init[j].hash_types_n)
+			continue;
+		/* Switch to the next indirection table and reset hash RX
+		 * queue type array index. */
+		++j;
+		k = 0;
+	}
+	priv->ind_tables = ind_tables;
+	priv->ind_tables_n = ind_tables_n;
+	priv->hash_rxqs = hash_rxqs;
+	priv->hash_rxqs_n = hash_rxqs_n;
+	assert(err == 0);
+	return 0;
+error:
+	if (hash_rxqs != NULL) {
+		for (i = 0; (i != hash_rxqs_n); ++i) {
+			struct ibv_qp *qp = (*hash_rxqs)[i].qp;
+
+			if (qp == NULL)
+				continue;
+			claim_zero(ibv_destroy_qp(qp));
+		}
+		rte_free(hash_rxqs);
+	}
+	if (ind_tables != NULL) {
+		for (j = 0; (j != ind_tables_n); ++j) {
+			struct ibv_exp_rwq_ind_table *ind_table =
+				(*ind_tables)[j];
+
+			if (ind_table == NULL)
+				continue;
+			claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
+		}
+		rte_free(ind_tables);
+	}
+	return err;
+}
+
+/**
+ * Clean up hash RX queues and indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+priv_destroy_hash_rxqs(struct priv *priv)
+{
+	unsigned int i;
+
+	DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
+	if (priv->hash_rxqs_n == 0) {
+		assert(priv->hash_rxqs == NULL);
+		assert(priv->ind_tables == NULL);
+		return;
+	}
+	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
+		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
+		unsigned int j, k;
+
+		assert(hash_rxq->priv == priv);
+		assert(hash_rxq->qp != NULL);
+		/* Also check that there are no remaining flows. */
+		assert(hash_rxq->allmulti_flow == NULL);
+		assert(hash_rxq->promisc_flow == NULL);
+		for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
+			for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
+				assert(hash_rxq->mac_flow[j][k] == NULL);
+		claim_zero(ibv_destroy_qp(hash_rxq->qp));
+	}
+	priv->hash_rxqs_n = 0;
+	rte_free(priv->hash_rxqs);
+	priv->hash_rxqs = NULL;
+	for (i = 0; (i != priv->ind_tables_n); ++i) {
+		struct ibv_exp_rwq_ind_table *ind_table =
+			(*priv->ind_tables)[i];
+
+		assert(ind_table != NULL);
+		claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
+	}
+	priv->ind_tables_n = 0;
+	rte_free(priv->ind_tables);
+	priv->ind_tables = NULL;
+}
+
+/**
+ * Check whether a given flow type is allowed.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param type
+ *   Flow type to check.
+ *
+ * @return
+ *   Nonzero if the given flow type is allowed.
+ */
+int
+priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
+{
+	/* Only FLOW_TYPE_PROMISC is allowed when promiscuous mode
+	 * has been requested. */
+	if (priv->promisc_req)
+		return type == HASH_RXQ_FLOW_TYPE_PROMISC;
+	switch (type) {
+	case HASH_RXQ_FLOW_TYPE_PROMISC:
+		return !!priv->promisc_req;
+	case HASH_RXQ_FLOW_TYPE_ALLMULTI:
+		return !!priv->allmulti_req;
+	case HASH_RXQ_FLOW_TYPE_MAC:
+		return 1;
+	}
+	return 0;
+}
+
 /**
  * Allocate RX queue elements with scattered packets support.
  *
@@ -336,15 +856,15 @@ rxq_cleanup(struct rxq *rxq)
 		rxq_free_elts_sp(rxq);
 	else
 		rxq_free_elts(rxq);
-	if (rxq->if_qp != NULL) {
+	if (rxq->if_wq != NULL) {
 		assert(rxq->priv != NULL);
 		assert(rxq->priv->ctx != NULL);
-		assert(rxq->qp != NULL);
+		assert(rxq->wq != NULL);
 		params = (struct ibv_exp_release_intf_params){
 			.comp_mask = 0,
 		};
 		claim_zero(ibv_exp_release_intf(rxq->priv->ctx,
-						rxq->if_qp,
+						rxq->if_wq,
 						&params));
 	}
 	if (rxq->if_cq != NULL) {
@@ -358,12 +878,8 @@ rxq_cleanup(struct rxq *rxq)
 						rxq->if_cq,
 						&params));
 	}
-	if (rxq->qp != NULL) {
-		rxq_promiscuous_disable(rxq);
-		rxq_allmulticast_disable(rxq);
-		rxq_mac_addrs_del(rxq);
-		claim_zero(ibv_destroy_qp(rxq->qp));
-	}
+	if (rxq->wq != NULL)
+		claim_zero(ibv_exp_destroy_wq(rxq->wq));
 	if (rxq->cq != NULL)
 		claim_zero(ibv_destroy_cq(rxq->cq));
 	if (rxq->rd != NULL) {
@@ -382,112 +898,6 @@ rxq_cleanup(struct rxq *rxq)
 	memset(rxq, 0, sizeof(*rxq));
 }
 
-/**
- * Allocate a Queue Pair.
- * Optionally setup inline receive if supported.
- *
- * @param priv
- *   Pointer to private structure.
- * @param cq
- *   Completion queue to associate with QP.
- * @param desc
- *   Number of descriptors in QP (hint only).
- *
- * @return
- *   QP pointer or NULL in case of error.
- */
-static struct ibv_qp *
-rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
-	     struct ibv_exp_res_domain *rd)
-{
-	struct ibv_exp_qp_init_attr attr = {
-		/* CQ to be associated with the send queue. */
-		.send_cq = cq,
-		/* CQ to be associated with the receive queue. */
-		.recv_cq = cq,
-		.cap = {
-			/* Max number of outstanding WRs. */
-			.max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ?
-					priv->device_attr.max_qp_wr :
-					desc),
-			/* Max number of scatter/gather elements in a WR. */
-			.max_recv_sge = ((priv->device_attr.max_sge <
-					  MLX5_PMD_SGE_WR_N) ?
-					 priv->device_attr.max_sge :
-					 MLX5_PMD_SGE_WR_N),
-		},
-		.qp_type = IBV_QPT_RAW_PACKET,
-		.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
-			      IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),
-		.pd = priv->pd,
-		.res_domain = rd,
-	};
-
-	return ibv_exp_create_qp(priv->ctx, &attr);
-}
-
-#ifdef RSS_SUPPORT
-
-/**
- * Allocate a RSS Queue Pair.
- * Optionally setup inline receive if supported.
- *
- * @param priv
- *   Pointer to private structure.
- * @param cq
- *   Completion queue to associate with QP.
- * @param desc
- *   Number of descriptors in QP (hint only).
- * @param parent
- *   If nonzero, create a parent QP, otherwise a child.
- *
- * @return
- *   QP pointer or NULL in case of error.
- */
-static struct ibv_qp *
-rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
-		 int parent, struct ibv_exp_res_domain *rd)
-{
-	struct ibv_exp_qp_init_attr attr = {
-		/* CQ to be associated with the send queue. */
-		.send_cq = cq,
-		/* CQ to be associated with the receive queue. */
-		.recv_cq = cq,
-		.cap = {
-			/* Max number of outstanding WRs. */
-			.max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ?
-					priv->device_attr.max_qp_wr :
-					desc),
-			/* Max number of scatter/gather elements in a WR. */
-			.max_recv_sge = ((priv->device_attr.max_sge <
-					  MLX5_PMD_SGE_WR_N) ?
-					 priv->device_attr.max_sge :
-					 MLX5_PMD_SGE_WR_N),
-		},
-		.qp_type = IBV_QPT_RAW_PACKET,
-		.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
-			      IBV_EXP_QP_INIT_ATTR_RES_DOMAIN |
-			      IBV_EXP_QP_INIT_ATTR_QPG),
-		.pd = priv->pd,
-		.res_domain = rd,
-	};
-
-	if (parent) {
-		attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
-		/* TSS isn't necessary. */
-		attr.qpg.parent_attrib.tss_child_count = 0;
-		attr.qpg.parent_attrib.rss_child_count = priv->rxqs_n;
-		DEBUG("initializing parent RSS queue");
-	} else {
-		attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
-		attr.qpg.qpg_parent = priv->rxq_parent.qp;
-		DEBUG("initializing child RSS queue");
-	}
-	return ibv_exp_create_qp(priv->ctx, &attr);
-}
-
-#endif /* RSS_SUPPORT */
-
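The QP factories removed above have no one-to-one replacement; WQ creation now happens inline in rxq_setup() further down. For comparison, a condensed sketch of what an equivalent WQ-based factory would look like, built only from the ibv_exp_* attributes this patch introduces (the helper is illustrative, not part of the patch):

	/* Hypothetical counterpart to the removed rxq_setup_qp(): create a
	 * receive WQ bound to a CQ, with the same clamping as the QP path. */
	static struct ibv_exp_wq *
	rxq_setup_wq(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
		     struct ibv_exp_res_domain *rd)
	{
		struct ibv_exp_wq_init_attr attr = {
			.wq_type = IBV_EXP_WQT_RQ,
			/* Max number of outstanding WRs. */
			.max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ?
					priv->device_attr.max_qp_wr :
					desc),
			/* Max number of scatter/gather elements in a WR. */
			.max_recv_sge = ((priv->device_attr.max_sge <
					  MLX5_PMD_SGE_WR_N) ?
					 priv->device_attr.max_sge :
					 MLX5_PMD_SGE_WR_N),
			.pd = priv->pd,
			.cq = cq,
			.comp_mask = IBV_EXP_CREATE_WQ_RES_DOMAIN,
			.res_domain = rd,
		};

		return ibv_exp_create_wq(priv->ctx, &attr);
	}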
 /**
  * Reconfigure a RX queue with new parameters.
  *
@@ -512,15 +922,9 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
 	unsigned int desc_n;
 	struct rte_mbuf **pool;
 	unsigned int i, k;
-	struct ibv_exp_qp_attr mod;
+	struct ibv_exp_wq_attr mod;
 	int err;
-	int parent = (rxq == &priv->rxq_parent);
 
-	if (parent) {
-		ERROR("%p: cannot rehash parent queue %p",
-		      (void *)dev, (void *)rxq);
-		return EINVAL;
-	}
 	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
 	/* Number of descriptors and mbufs currently allocated. */
 	desc_n = (tmpl.elts_n * (tmpl.sp ? MLX5_PMD_SGE_WR_N : 1));
@@ -549,61 +953,17 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
 		DEBUG("%p: nothing to do", (void *)dev);
 		return 0;
 	}
-	/* Remove attached flows if RSS is disabled (no parent queue). */
-	if (!priv->rss) {
-		rxq_allmulticast_disable(&tmpl);
-		rxq_promiscuous_disable(&tmpl);
-		rxq_mac_addrs_del(&tmpl);
-		/* Update original queue in case of failure. */
-		rxq->allmulti_flow = tmpl.allmulti_flow;
-		rxq->promisc_flow = tmpl.promisc_flow;
-		memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
-	}
 	/* From now on, any failure will render the queue unusable.
-	 * Reinitialize QP. */
-	mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
-	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-	if (err) {
-		ERROR("%p: cannot reset QP: %s", (void *)dev, strerror(err));
-		assert(err > 0);
-		return err;
-	}
-	err = ibv_resize_cq(tmpl.cq, desc_n);
-	if (err) {
-		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
-		assert(err > 0);
-		return err;
-	}
-	mod = (struct ibv_exp_qp_attr){
-		/* Move the QP to this state. */
-		.qp_state = IBV_QPS_INIT,
-		/* Primary port number. */
-		.port_num = priv->port
+	 * Reinitialize WQ. */
+	mod = (struct ibv_exp_wq_attr){
+		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
+		.wq_state = IBV_EXP_WQS_RESET,
 	};
-	err = ibv_exp_modify_qp(tmpl.qp, &mod,
-				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
-				 IBV_EXP_QP_PORT));
+	err = ibv_exp_modify_wq(tmpl.wq, &mod);
 	if (err) {
-		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
-		      (void *)dev, strerror(err));
+		ERROR("%p: cannot reset WQ: %s", (void *)dev, strerror(err));
 		assert(err > 0);
 		return err;
-	};
-	/* Reconfigure flows. Do not care for errors. */
-	if (!priv->rss) {
-		if (priv->started)
-			rxq_mac_addrs_add(&tmpl);
-		if (priv->started && priv->promisc_req)
-			rxq_promiscuous_enable(&tmpl);
-		if (priv->started && priv->allmulti_req)
-			rxq_allmulticast_enable(&tmpl);
-		/* Update original queue in case of failure. */
-		rxq->allmulti_flow = tmpl.allmulti_flow;
-		rxq->promisc_flow = tmpl.promisc_flow;
-		memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
 	}
 	/* Allocate pool. */
 	pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
@@ -655,21 +1015,27 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
 	rxq->elts_n = 0;
 	rte_free(rxq->elts.sp);
 	rxq->elts.sp = NULL;
+	/* Change queue state to ready. */
+	mod = (struct ibv_exp_wq_attr){
+		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
+		.wq_state = IBV_EXP_WQS_RDY,
+	};
+	err = ibv_exp_modify_wq(tmpl.wq, &mod);
+	if (err) {
+		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
+		      (void *)dev, strerror(err));
+		goto error;
+	}
 	/* Post SGEs. */
-	assert(tmpl.if_qp != NULL);
+	assert(tmpl.if_wq != NULL);
 	if (tmpl.sp) {
 		struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;
 
 		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-#ifdef HAVE_EXP_QP_BURST_RECV_SG_LIST
-			err = tmpl.if_qp->recv_sg_list
-				(tmpl.qp,
+			err = tmpl.if_wq->recv_sg_list
+				(tmpl.wq,
 				 (*elts)[i].sges,
 				 RTE_DIM((*elts)[i].sges));
-#else /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
-			errno = ENOSYS;
-			err = -1;
-#endif /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
 			if (err)
 				break;
 		}
@@ -677,8 +1043,8 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
 		struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;
 
 		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-			err = tmpl.if_qp->recv_burst(
-				tmpl.qp,
+			err = tmpl.if_wq->recv_burst(
+				tmpl.wq,
 				&(*elts)[i].sge,
 				1);
 			if (err)
@@ -690,16 +1056,9 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
 		      (void *)dev, err);
 		/* Set err because it does not contain a valid errno value. */
 		err = EIO;
-		goto skip_rtr;
+		goto error;
 	}
-	mod = (struct ibv_exp_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-	if (err)
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(err));
-skip_rtr:
+error:
 	*rxq = tmpl;
 	assert(err >= 0);
 	return err;
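With this change, rxq_rehash() drives the receive queue through the two-state WQ machine (IBV_EXP_WQS_RESET, then IBV_EXP_WQS_RDY) instead of the QP RESET/INIT/RTR ladder. A condensed sketch of that cycle (hypothetical helper; element reallocation and error reporting trimmed):

	/* Hypothetical outline of the rehash sequence above: reset the WQ,
	 * swap the mbuf elements, return the WQ to ready state, then repost
	 * receive buffers through the WQ burst interface. */
	static int
	rxq_wq_cycle(struct rxq *rxq)
	{
		struct ibv_exp_wq_attr mod = {
			.attr_mask = IBV_EXP_WQ_ATTR_STATE,
			.wq_state = IBV_EXP_WQS_RESET,
		};
		int err = ibv_exp_modify_wq(rxq->wq, &mod);

		if (err)
			return err;
		/* ... reallocate queue elements here ... */
		mod.wq_state = IBV_EXP_WQS_RDY;
		return ibv_exp_modify_wq(rxq->wq, &mod);
	}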
@@ -735,30 +1094,20 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
 		.mp = mp,
 		.socket = socket
 	};
-	struct ibv_exp_qp_attr mod;
+	struct ibv_exp_wq_attr mod;
 	union {
 		struct ibv_exp_query_intf_params params;
 		struct ibv_exp_cq_init_attr cq;
 		struct ibv_exp_res_domain_init_attr rd;
+		struct ibv_exp_wq_init_attr wq;
 	} attr;
 	enum ibv_exp_query_intf_status status;
 	struct rte_mbuf *buf;
 	int ret = 0;
-	int parent = (rxq == &priv->rxq_parent);
 	unsigned int i;
+	unsigned int cq_size = desc;
 
 	(void)conf; /* Thresholds configuration (ignored). */
-	/*
-	 * If this is a parent queue, hardware must support RSS and
-	 * RSS must be enabled.
-	 */
-	assert((!parent) || ((priv->hw_rss) && (priv->rss)));
-	if (parent) {
-		/* Even if unused, ibv_create_cq() requires at least one
-		 * descriptor. */
-		desc = 1;
-		goto skip_mr;
-	}
 	if ((desc == 0) || (desc % MLX5_PMD_SGE_WR_N)) {
 		ERROR("%p: invalid number of RX descriptors (must be a"
 		      " multiple of %d)", (void *)dev, MLX5_PMD_SGE_WR_N);
@@ -801,7 +1150,6 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
 		      (void *)dev, strerror(ret));
 		goto error;
 	}
-skip_mr:
 	attr.rd = (struct ibv_exp_res_domain_init_attr){
 		.comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
 			      IBV_EXP_RES_DOMAIN_MSG_MODEL),
@@ -819,7 +1167,8 @@ skip_mr:
 		.comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
 		.res_domain = tmpl.rd,
 	};
-	tmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, NULL, 0, &attr.cq);
+	tmpl.cq = ibv_exp_create_cq(priv->ctx, cq_size, NULL, NULL, 0,
+				    &attr.cq);
 	if (tmpl.cq == NULL) {
 		ret = ENOMEM;
 		ERROR("%p: CQ creation failure: %s",
@@ -830,48 +1179,30 @@ skip_mr:
 	      priv->device_attr.max_qp_wr);
 	DEBUG("priv->device_attr.max_sge is %d",
 	      priv->device_attr.max_sge);
-#ifdef RSS_SUPPORT
-	if (priv->rss)
-		tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
-					   tmpl.rd);
-	else
-#endif /* RSS_SUPPORT */
-		tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);
-	if (tmpl.qp == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: QP creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	mod = (struct ibv_exp_qp_attr){
-		/* Move the QP to this state. */
-		.qp_state = IBV_QPS_INIT,
-		/* Primary port number. */
-		.port_num = priv->port
+	attr.wq = (struct ibv_exp_wq_init_attr){
+		.wq_context = NULL, /* Could be useful in the future. */
+		.wq_type = IBV_EXP_WQT_RQ,
+		/* Max number of outstanding WRs. */
+		.max_recv_wr = ((priv->device_attr.max_qp_wr < (int)cq_size) ?
+				priv->device_attr.max_qp_wr :
+				(int)cq_size),
+		/* Max number of scatter/gather elements in a WR. */
+		.max_recv_sge = ((priv->device_attr.max_sge <
+				  MLX5_PMD_SGE_WR_N) ?
				 priv->device_attr.max_sge :
+				 MLX5_PMD_SGE_WR_N),
+		.pd = priv->pd,
+		.cq = tmpl.cq,
+		.comp_mask = IBV_EXP_CREATE_WQ_RES_DOMAIN,
+		.res_domain = tmpl.rd,
 	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod,
-				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
-				 IBV_EXP_QP_PORT));
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
+	tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
+	if (tmpl.wq == NULL) {
+		ret = (errno ? errno : EINVAL);
+		ERROR("%p: WQ creation failure: %s",
 		      (void *)dev, strerror(ret));
 		goto error;
 	}
-	if ((parent) || (!priv->rss)) {
-		/* Configure MAC and broadcast addresses. */
-		ret = rxq_mac_addrs_add(&tmpl);
-		if (ret) {
-			ERROR("%p: QP flow attachment failed: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
-	/* Allocate descriptors for RX queues, except for the RSS parent. */
-	if (parent)
-		goto skip_alloc;
 	if (tmpl.sp)
 		ret = rxq_alloc_elts_sp(&tmpl, desc, NULL);
 	else
@@ -881,7 +1212,6 @@ skip_mr:
 		      (void *)dev, strerror(ret));
 		goto error;
 	}
-skip_alloc:
 	/* Save port ID. */
 	tmpl.port_id = dev->data->port_id;
 	DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id);
@@ -898,38 +1228,44 @@ skip_alloc:
 	}
 	attr.params = (struct ibv_exp_query_intf_params){
 		.intf_scope = IBV_EXP_INTF_GLOBAL,
-		.intf = IBV_EXP_INTF_QP_BURST,
-		.obj = tmpl.qp,
+		.intf = IBV_EXP_INTF_WQ,
+		.obj = tmpl.wq,
 	};
-	tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
-	if (tmpl.if_qp == NULL) {
-		ERROR("%p: QP interface family query failed with status %d",
+	tmpl.if_wq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
+	if (tmpl.if_wq == NULL) {
+		ERROR("%p: WQ interface family query failed with status %d",
 		      (void *)dev, status);
 		goto error;
 	}
+	/* Change queue state to ready. */
+	mod = (struct ibv_exp_wq_attr){
+		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
+		.wq_state = IBV_EXP_WQS_RDY,
+	};
+	ret = ibv_exp_modify_wq(tmpl.wq, &mod);
+	if (ret) {
+		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
+		      (void *)dev, strerror(ret));
+		goto error;
+	}
 	/* Post SGEs. */
-	if (!parent && tmpl.sp) {
+	if (tmpl.sp) {
 		struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;
 
 		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-#ifdef HAVE_EXP_QP_BURST_RECV_SG_LIST
-			ret = tmpl.if_qp->recv_sg_list
-				(tmpl.qp,
+			ret = tmpl.if_wq->recv_sg_list
+				(tmpl.wq,
 				 (*elts)[i].sges,
 				 RTE_DIM((*elts)[i].sges));
-#else /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
-			errno = ENOSYS;
-			ret = -1;
-#endif /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
 			if (ret)
 				break;
 		}
-	} else if (!parent) {
+	} else {
 		struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;
 
 		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-			ret = tmpl.if_qp->recv_burst(
-				tmpl.qp,
+			ret = tmpl.if_wq->recv_burst(
+				tmpl.wq,
 				&(*elts)[i].sge,
 				1);
 			if (ret)
@@ -943,15 +1279,6 @@ skip_alloc:
 		ret = EIO;
 		goto error;
 	}
-	mod = (struct ibv_exp_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
 	rxq_cleanup(rxq);
@@ -1055,7 +1382,6 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 		return;
 	priv = rxq->priv;
 	priv_lock(priv);
-	assert(rxq != &priv->rxq_parent);
 	for (i = 0; (i != priv->rxqs_n); ++i)
 		if ((*priv->rxqs)[i] == rxq) {
 			DEBUG("%p: removing RX queue %p from list",
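Indirection table sizes are rounded up to a power of two through log2above() before ibv_exp_create_rwq_ind_table() is called, which is why priv_create_hash_rxqs() suggests configuring a power-of-two number of RX queues: with any other count, some WQs occupy more table slots than others. A self-contained illustration of that rounding (reimplemented here for demonstration only; the driver uses its own helper, assumed to live in mlx5_utils.h):

	#include <assert.h>

	/* Illustrative stand-in for the log2above() rounding used for
	 * .log_ind_tbl_size: smallest l such that (1 << l) >= v. */
	static unsigned int
	ind_tbl_log_size(unsigned int v)
	{
		unsigned int l = 0;

		while ((1u << l) < v)
			++l;
		return l;
	}

	int
	main(void)
	{
		/* 6 WQs need an 8-entry table: two WQs appear twice. */
		assert(ind_tbl_log_size(6) == 3);
		assert(ind_tbl_log_size(8) == 3);
		return 0;
	}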