X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxq.c;h=cbb017bb966cf965f722fd665edb1f57047ddcf0;hb=ce2eabdd43ecfdd3ada379ed884063e82fa266e9;hp=6d8f7d2a7ed8b05c3d96701586378b07c6a81f9b;hpb=ecc1c29df8564d4e880e9b5946114763424cf60c;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index 6d8f7d2a7e..cbb017bb96 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -62,10 +62,135 @@ #include "mlx5.h" #include "mlx5_rxtx.h" #include "mlx5_utils.h" +#include "mlx5_autoconf.h" #include "mlx5_defs.h" +/* Initialization data for hash RX queues. */ +const struct hash_rxq_init hash_rxq_init[] = { + [HASH_RXQ_TCPV4] = { + .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 | + IBV_EXP_RX_HASH_DST_IPV4 | + IBV_EXP_RX_HASH_SRC_PORT_TCP | + IBV_EXP_RX_HASH_DST_PORT_TCP), + .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP, + .flow_priority = 0, + .flow_spec.tcp_udp = { + .type = IBV_EXP_FLOW_SPEC_TCP, + .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp), + }, + .underlayer = &hash_rxq_init[HASH_RXQ_IPV4], + }, + [HASH_RXQ_UDPV4] = { + .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 | + IBV_EXP_RX_HASH_DST_IPV4 | + IBV_EXP_RX_HASH_SRC_PORT_UDP | + IBV_EXP_RX_HASH_DST_PORT_UDP), + .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP, + .flow_priority = 0, + .flow_spec.tcp_udp = { + .type = IBV_EXP_FLOW_SPEC_UDP, + .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp), + }, + .underlayer = &hash_rxq_init[HASH_RXQ_IPV4], + }, + [HASH_RXQ_IPV4] = { + .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 | + IBV_EXP_RX_HASH_DST_IPV4), + .dpdk_rss_hf = (ETH_RSS_IPV4 | + ETH_RSS_FRAG_IPV4), + .flow_priority = 1, + .flow_spec.ipv4 = { + .type = IBV_EXP_FLOW_SPEC_IPV4, + .size = sizeof(hash_rxq_init[0].flow_spec.ipv4), + }, + .underlayer = &hash_rxq_init[HASH_RXQ_ETH], + }, +#ifdef HAVE_FLOW_SPEC_IPV6 + [HASH_RXQ_TCPV6] = { + .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 | + IBV_EXP_RX_HASH_DST_IPV6 | + IBV_EXP_RX_HASH_SRC_PORT_TCP | + IBV_EXP_RX_HASH_DST_PORT_TCP), + .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP, + .flow_priority = 0, + .flow_spec.tcp_udp = { + .type = IBV_EXP_FLOW_SPEC_TCP, + .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp), + }, + .underlayer = &hash_rxq_init[HASH_RXQ_IPV6], + }, + [HASH_RXQ_UDPV6] = { + .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 | + IBV_EXP_RX_HASH_DST_IPV6 | + IBV_EXP_RX_HASH_SRC_PORT_UDP | + IBV_EXP_RX_HASH_DST_PORT_UDP), + .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP, + .flow_priority = 0, + .flow_spec.tcp_udp = { + .type = IBV_EXP_FLOW_SPEC_UDP, + .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp), + }, + .underlayer = &hash_rxq_init[HASH_RXQ_IPV6], + }, + [HASH_RXQ_IPV6] = { + .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 | + IBV_EXP_RX_HASH_DST_IPV6), + .dpdk_rss_hf = (ETH_RSS_IPV6 | + ETH_RSS_FRAG_IPV6), + .flow_priority = 1, + .flow_spec.ipv6 = { + .type = IBV_EXP_FLOW_SPEC_IPV6, + .size = sizeof(hash_rxq_init[0].flow_spec.ipv6), + }, + .underlayer = &hash_rxq_init[HASH_RXQ_ETH], + }, +#endif /* HAVE_FLOW_SPEC_IPV6 */ + [HASH_RXQ_ETH] = { + .hash_fields = 0, + .dpdk_rss_hf = 0, + .flow_priority = 2, + .flow_spec.eth = { + .type = IBV_EXP_FLOW_SPEC_ETH, + .size = sizeof(hash_rxq_init[0].flow_spec.eth), + }, + .underlayer = NULL, + }, +}; + +/* Number of entries in hash_rxq_init[]. */ +const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init); + +/* Initialization data for hash RX queue indirection tables. */ +static const struct ind_table_init ind_table_init[] = { + { + .max_size = -1u, /* Superseded by HW limitations. 
*/ + .hash_types = + 1 << HASH_RXQ_TCPV4 | + 1 << HASH_RXQ_UDPV4 | + 1 << HASH_RXQ_IPV4 | +#ifdef HAVE_FLOW_SPEC_IPV6 + 1 << HASH_RXQ_TCPV6 | + 1 << HASH_RXQ_UDPV6 | + 1 << HASH_RXQ_IPV6 | +#endif /* HAVE_FLOW_SPEC_IPV6 */ + 0, +#ifdef HAVE_FLOW_SPEC_IPV6 + .hash_types_n = 6, +#else /* HAVE_FLOW_SPEC_IPV6 */ + .hash_types_n = 3, +#endif /* HAVE_FLOW_SPEC_IPV6 */ + }, + { + .max_size = 1, + .hash_types = 1 << HASH_RXQ_ETH, + .hash_types_n = 1, + }, +}; + +#define IND_TABLE_INIT_N RTE_DIM(ind_table_init) + /* Default RSS hash key also used for ConnectX-3. */ -static uint8_t hash_rxq_default_key[] = { +uint8_t rss_hash_default_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, 0xfc, 0xa2, 0x83, 0x19, @@ -78,24 +203,141 @@ static uint8_t hash_rxq_default_key[] = { 0xfc, 0x1f, 0xdc, 0x2a, }; +/* Length of the default RSS hash key. */ +const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key); + /** - * Return nearest power of two above input value. + * Populate flow steering rule for a given hash RX queue type using + * information from hash_rxq_init[]. Nothing is written to flow_attr when + * flow_attr_size is not large enough, but the required size is still returned. * - * @param v - * Input value. + * @param priv + * Pointer to private structure. + * @param[out] flow_attr + * Pointer to flow attribute structure to fill. Note that the allocated + * area must be larger and large enough to hold all flow specifications. + * @param flow_attr_size + * Entire size of flow_attr and trailing room for flow specifications. + * @param type + * Hash RX queue type to use for flow steering rule. * * @return - * Nearest power of two above input value. + * Total size of the flow attribute buffer. No errors are defined. + */ +size_t +priv_flow_attr(struct priv *priv, struct ibv_exp_flow_attr *flow_attr, + size_t flow_attr_size, enum hash_rxq_type type) +{ + size_t offset = sizeof(*flow_attr); + const struct hash_rxq_init *init = &hash_rxq_init[type]; + + assert(priv != NULL); + assert((size_t)type < RTE_DIM(hash_rxq_init)); + do { + offset += init->flow_spec.hdr.size; + init = init->underlayer; + } while (init != NULL); + if (offset > flow_attr_size) + return offset; + flow_attr_size = offset; + init = &hash_rxq_init[type]; + *flow_attr = (struct ibv_exp_flow_attr){ + .type = IBV_EXP_FLOW_ATTR_NORMAL, +#ifdef MLX5_FDIR_SUPPORT + /* Priorities < 3 are reserved for flow director. */ + .priority = init->flow_priority + 3, +#else /* MLX5_FDIR_SUPPORT */ + .priority = init->flow_priority, +#endif /* MLX5_FDIR_SUPPORT */ + .num_of_specs = 0, + .port = priv->port, + .flags = 0, + }; + do { + offset -= init->flow_spec.hdr.size; + memcpy((void *)((uintptr_t)flow_attr + offset), + &init->flow_spec, + init->flow_spec.hdr.size); + ++flow_attr->num_of_specs; + init = init->underlayer; + } while (init != NULL); + return flow_attr_size; +} + +/** + * Convert hash type position in indirection table initializer to + * hash RX queue type. + * + * @param table + * Indirection table initializer. + * @param pos + * Hash type position. + * + * @return + * Hash RX queue type. + */ +static enum hash_rxq_type +hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos) +{ + enum hash_rxq_type type = 0; + + assert(pos < table->hash_types_n); + do { + if ((table->hash_types & (1 << type)) && (pos-- == 0)) + break; + ++type; + } while (1); + return type; +} + +/** + * Filter out disabled hash RX queue types from ind_table_init[]. + * + * @param priv + * Pointer to private structure. 
+ * @param[out] table + * Output table. + * + * @return + * Number of table entries. */ static unsigned int -log2above(unsigned int v) +priv_make_ind_table_init(struct priv *priv, + struct ind_table_init (*table)[IND_TABLE_INIT_N]) { - unsigned int l; - unsigned int r; + uint64_t rss_hf; + unsigned int i; + unsigned int j; + unsigned int table_n = 0; + /* Mandatory to receive frames not handled by normal hash RX queues. */ + unsigned int hash_types_sup = 1 << HASH_RXQ_ETH; - for (l = 0, r = 0; (v >> 1); ++l, v >>= 1) - r |= (v & 1); - return (l + r); + rss_hf = priv->rss_hf; + /* Process other protocols only if more than one queue. */ + if (priv->rxqs_n > 1) + for (i = 0; (i != hash_rxq_init_n); ++i) + if (rss_hf & hash_rxq_init[i].dpdk_rss_hf) + hash_types_sup |= (1 << i); + + /* Filter out entries whose protocols are not in the set. */ + for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) { + unsigned int nb; + unsigned int h; + + /* j is increased only if the table has valid protocols. */ + assert(j <= i); + (*table)[j] = ind_table_init[i]; + (*table)[j].hash_types &= hash_types_sup; + for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h) + if (((*table)[j].hash_types >> h) & 0x1) + ++nb; + (*table)[i].hash_types_n = nb; + if (nb) { + ++table_n; + ++j; + } + } + return table_n; } /** @@ -110,21 +352,20 @@ log2above(unsigned int v) int priv_create_hash_rxqs(struct priv *priv) { - static const uint64_t rss_hash_table[] = { - /* TCPv4. */ - (IBV_EXP_RX_HASH_SRC_IPV4 | IBV_EXP_RX_HASH_DST_IPV4 | - IBV_EXP_RX_HASH_SRC_PORT_TCP | IBV_EXP_RX_HASH_DST_PORT_TCP), - /* UDPv4. */ - (IBV_EXP_RX_HASH_SRC_IPV4 | IBV_EXP_RX_HASH_DST_IPV4 | - IBV_EXP_RX_HASH_SRC_PORT_UDP | IBV_EXP_RX_HASH_DST_PORT_UDP), - /* Other IPv4. */ - (IBV_EXP_RX_HASH_SRC_IPV4 | IBV_EXP_RX_HASH_DST_IPV4), - /* None, used for everything else. */ - 0, - }; + struct ibv_exp_wq *wqs[priv->reta_idx_n]; + struct ind_table_init ind_table_init[IND_TABLE_INIT_N]; + unsigned int ind_tables_n = + priv_make_ind_table_init(priv, &ind_table_init); + unsigned int hash_rxqs_n = 0; + struct hash_rxq (*hash_rxqs)[] = NULL; + struct ibv_exp_rwq_ind_table *(*ind_tables)[] = NULL; + unsigned int i; + unsigned int j; + unsigned int k; + int err = 0; - DEBUG("allocating hash RX queues for %u WQs", priv->rxqs_n); - assert(priv->ind_table == NULL); + assert(priv->ind_tables == NULL); + assert(priv->ind_tables_n == 0); assert(priv->hash_rxqs == NULL); assert(priv->hash_rxqs_n == 0); assert(priv->pd != NULL); @@ -132,47 +373,54 @@ priv_create_hash_rxqs(struct priv *priv) if (priv->rxqs_n == 0) return EINVAL; assert(priv->rxqs != NULL); - - /* FIXME: large data structures are allocated on the stack. */ - unsigned int wqs_n = (1 << log2above(priv->rxqs_n)); - struct ibv_exp_wq *wqs[wqs_n]; - struct ibv_exp_rwq_ind_table_init_attr ind_init_attr = { - .pd = priv->pd, - .log_ind_tbl_size = log2above(priv->rxqs_n), - .ind_tbl = wqs, - .comp_mask = 0, - }; - struct ibv_exp_rwq_ind_table *ind_table = NULL; - /* If only one RX queue is configured, RSS is not needed and a single - * empty hash entry is used (last rss_hash_table[] entry). */ - unsigned int hash_rxqs_n = - ((priv->rxqs_n == 1) ? 
1 : RTE_DIM(rss_hash_table)); - struct hash_rxq (*hash_rxqs)[hash_rxqs_n] = NULL; - unsigned int i; - unsigned int j; - int err = 0; - - if (wqs_n < priv->rxqs_n) { - ERROR("cannot handle this many RX queues (%u)", priv->rxqs_n); - err = ERANGE; - goto error; + if (ind_tables_n == 0) { + ERROR("all hash RX queue types have been filtered out," + " indirection table cannot be created"); + return EINVAL; + } + if (priv->rxqs_n & (priv->rxqs_n - 1)) { + INFO("%u RX queues are configured, consider rounding this" + " number to the next power of two for better balancing", + priv->rxqs_n); + DEBUG("indirection table extended to assume %u WQs", + priv->reta_idx_n); } - if (wqs_n != priv->rxqs_n) - WARN("%u RX queues are configured, consider rounding this" - " number to the next power of two (%u) for optimal" - " performance", - priv->rxqs_n, wqs_n); - /* When the number of RX queues is not a power of two, the remaining - * table entries are padded with reused WQs and hashes are not spread - * uniformly. */ - for (i = 0, j = 0; (i != wqs_n); ++i) { - wqs[i] = (*priv->rxqs)[j]->wq; - if (++j == priv->rxqs_n) - j = 0; + for (i = 0; (i != priv->reta_idx_n); ++i) + wqs[i] = (*priv->rxqs)[(*priv->reta_idx)[i]]->wq; + /* Get number of hash RX queues to configure. */ + for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i) + hash_rxqs_n += ind_table_init[i].hash_types_n; + DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables", + hash_rxqs_n, priv->rxqs_n, ind_tables_n); + /* Create indirection tables. */ + ind_tables = rte_calloc(__func__, ind_tables_n, + sizeof((*ind_tables)[0]), 0); + if (ind_tables == NULL) { + err = ENOMEM; + ERROR("cannot allocate indirection tables container: %s", + strerror(err)); + goto error; } - errno = 0; - ind_table = ibv_exp_create_rwq_ind_table(priv->ctx, &ind_init_attr); - if (ind_table == NULL) { + for (i = 0; (i != ind_tables_n); ++i) { + struct ibv_exp_rwq_ind_table_init_attr ind_init_attr = { + .pd = priv->pd, + .log_ind_tbl_size = 0, /* Set below. */ + .ind_tbl = wqs, + .comp_mask = 0, + }; + unsigned int ind_tbl_size = ind_table_init[i].max_size; + struct ibv_exp_rwq_ind_table *ind_table; + + if (priv->reta_idx_n < ind_tbl_size) + ind_tbl_size = priv->reta_idx_n; + ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size); + errno = 0; + ind_table = ibv_exp_create_rwq_ind_table(priv->ctx, + &ind_init_attr); + if (ind_table != NULL) { + (*ind_tables)[i] = ind_table; + continue; + } /* Not clear whether errno is set. */ err = (errno ? errno : EINVAL); ERROR("RX indirection table creation failed with error %d: %s", @@ -180,24 +428,32 @@ priv_create_hash_rxqs(struct priv *priv) goto error; } /* Allocate array that holds hash RX queues and related data. 
*/ - hash_rxqs = rte_malloc(__func__, sizeof(*hash_rxqs), 0); + hash_rxqs = rte_calloc(__func__, hash_rxqs_n, + sizeof((*hash_rxqs)[0]), 0); if (hash_rxqs == NULL) { err = ENOMEM; ERROR("cannot allocate hash RX queues container: %s", strerror(err)); goto error; } - for (i = 0, j = (RTE_DIM(rss_hash_table) - hash_rxqs_n); - (j != RTE_DIM(rss_hash_table)); - ++i, ++j) { + for (i = 0, j = 0, k = 0; + ((i != hash_rxqs_n) && (j != ind_tables_n)); + ++i) { struct hash_rxq *hash_rxq = &(*hash_rxqs)[i]; - + enum hash_rxq_type type = + hash_rxq_type_from_pos(&ind_table_init[j], k); + struct rte_eth_rss_conf *priv_rss_conf = + (*priv->rss_conf)[type]; struct ibv_exp_rx_hash_conf hash_conf = { .rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ, - .rx_hash_key_len = sizeof(hash_rxq_default_key), - .rx_hash_key = hash_rxq_default_key, - .rx_hash_fields_mask = rss_hash_table[j], - .rwq_ind_tbl = ind_table, + .rx_hash_key_len = (priv_rss_conf ? + priv_rss_conf->rss_key_len : + rss_hash_default_key_len), + .rx_hash_key = (priv_rss_conf ? + priv_rss_conf->rss_key : + rss_hash_default_key), + .rx_hash_fields_mask = hash_rxq_init[type].hash_fields, + .rwq_ind_tbl = (*ind_tables)[j], }; struct ibv_exp_qp_init_attr qp_init_attr = { .max_inl_recv = 0, /* Currently not supported. */ @@ -209,30 +465,54 @@ priv_create_hash_rxqs(struct priv *priv) .port_num = priv->port, }; + DEBUG("using indirection table %u for hash RX queue %u type %d", + j, i, type); *hash_rxq = (struct hash_rxq){ .priv = priv, .qp = ibv_exp_create_qp(priv->ctx, &qp_init_attr), + .type = type, }; if (hash_rxq->qp == NULL) { err = (errno ? errno : EINVAL); ERROR("Hash RX QP creation failure: %s", strerror(err)); - while (i) { - hash_rxq = &(*hash_rxqs)[--i]; - claim_zero(ibv_destroy_qp(hash_rxq->qp)); - } goto error; } + if (++k < ind_table_init[j].hash_types_n) + continue; + /* Switch to the next indirection table and reset hash RX + * queue type array index. */ + ++j; + k = 0; } - priv->ind_table = ind_table; + priv->ind_tables = ind_tables; + priv->ind_tables_n = ind_tables_n; priv->hash_rxqs = hash_rxqs; priv->hash_rxqs_n = hash_rxqs_n; assert(err == 0); return 0; error: - rte_free(hash_rxqs); - if (ind_table != NULL) - claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table)); + if (hash_rxqs != NULL) { + for (i = 0; (i != hash_rxqs_n); ++i) { + struct ibv_qp *qp = (*hash_rxqs)[i].qp; + + if (qp == NULL) + continue; + claim_zero(ibv_destroy_qp(qp)); + } + rte_free(hash_rxqs); + } + if (ind_tables != NULL) { + for (j = 0; (j != ind_tables_n); ++j) { + struct ibv_exp_rwq_ind_table *ind_table = + (*ind_tables)[j]; + + if (ind_table == NULL) + continue; + claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table)); + } + rte_free(ind_tables); + } return err; } @@ -250,7 +530,7 @@ priv_destroy_hash_rxqs(struct priv *priv) DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n); if (priv->hash_rxqs_n == 0) { assert(priv->hash_rxqs == NULL); - assert(priv->ind_table == NULL); + assert(priv->ind_tables == NULL); return; } for (i = 0; (i != priv->hash_rxqs_n); ++i) { @@ -260,8 +540,11 @@ priv_destroy_hash_rxqs(struct priv *priv) assert(hash_rxq->priv == priv); assert(hash_rxq->qp != NULL); /* Also check that there are no remaining flows. 
*/ - assert(hash_rxq->allmulti_flow == NULL); - assert(hash_rxq->promisc_flow == NULL); + for (j = 0; (j != RTE_DIM(hash_rxq->special_flow)); ++j) + for (k = 0; + (k != RTE_DIM(hash_rxq->special_flow[j])); + ++k) + assert(hash_rxq->special_flow[j][k] == NULL); for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j) for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k) assert(hash_rxq->mac_flow[j][k] == NULL); @@ -270,8 +553,84 @@ priv_destroy_hash_rxqs(struct priv *priv) priv->hash_rxqs_n = 0; rte_free(priv->hash_rxqs); priv->hash_rxqs = NULL; - claim_zero(ibv_exp_destroy_rwq_ind_table(priv->ind_table)); - priv->ind_table = NULL; + for (i = 0; (i != priv->ind_tables_n); ++i) { + struct ibv_exp_rwq_ind_table *ind_table = + (*priv->ind_tables)[i]; + + assert(ind_table != NULL); + claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table)); + } + priv->ind_tables_n = 0; + rte_free(priv->ind_tables); + priv->ind_tables = NULL; +} + +/** + * Check whether a given flow type is allowed. + * + * @param priv + * Pointer to private structure. + * @param type + * Flow type to check. + * + * @return + * Nonzero if the given flow type is allowed. + */ +int +priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type) +{ + /* Only FLOW_TYPE_PROMISC is allowed when promiscuous mode + * has been requested. */ + if (priv->promisc_req) + return type == HASH_RXQ_FLOW_TYPE_PROMISC; + switch (type) { + case HASH_RXQ_FLOW_TYPE_PROMISC: + return !!priv->promisc_req; + case HASH_RXQ_FLOW_TYPE_ALLMULTI: + return !!priv->allmulti_req; + case HASH_RXQ_FLOW_TYPE_BROADCAST: +#ifdef HAVE_FLOW_SPEC_IPV6 + case HASH_RXQ_FLOW_TYPE_IPV6MULTI: +#endif /* HAVE_FLOW_SPEC_IPV6 */ + /* If allmulti is enabled, broadcast and ipv6multi + * are unnecessary. */ + return !priv->allmulti_req; + case HASH_RXQ_FLOW_TYPE_MAC: + return 1; + default: + /* Unsupported flow type is not allowed. */ + return 0; + } + return 0; +} + +/** + * Automatically enable/disable flows according to configuration. + * + * @param priv + * Private structure. + * + * @return + * 0 on success, errno value on failure. + */ +int +priv_rehash_flows(struct priv *priv) +{ + unsigned int i; + + for (i = 0; (i != RTE_DIM((*priv->hash_rxqs)[0].special_flow)); ++i) + if (!priv_allow_flow_type(priv, i)) { + priv_special_flow_disable(priv, i); + } else { + int ret = priv_special_flow_enable(priv, i); + + if (ret) + return ret; + } + if (priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC)) + return priv_mac_addrs_enable(priv); + priv_mac_addrs_disable(priv); + return 0; } /** @@ -545,6 +904,8 @@ rxq_cleanup(struct rxq *rxq) rxq_free_elts_sp(rxq); else rxq_free_elts(rxq); + rxq->poll = NULL; + rxq->recv = NULL; if (rxq->if_wq != NULL) { assert(rxq->priv != NULL); assert(rxq->priv->ctx != NULL); @@ -747,6 +1108,10 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq) err = EIO; goto error; } + if (tmpl.sp) + tmpl.recv = tmpl.if_wq->recv_sg_list; + else + tmpl.recv = tmpl.if_wq->recv_burst; error: *rxq = tmpl; assert(err >= 0); @@ -828,11 +1193,7 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc, DEBUG("%p: %s scattered packets support (%u WRs)", (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc); /* Use the entire RX mempool as the memory region. 
*/ - tmpl.mr = ibv_reg_mr(priv->pd, - (void *)mp->elt_va_start, - (mp->elt_va_end - mp->elt_va_start), - (IBV_ACCESS_LOCAL_WRITE | - IBV_ACCESS_REMOTE_WRITE)); + tmpl.mr = mlx5_mp2mr(priv->pd, mp); if (tmpl.mr == NULL) { ret = EINVAL; ERROR("%p: MR creation failure: %s", @@ -868,6 +1229,8 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc, priv->device_attr.max_qp_wr); DEBUG("priv->device_attr.max_sge is %d", priv->device_attr.max_sge); + /* Configure VLAN stripping. */ + tmpl.vlan_strip = dev->data->dev_conf.rxmode.hw_vlan_strip; attr.wq = (struct ibv_exp_wq_init_attr){ .wq_context = NULL, /* Could be useful in the future. */ .wq_type = IBV_EXP_WQT_RQ, @@ -882,9 +1245,58 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc, MLX5_PMD_SGE_WR_N), .pd = priv->pd, .cq = tmpl.cq, - .comp_mask = IBV_EXP_CREATE_WQ_RES_DOMAIN, + .comp_mask = + IBV_EXP_CREATE_WQ_RES_DOMAIN | +#ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS + IBV_EXP_CREATE_WQ_VLAN_OFFLOADS | +#endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */ + 0, .res_domain = tmpl.rd, +#ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS + .vlan_offloads = (tmpl.vlan_strip ? + IBV_EXP_RECEIVE_WQ_CVLAN_STRIP : + 0), +#endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */ }; + +#ifdef HAVE_VERBS_FCS + /* By default, FCS (CRC) is stripped by hardware. */ + if (dev->data->dev_conf.rxmode.hw_strip_crc) { + tmpl.crc_present = 0; + } else if (priv->hw_fcs_strip) { + /* Ask HW/Verbs to leave CRC in place when supported. */ + attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_SCATTER_FCS; + attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS; + tmpl.crc_present = 1; + } else { + WARN("%p: CRC stripping has been disabled but will still" + " be performed by hardware, make sure MLNX_OFED and" + " firmware are up to date", + (void *)dev); + tmpl.crc_present = 0; + } + DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from" + " incoming frames to hide it", + (void *)dev, + tmpl.crc_present ? "disabled" : "enabled", + tmpl.crc_present << 2); +#endif /* HAVE_VERBS_FCS */ + +#ifdef HAVE_VERBS_RX_END_PADDING + if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING")) + ; /* Nothing else to do. */ + else if (priv->hw_padding) { + INFO("%p: enabling packet padding on queue %p", + (void *)dev, (void *)rxq); + attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_RX_END_PADDING; + attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS; + } else + WARN("%p: packet padding has been requested but is not" + " supported, make sure MLNX_OFED and firmware are" + " up to date", + (void *)dev); +#endif /* HAVE_VERBS_RX_END_PADDING */ + tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq); if (tmpl.wq == NULL) { ret = (errno ? errno : EINVAL); @@ -906,6 +1318,9 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc, DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id); attr.params = (struct ibv_exp_query_intf_params){ .intf_scope = IBV_EXP_INTF_GLOBAL, +#ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS + .intf_version = 1, +#endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */ .intf = IBV_EXP_INTF_CQ, .obj = tmpl.cq, }; @@ -974,6 +1389,16 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc, *rxq = tmpl; DEBUG("%p: rxq updated with %p", (void *)rxq, (void *)&tmpl); assert(ret == 0); + /* Assign function in queue. 
*/ +#ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS + rxq->poll = rxq->if_cq->poll_length_flags_cvlan; +#else /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */ + rxq->poll = rxq->if_cq->poll_length_flags; +#endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */ + if (rxq->sp) + rxq->recv = rxq->if_wq->recv_sg_list; + else + rxq->recv = rxq->if_wq->recv_burst; return 0; error: rxq_cleanup(&tmpl); @@ -1009,6 +1434,9 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, struct rxq *rxq = (*priv->rxqs)[idx]; int ret; + if (mlx5_is_secondary()) + return -E_RTE_SECONDARY; + priv_lock(priv); DEBUG("%p: configuring queue %u for %u descriptors", (void *)dev, idx, desc); @@ -1067,6 +1495,9 @@ mlx5_rx_queue_release(void *dpdk_rxq) struct priv *priv; unsigned int i; + if (mlx5_is_secondary()) + return; + if (rxq == NULL) return; priv = rxq->priv; @@ -1082,3 +1513,43 @@ mlx5_rx_queue_release(void *dpdk_rxq) rte_free(rxq); priv_unlock(priv); } + +/** + * DPDK callback for RX in secondary processes. + * + * This function configures all queues from primary process information + * if necessary before reverting to the normal RX burst callback. + * + * @param dpdk_rxq + * Generic pointer to RX queue structure. + * @param[out] pkts + * Array to store received packets. + * @param pkts_n + * Maximum number of packets in array. + * + * @return + * Number of packets successfully received (<= pkts_n). + */ +uint16_t +mlx5_rx_burst_secondary_setup(void *dpdk_rxq, struct rte_mbuf **pkts, + uint16_t pkts_n) +{ + struct rxq *rxq = dpdk_rxq; + struct priv *priv = mlx5_secondary_data_setup(rxq->priv); + struct priv *primary_priv; + unsigned int index; + + if (priv == NULL) + return 0; + primary_priv = + mlx5_secondary_data[priv->dev->data->port_id].primary_priv; + /* Look for queue index in both private structures. */ + for (index = 0; index != priv->rxqs_n; ++index) + if (((*primary_priv->rxqs)[index] == rxq) || + ((*priv->rxqs)[index] == rxq)) + break; + if (index == priv->rxqs_n) + return 0; + rxq = (*priv->rxqs)[index]; + return priv->dev->rx_pkt_burst(rxq, pkts, pkts_n); +}
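
The hash_rxq_type_from_pos() helper added by this patch recovers a hash RX queue type from its position among the bits set in an indirection table initializer (see priv_create_hash_rxqs() above, which walks each table's hash_types_n positions). The standalone sketch below, which is not part of the patch, illustrates the same bit-walking logic with a simplified, hypothetical enum and bitmask; it compiles on its own and does not depend on the mlx5 driver or Verbs headers.

#include <assert.h>
#include <stdio.h>

/* Simplified stand-in for enum hash_rxq_type (illustration only). */
enum example_type { T_TCPV4, T_UDPV4, T_IPV4, T_ETH, T_MAX };

/*
 * Return the type corresponding to the pos-th set bit in hash_types,
 * mirroring the loop used by hash_rxq_type_from_pos().
 */
static enum example_type
type_from_pos(unsigned int hash_types, unsigned int pos)
{
	enum example_type type = 0;

	do {
		if ((hash_types & (1 << type)) && (pos-- == 0))
			break;
		++type;
	} while (1);
	return type;
}

int
main(void)
{
	/* Suppose only the UDPv4 and ETH types survived the rss_hf filter. */
	unsigned int hash_types = (1 << T_UDPV4) | (1 << T_ETH);

	assert(type_from_pos(hash_types, 0) == T_UDPV4);
	assert(type_from_pos(hash_types, 1) == T_ETH);
	printf("position 0 -> type %d, position 1 -> type %d\n",
	       type_from_pos(hash_types, 0),
	       type_from_pos(hash_types, 1));
	return 0;
}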