From: Nelio Laranjeiro Date: Mon, 2 Nov 2015 18:11:57 +0000 (+0100) Subject: mlx5: support RETA query and update X-Git-Tag: spdx-start~8173 X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=634efbc2c8c0;p=dpdk.git mlx5: support RETA query and update ConnectX-4 adapters do not have a constant indirection table size, which is set at runtime from the number of RX queues. The maximum size is retrieved using a hardware query and is normally 512. Since the current RETA API cannot handle a variable size, any query/update command causes it to be silently updated to RSS_INDIRECTION_TABLE_SIZE entries regardless of the original size. Also due to the underlying type of the configuration structure, the maximum size is limited to RSS_INDIRECTION_TABLE_SIZE (currently 128, at most 256 entries). A port stop/start must be done to apply the new RETA configuration. Signed-off-by: Nelio Laranjeiro --- diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 9636588e31..5a9526069c 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -133,6 +133,8 @@ mlx5_dev_close(struct rte_eth_dev *dev) rte_free((*priv->rss_conf)[i]); rte_free(priv->rss_conf); } + if (priv->reta_idx != NULL) + rte_free(priv->reta_idx); priv_unlock(priv); memset(priv, 0, sizeof(*priv)); } @@ -160,6 +162,8 @@ static const struct eth_dev_ops mlx5_dev_ops = { .mac_addr_remove = mlx5_mac_addr_remove, .mac_addr_add = mlx5_mac_addr_add, .mtu_set = mlx5_dev_set_mtu, + .reta_update = mlx5_dev_rss_reta_update, + .reta_query = mlx5_dev_rss_reta_query, .rss_hash_update = mlx5_rss_hash_update, .rss_hash_conf_get = mlx5_rss_hash_conf_get, }; @@ -373,7 +377,9 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) DEBUG("L2 tunnel checksum offloads are %ssupported", (priv->hw_csum_l2tun ? "" : "not ")); - priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size; + priv->ind_table_max_size = + RTE_MIN((unsigned int)RSS_INDIRECTION_TABLE_SIZE, + exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size); DEBUG("maximum RX indirection table size is %u", priv->ind_table_max_size); diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 0daacc853f..b84d31d593 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -118,6 +118,8 @@ struct priv { /* RSS configuration array indexed by hash RX queue type. */ struct rte_eth_rss_conf *(*rss_conf)[]; struct rte_intr_handle intr_handle; /* Interrupt handler. */ + unsigned int (*reta_idx)[]; /* RETA index table. */ + unsigned int reta_idx_n; /* RETA index size. */ rte_spinlock_t lock; /* Lock for control functions. */ }; @@ -184,6 +186,11 @@ int rss_hash_rss_conf_new_key(struct priv *, const uint8_t *, unsigned int, uint64_t); int mlx5_rss_hash_update(struct rte_eth_dev *, struct rte_eth_rss_conf *); int mlx5_rss_hash_conf_get(struct rte_eth_dev *, struct rte_eth_rss_conf *); +int priv_rss_reta_index_resize(struct priv *, unsigned int); +int mlx5_dev_rss_reta_query(struct rte_eth_dev *, + struct rte_eth_rss_reta_entry64 *, uint16_t); +int mlx5_dev_rss_reta_update(struct rte_eth_dev *, + struct rte_eth_rss_reta_entry64 *, uint16_t); /* mlx5_rxmode.c */ diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index 84e877cd69..1159fa3d45 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -410,6 +410,9 @@ dev_configure(struct rte_eth_dev *dev) struct priv *priv = dev->data->dev_private; unsigned int rxqs_n = dev->data->nb_rx_queues; unsigned int txqs_n = dev->data->nb_tx_queues; + unsigned int i; + unsigned int j; + unsigned int reta_idx_n; priv->rxqs = (void *)dev->data->rx_queues; priv->txqs = (void *)dev->data->tx_queues; @@ -418,11 +421,31 @@ dev_configure(struct rte_eth_dev *dev) (void *)dev, priv->txqs_n, txqs_n); priv->txqs_n = txqs_n; } + if (rxqs_n > priv->ind_table_max_size) { + ERROR("cannot handle this many RX queues (%u)", rxqs_n); + return EINVAL; + } if (rxqs_n == priv->rxqs_n) return 0; INFO("%p: RX queues number update: %u -> %u", (void *)dev, priv->rxqs_n, rxqs_n); priv->rxqs_n = rxqs_n; + /* If the requested number of RX queues is not a power of two, use the + * maximum indirection table size for better balancing. + * The result is always rounded to the next power of two. */ + reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? + priv->ind_table_max_size : + rxqs_n)); + if (priv_rss_reta_index_resize(priv, reta_idx_n)) + return ENOMEM; + /* When the number of RX queues is not a power of two, the remaining + * table entries are padded with reused WQs and hashes are not spread + * uniformly. */ + for (i = 0, j = 0; (i != reta_idx_n); ++i) { + (*priv->reta_idx)[i] = j; + if (++j == rxqs_n) + j = 0; + } return 0; } @@ -494,6 +517,12 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 0); if (priv_get_ifname(priv, &ifname) == 0) info->if_index = if_nametoindex(ifname); + /* FIXME: RETA update/query API expects the callee to know the size of + * the indirection table, for this PMD the size varies depending on + * the number of RX queues, it becomes impossible to find the correct + * size if it is not fixed. + * The API should be updated to solve this problem. */ + info->reta_size = priv->ind_table_max_size; priv_unlock(priv); } diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c index bf19aca145..7eb688ae96 100644 --- a/drivers/net/mlx5/mlx5_rss.c +++ b/drivers/net/mlx5/mlx5_rss.c @@ -211,3 +211,166 @@ mlx5_rss_hash_conf_get(struct rte_eth_dev *dev, priv_unlock(priv); return 0; } + +/** + * Allocate/reallocate RETA index table. + * + * @param priv + * Pointer to private structure. + * @praram reta_size + * The size of the array to allocate. + * + * @return + * 0 on success, errno value on failure. + */ +int +priv_rss_reta_index_resize(struct priv *priv, unsigned int reta_size) +{ + void *mem; + unsigned int old_size = priv->reta_idx_n; + + if (priv->reta_idx_n == reta_size) + return 0; + + mem = rte_realloc(priv->reta_idx, + reta_size * sizeof((*priv->reta_idx)[0]), 0); + if (!mem) + return ENOMEM; + priv->reta_idx = mem; + priv->reta_idx_n = reta_size; + + if (old_size < reta_size) + memset(&(*priv->reta_idx)[old_size], 0, + (reta_size - old_size) * + sizeof((*priv->reta_idx)[0])); + return 0; +} + +/** + * Query RETA table. + * + * @param priv + * Pointer to private structure. + * @param[in, out] reta_conf + * Pointer to the first RETA configuration structure. + * @param reta_size + * Number of entries. + * + * @return + * 0 on success, errno value on failure. + */ +static int +priv_dev_rss_reta_query(struct priv *priv, + struct rte_eth_rss_reta_entry64 *reta_conf, + unsigned int reta_size) +{ + unsigned int idx; + unsigned int i; + int ret; + + /* See RETA comment in mlx5_dev_infos_get(). */ + ret = priv_rss_reta_index_resize(priv, priv->ind_table_max_size); + if (ret) + return ret; + + /* Fill each entry of the table even if its bit is not set. */ + for (idx = 0, i = 0; (i != reta_size); ++i) { + idx = i / RTE_RETA_GROUP_SIZE; + reta_conf[idx].reta[i % RTE_RETA_GROUP_SIZE] = + (*priv->reta_idx)[i]; + } + return 0; +} + +/** + * Update RETA table. + * + * @param priv + * Pointer to private structure. + * @param[in] reta_conf + * Pointer to the first RETA configuration structure. + * @param reta_size + * Number of entries. + * + * @return + * 0 on success, errno value on failure. + */ +static int +priv_dev_rss_reta_update(struct priv *priv, + struct rte_eth_rss_reta_entry64 *reta_conf, + unsigned int reta_size) +{ + unsigned int idx; + unsigned int i; + unsigned int pos; + int ret; + + /* See RETA comment in mlx5_dev_infos_get(). */ + ret = priv_rss_reta_index_resize(priv, priv->ind_table_max_size); + if (ret) + return ret; + + for (idx = 0, i = 0; (i != reta_size); ++i) { + idx = i / RTE_RETA_GROUP_SIZE; + pos = i % RTE_RETA_GROUP_SIZE; + if (((reta_conf[idx].mask >> i) & 0x1) == 0) + continue; + assert(reta_conf[idx].reta[pos] < priv->rxqs_n); + (*priv->reta_idx)[i] = reta_conf[idx].reta[pos]; + } + return 0; +} + +/** + * DPDK callback to get the RETA indirection table. + * + * @param dev + * Pointer to Ethernet device structure. + * @param reta_conf + * Pointer to RETA configuration structure array. + * @param reta_size + * Size of the RETA table. + * + * @return + * 0 on success, negative errno value on failure. + */ +int +mlx5_dev_rss_reta_query(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + int ret; + struct priv *priv = dev->data->dev_private; + + priv_lock(priv); + ret = priv_dev_rss_reta_query(priv, reta_conf, reta_size); + priv_unlock(priv); + return -ret; +} + +/** + * DPDK callback to update the RETA indirection table. + * + * @param dev + * Pointer to Ethernet device structure. + * @param reta_conf + * Pointer to RETA configuration structure array. + * @param reta_size + * Size of the RETA table. + * + * @return + * 0 on success, negative errno value on failure. + */ +int +mlx5_dev_rss_reta_update(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + int ret; + struct priv *priv = dev->data->dev_private; + + priv_lock(priv); + ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size); + priv_unlock(priv); + return -ret; +} diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index 084bf41a0b..3d7ae7e194 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -258,26 +258,6 @@ hash_rxq_flow_attr(const struct hash_rxq *hash_rxq, return flow_attr_size; } -/** - * Return nearest power of two above input value. - * - * @param v - * Input value. - * - * @return - * Nearest power of two above input value. - */ -static unsigned int -log2above(unsigned int v) -{ - unsigned int l; - unsigned int r; - - for (l = 0, r = 0; (v >> 1); ++l, v >>= 1) - r |= (v & 1); - return (l + r); -} - /** * Return the type corresponding to the n'th bit set. * @@ -360,14 +340,7 @@ priv_make_ind_table_init(struct priv *priv, int priv_create_hash_rxqs(struct priv *priv) { - /* If the requested number of WQs is not a power of two, use the - * maximum indirection table size for better balancing. - * The result is always rounded to the next power of two. */ - unsigned int wqs_n = - (1 << log2above((priv->rxqs_n & (priv->rxqs_n - 1)) ? - priv->ind_table_max_size : - priv->rxqs_n)); - struct ibv_exp_wq *wqs[wqs_n]; + struct ibv_exp_wq *wqs[priv->reta_idx_n]; struct ind_table_init ind_table_init[IND_TABLE_INIT_N]; unsigned int ind_tables_n = priv_make_ind_table_init(priv, &ind_table_init); @@ -393,25 +366,15 @@ priv_create_hash_rxqs(struct priv *priv) " indirection table cannot be created"); return EINVAL; } - if ((wqs_n < priv->rxqs_n) || (wqs_n > priv->ind_table_max_size)) { - ERROR("cannot handle this many RX queues (%u)", priv->rxqs_n); - err = ERANGE; - goto error; - } - if (wqs_n != priv->rxqs_n) { + if (priv->rxqs_n & (priv->rxqs_n - 1)) { INFO("%u RX queues are configured, consider rounding this" " number to the next power of two for better balancing", priv->rxqs_n); - DEBUG("indirection table extended to assume %u WQs", wqs_n); - } - /* When the number of RX queues is not a power of two, the remaining - * table entries are padded with reused WQs and hashes are not spread - * uniformly. */ - for (i = 0, j = 0; (i != wqs_n); ++i) { - wqs[i] = (*priv->rxqs)[j]->wq; - if (++j == priv->rxqs_n) - j = 0; + DEBUG("indirection table extended to assume %u WQs", + priv->reta_idx_n); } + for (i = 0; (i != priv->reta_idx_n); ++i) + wqs[i] = (*priv->rxqs)[(*priv->reta_idx)[i]]->wq; /* Get number of hash RX queues to configure. */ for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i) hash_rxqs_n += ind_table_init[i].hash_types_n; @@ -436,8 +399,8 @@ priv_create_hash_rxqs(struct priv *priv) unsigned int ind_tbl_size = ind_table_init[i].max_size; struct ibv_exp_rwq_ind_table *ind_table; - if (wqs_n < ind_tbl_size) - ind_tbl_size = wqs_n; + if (priv->reta_idx_n < ind_tbl_size) + ind_tbl_size = priv->reta_idx_n; ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size); errno = 0; ind_table = ibv_exp_create_rwq_ind_table(priv->ctx, diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h index f1fad188bf..9b5e86a8a7 100644 --- a/drivers/net/mlx5/mlx5_utils.h +++ b/drivers/net/mlx5/mlx5_utils.h @@ -161,4 +161,24 @@ pmd_drv_log_basename(const char *s) \ snprintf(name, sizeof(name), __VA_ARGS__) +/** + * Return nearest power of two above input value. + * + * @param v + * Input value. + * + * @return + * Nearest power of two above input value. + */ +static inline unsigned int +log2above(unsigned int v) +{ + unsigned int l; + unsigned int r; + + for (l = 0, r = 0; (v >> 1); ++l, v >>= 1) + r |= (v & 1); + return (l + r); +} + #endif /* RTE_PMD_MLX5_UTILS_H_ */