X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx4%2Fmlx4.c;h=9f8ecd0729b4155171794b237465aaeae6c187f5;hb=cbd737416c34c2f71227ac450db2c85a789dac30;hp=06f17703b32a357fac258d986e8401bf77877d1c;hpb=de1df14e6e6ecfe84e125ccfd5f9109e69147e0f;p=dpdk.git diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index 06f17703b3..9f8ecd0729 100644 --- a/drivers/net/mlx4/mlx4.c +++ b/drivers/net/mlx4/mlx4.c @@ -44,9 +44,15 @@ #include "mlx4.h" #include "mlx4_glue.h" #include "mlx4_flow.h" +#include "mlx4_mr.h" #include "mlx4_rxtx.h" #include "mlx4_utils.h" +struct mlx4_dev_list mlx4_mem_event_cb_list = + LIST_HEAD_INITIALIZER(mlx4_mem_event_cb_list); + +rte_rwlock_t mlx4_mem_event_rwlock = RTE_RWLOCK_INITIALIZER; + /** Configuration structure for device arguments. */ struct mlx4_conf { struct { @@ -61,6 +67,8 @@ const char *pmd_mlx4_init_params[] = { NULL, }; +static void mlx4_dev_stop(struct rte_eth_dev *dev); + /** * DPDK callback for Ethernet device configuration. * @@ -90,6 +98,20 @@ mlx4_dev_configure(struct rte_eth_dev *dev) if (ret) ERROR("%p: interrupt handler installation failed", (void *)dev); + /* + * Once the device is added to the list of memory event callback, its + * global MR cache table cannot be expanded on the fly because of + * deadlock. If it overflows, lookup should be done by searching MR list + * linearly, which is slow. + */ + if (mlx4_mr_btree_init(&priv->mr.cache, MLX4_MR_BTREE_CACHE_N * 2, + dev->device->numa_node)) { + /* rte_errno is already set. */ + return -rte_errno; + } + rte_rwlock_write_lock(&mlx4_mem_event_rwlock); + LIST_INSERT_HEAD(&mlx4_mem_event_cb_list, priv, mem_event_cb); + rte_rwlock_write_unlock(&mlx4_mem_event_rwlock); exit: return ret; } @@ -123,6 +145,9 @@ mlx4_dev_start(struct rte_eth_dev *dev) (void *)dev, strerror(-ret)); goto err; } +#ifndef NDEBUG + mlx4_mr_dump_dev(dev); +#endif ret = mlx4_rxq_intr_enable(priv); if (ret) { ERROR("%p: interrupt handler installation failed", @@ -143,8 +168,7 @@ mlx4_dev_start(struct rte_eth_dev *dev) dev->rx_pkt_burst = mlx4_rx_burst; return 0; err: - /* Rollback. */ - priv->started = 0; + mlx4_dev_stop(dev); return ret; } @@ -194,10 +218,12 @@ mlx4_dev_close(struct rte_eth_dev *dev) dev->tx_pkt_burst = mlx4_tx_burst_removed; rte_wmb(); mlx4_flow_clean(priv); + mlx4_rss_deinit(priv); for (i = 0; i != dev->data->nb_rx_queues; ++i) mlx4_rx_queue_release(dev->data->rx_queues[i]); for (i = 0; i != dev->data->nb_tx_queues; ++i) mlx4_tx_queue_release(dev->data->tx_queues[i]); + mlx4_mr_release(dev); if (priv->pd != NULL) { assert(priv->ctx != NULL); claim_zero(mlx4_glue->dealloc_pd(priv->pd)); @@ -385,6 +411,99 @@ free_kvlist: return ret; } +/** + * Interpret RSS capabilities reported by device. + * + * This function returns the set of usable Verbs RSS hash fields, kernel + * quirks taken into account. + * + * @param ctx + * Verbs context. + * @param pd + * Verbs protection domain. + * @param device_attr_ex + * Extended device attributes to interpret. + * + * @return + * Usable RSS hash fields mask in Verbs format. + */ +static uint64_t +mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd, + struct ibv_device_attr_ex *device_attr_ex) +{ + uint64_t hw_rss_sup = device_attr_ex->rss_caps.rx_hash_fields_mask; + struct ibv_cq *cq = NULL; + struct ibv_wq *wq = NULL; + struct ibv_rwq_ind_table *ind = NULL; + struct ibv_qp *qp = NULL; + + if (!hw_rss_sup) { + WARN("no RSS capabilities reported; disabling support for UDP" + " RSS and inner VXLAN RSS"); + return IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 | + IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6 | + IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP; + } + if (!(hw_rss_sup & IBV_RX_HASH_INNER)) + return hw_rss_sup; + /* + * Although reported as supported, missing code in some Linux + * versions (v4.15, v4.16) prevents the creation of hash QPs with + * inner capability. + * + * There is no choice but to attempt to instantiate a temporary RSS + * context in order to confirm its support. + */ + cq = mlx4_glue->create_cq(ctx, 1, NULL, NULL, 0); + wq = cq ? mlx4_glue->create_wq + (ctx, + &(struct ibv_wq_init_attr){ + .wq_type = IBV_WQT_RQ, + .max_wr = 1, + .max_sge = 1, + .pd = pd, + .cq = cq, + }) : NULL; + ind = wq ? mlx4_glue->create_rwq_ind_table + (ctx, + &(struct ibv_rwq_ind_table_init_attr){ + .log_ind_tbl_size = 0, + .ind_tbl = &wq, + .comp_mask = 0, + }) : NULL; + qp = ind ? mlx4_glue->create_qp_ex + (ctx, + &(struct ibv_qp_init_attr_ex){ + .comp_mask = + (IBV_QP_INIT_ATTR_PD | + IBV_QP_INIT_ATTR_RX_HASH | + IBV_QP_INIT_ATTR_IND_TABLE), + .qp_type = IBV_QPT_RAW_PACKET, + .pd = pd, + .rwq_ind_tbl = ind, + .rx_hash_conf = { + .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, + .rx_hash_key_len = MLX4_RSS_HASH_KEY_SIZE, + .rx_hash_key = mlx4_rss_hash_key_default, + .rx_hash_fields_mask = hw_rss_sup, + }, + }) : NULL; + if (!qp) { + WARN("disabling unusable inner RSS capability due to kernel" + " quirk"); + hw_rss_sup &= ~IBV_RX_HASH_INNER; + } else { + claim_zero(mlx4_glue->destroy_qp(qp)); + } + if (ind) + claim_zero(mlx4_glue->destroy_rwq_ind_table(ind)); + if (wq) + claim_zero(mlx4_glue->destroy_wq(wq)); + if (cq) + claim_zero(mlx4_glue->destroy_cq(cq)); + return hw_rss_sup; +} + static struct rte_pci_driver mlx4_driver; /** @@ -563,19 +682,8 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO); DEBUG("L2 tunnel checksum offloads are %ssupported", priv->hw_csum_l2tun ? "" : "not "); - priv->hw_rss_sup = device_attr_ex.rss_caps.rx_hash_fields_mask; - if (!priv->hw_rss_sup) { - WARN("no RSS capabilities reported; disabling support" - " for UDP RSS and inner VXLAN RSS"); - /* Fake support for all possible RSS hash fields. */ - priv->hw_rss_sup = ~UINT64_C(0); - priv->hw_rss_sup = mlx4_conv_rss_hf(priv, -1); - /* Filter out known unsupported fields. */ - priv->hw_rss_sup &= - ~(uint64_t)(IBV_RX_HASH_SRC_PORT_UDP | - IBV_RX_HASH_DST_PORT_UDP | - IBV_RX_HASH_INNER); - } + priv->hw_rss_sup = mlx4_hw_rss_sup(priv->ctx, priv->pd, + &device_attr_ex); DEBUG("supported RSS hash fields mask: %016" PRIx64, priv->hw_rss_sup); priv->hw_fcs_strip = !!(device_attr_ex.raw_packet_caps & @@ -653,6 +761,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) /* Update link status once if waiting for LSC. */ if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) mlx4_link_update(eth_dev, 0); + rte_eth_dev_probing_finish(eth_dev); continue; port_error: rte_free(priv); @@ -880,6 +989,8 @@ rte_mlx4_pmd_init(void) } mlx4_glue->fork_init(); rte_pci_register(&mlx4_driver); + rte_mem_event_callback_register("MLX4_MEM_EVENT_CB", + mlx4_mr_mem_event_cb, NULL); } RTE_PMD_EXPORT_NAME(net_mlx4, __COUNTER__);