net/mlx5: set Tx queue affinity in round-robin
[dpdk.git] / drivers / net / mlx5 / mlx5.c
index 8b7629b..97cf2d9 100644 (file)
@@ -36,7 +36,6 @@
 #include "mlx5_rx.h"
 #include "mlx5_tx.h"
 #include "mlx5_autoconf.h"
-#include "mlx5_mr.h"
 #include "mlx5_flow.h"
 #include "mlx5_flow_os.h"
 #include "rte_pmd_mlx5.h"
@@ -520,6 +519,7 @@ mlx5_flow_aging_init(struct mlx5_dev_ctx_shared *sh)
 static void
 mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh)
 {
+       struct mlx5_hca_attr *attr = &sh->cdev->config.hca_attr;
        int i;
 
        memset(&sh->cmng, 0, sizeof(sh->cmng));
@@ -532,6 +532,10 @@ mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh)
                TAILQ_INIT(&sh->cmng.counters[i]);
                rte_spinlock_init(&sh->cmng.csl[i]);
        }
+       if (sh->devx && !haswell_broadwell_cpu) {
+               sh->cmng.relaxed_ordering_write = attr->relaxed_ordering_write;
+               sh->cmng.relaxed_ordering_read = attr->relaxed_ordering_read;
+       }
 }
 
 /**
@@ -916,7 +920,7 @@ mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev)
         * start after the common header that with the length of a DW(u32).
         */
        node.sample[1].flow_match_sample_field_base_offset = sizeof(uint32_t);
-       prf->obj = mlx5_devx_cmd_create_flex_parser(priv->sh->ctx, &node);
+       prf->obj = mlx5_devx_cmd_create_flex_parser(priv->sh->cdev->ctx, &node);
        if (!prf->obj) {
                DRV_LOG(ERR, "Failed to create flex parser node object.");
                return (rte_errno == 0) ? -ENODEV : -rte_errno;
@@ -1021,7 +1025,8 @@ mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh,
                 */
                uar_mapping = 0;
 #endif
-               sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->ctx, uar_mapping);
+               sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->cdev->ctx,
+                                                      uar_mapping);
 #ifdef MLX5DV_UAR_ALLOC_TYPE_NC
                if (!sh->tx_uar &&
                    uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
@@ -1039,8 +1044,8 @@ mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh,
                         */
                        DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (BF)");
                        uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
-                       sh->tx_uar = mlx5_glue->devx_alloc_uar
-                                                       (sh->ctx, uar_mapping);
+                       sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->cdev->ctx,
+                                                              uar_mapping);
                } else if (!sh->tx_uar &&
                           uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) {
                        if (config->dbnc == MLX5_TXDB_NCACHED)
@@ -1052,8 +1057,8 @@ mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh,
                         */
                        DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (NC)");
                        uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF;
-                       sh->tx_uar = mlx5_glue->devx_alloc_uar
-                                                       (sh->ctx, uar_mapping);
+                       sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->cdev->ctx,
+                                                              uar_mapping);
                }
 #endif
                if (!sh->tx_uar) {
@@ -1080,8 +1085,8 @@ mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh,
        }
        for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
                uar_mapping = 0;
-               sh->devx_rx_uar = mlx5_glue->devx_alloc_uar
-                                                       (sh->ctx, uar_mapping);
+               sh->devx_rx_uar = mlx5_glue->devx_alloc_uar(sh->cdev->ctx,
+                                                           uar_mapping);
 #ifdef MLX5DV_UAR_ALLOC_TYPE_NC
                if (!sh->devx_rx_uar &&
                    uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
@@ -1093,7 +1098,7 @@ mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh,
                        DRV_LOG(DEBUG, "Failed to allocate Rx DevX UAR (BF)");
                        uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
                        sh->devx_rx_uar = mlx5_glue->devx_alloc_uar
-                                                       (sh->ctx, uar_mapping);
+                                                  (sh->cdev->ctx, uar_mapping);
                }
 #endif
                if (!sh->devx_rx_uar) {
@@ -1122,28 +1127,8 @@ exit:
 }
 
 /**
- * Unregister the mempool from the protection domain.
- *
- * @param sh
- *   Pointer to the device shared context.
- * @param mp
- *   Mempool being unregistered.
- */
-static void
-mlx5_dev_ctx_shared_mempool_unregister(struct mlx5_dev_ctx_shared *sh,
-                                      struct rte_mempool *mp)
-{
-       struct mlx5_mp_id mp_id;
-
-       mlx5_mp_id_init(&mp_id, 0);
-       if (mlx5_mr_mempool_unregister(&sh->share_cache, mp, &mp_id) < 0)
-               DRV_LOG(WARNING, "Failed to unregister mempool %s for PD %p: %s",
-                       mp->name, sh->pd, rte_strerror(rte_errno));
-}
-
-/**
- * rte_mempool_walk() callback to register mempools
- * for the protection domain.
+ * rte_mempool_walk() callback to unregister Rx mempools.
+ * It is used when implicit mempool registration is disabled.
  *
  * @param mp
  *   The mempool being walked.
@@ -1151,64 +1136,11 @@ mlx5_dev_ctx_shared_mempool_unregister(struct mlx5_dev_ctx_shared *sh,
  *   Pointer to the device shared context.
  */
 static void
-mlx5_dev_ctx_shared_mempool_register_cb(struct rte_mempool *mp, void *arg)
+mlx5_dev_ctx_shared_rx_mempool_unregister_cb(struct rte_mempool *mp, void *arg)
 {
        struct mlx5_dev_ctx_shared *sh = arg;
-       struct mlx5_mp_id mp_id;
-       int ret;
 
-       mlx5_mp_id_init(&mp_id, 0);
-       ret = mlx5_mr_mempool_register(&sh->share_cache, sh->pd, mp, &mp_id);
-       if (ret < 0 && rte_errno != EEXIST)
-               DRV_LOG(ERR, "Failed to register existing mempool %s for PD %p: %s",
-                       mp->name, sh->pd, rte_strerror(rte_errno));
-}
-
-/**
- * rte_mempool_walk() callback to unregister mempools
- * from the protection domain.
- *
- * @param mp
- *   The mempool being walked.
- * @param arg
- *   Pointer to the device shared context.
- */
-static void
-mlx5_dev_ctx_shared_mempool_unregister_cb(struct rte_mempool *mp, void *arg)
-{
-       mlx5_dev_ctx_shared_mempool_unregister
-                               ((struct mlx5_dev_ctx_shared *)arg, mp);
-}
-
-/**
- * Mempool life cycle callback for Ethernet devices.
- *
- * @param event
- *   Mempool life cycle event.
- * @param mp
- *   Associated mempool.
- * @param arg
- *   Pointer to a device shared context.
- */
-static void
-mlx5_dev_ctx_shared_mempool_event_cb(enum rte_mempool_event event,
-                                    struct rte_mempool *mp, void *arg)
-{
-       struct mlx5_dev_ctx_shared *sh = arg;
-       struct mlx5_mp_id mp_id;
-
-       switch (event) {
-       case RTE_MEMPOOL_EVENT_READY:
-               mlx5_mp_id_init(&mp_id, 0);
-               if (mlx5_mr_mempool_register(&sh->share_cache, sh->pd, mp,
-                                            &mp_id) < 0)
-                       DRV_LOG(ERR, "Failed to register new mempool %s for PD %p: %s",
-                               mp->name, sh->pd, rte_strerror(rte_errno));
-               break;
-       case RTE_MEMPOOL_EVENT_DESTROY:
-               mlx5_dev_ctx_shared_mempool_unregister(sh, mp);
-               break;
-       }
+       mlx5_dev_mempool_unregister(sh->cdev, mp);
 }
 
 /**
@@ -1229,7 +1161,7 @@ mlx5_dev_ctx_shared_rx_mempool_event_cb(enum rte_mempool_event event,
        struct mlx5_dev_ctx_shared *sh = arg;
 
        if (event == RTE_MEMPOOL_EVENT_DESTROY)
-               mlx5_dev_ctx_shared_mempool_unregister(sh, mp);
+               mlx5_dev_mempool_unregister(sh->cdev, mp);
 }
 
 int
@@ -1245,14 +1177,68 @@ mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev)
                                (mlx5_dev_ctx_shared_rx_mempool_event_cb, sh);
                return ret == 0 || rte_errno == EEXIST ? 0 : ret;
        }
-       /* Callback for this shared context may be already registered. */
-       ret = rte_mempool_event_callback_register
-                               (mlx5_dev_ctx_shared_mempool_event_cb, sh);
-       if (ret != 0 && rte_errno != EEXIST)
-               return ret;
-       /* Register mempools only once for this shared context. */
-       if (ret == 0)
-               rte_mempool_walk(mlx5_dev_ctx_shared_mempool_register_cb, sh);
+       return mlx5_dev_mempool_subscribe(sh->cdev);
+}
+
+/**
+ * Create the TIS object(s) of a shared device context.
+ *
+ * When the device is bonded and the queried LAG port selection mode is
+ * MLX5_LAG_MODE_TIS, one TIS is created per bonding port, each with the
+ * affinity given by MLX5_IFC_LAG_MAP_TIS_AFFINITY(). In every other case
+ * a single TIS with affinity 0 is created in sh->tis[0].
+ *
+ * TIS objects created before a failure are left in sh->tis[]; this
+ * function does not destroy them.
+ *
+ * @param sh
+ *   Pointer to the device shared context.
+ *
+ * @return
+ *   Zero on success, -1 otherwise.
+ */
+static int
+mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh)
+{
+       int i;
+       struct mlx5_devx_lag_context lag_ctx = { 0 };
+       struct mlx5_devx_tis_attr tis_attr = { 0 };
+
+       tis_attr.transport_domain = sh->td->id;
+       if (sh->bond.n_port) {
+               if (mlx5_devx_cmd_query_lag(sh->cdev->ctx, &lag_ctx)) {
+                       DRV_LOG(ERR, "Failed to query lag affinity.");
+                       return -1;
+               }
+               /* Keep the queried LAG settings in the shared context. */
+               sh->lag.tx_remap_affinity[0] = lag_ctx.tx_remap_affinity_1;
+               sh->lag.tx_remap_affinity[1] = lag_ctx.tx_remap_affinity_2;
+               sh->lag.affinity_mode = lag_ctx.port_select_mode;
+               if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) {
+                       for (i = 0; i < sh->bond.n_port; i++) {
+                               tis_attr.lag_tx_port_affinity =
+                                       MLX5_IFC_LAG_MAP_TIS_AFFINITY(i,
+                                                       sh->bond.n_port);
+                               sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx,
+                                               &tis_attr);
+                               if (!sh->tis[i]) {
+                                       DRV_LOG(ERR, "Failed to create TIS %d/%d for bonding device"
+                                               " %s.", i, sh->bond.n_port,
+                                               sh->ibdev_name);
+                                       return -1;
+                               }
+                       }
+                       DRV_LOG(DEBUG, "LAG number of ports : %d, affinity_1 & 2 : pf%d & %d.",
+                               sh->bond.n_port, lag_ctx.tx_remap_affinity_1,
+                               lag_ctx.tx_remap_affinity_2);
+                       return 0;
+               }
+               if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH)
+                       DRV_LOG(INFO, "Device %s enabled HW hash based LAG.",
+                                       sh->ibdev_name);
+       }
+       /* Not bonded, or LAG is not in TIS affinity mode: create a single TIS. */
+       tis_attr.lag_tx_port_affinity = 0;
+       sh->tis[0] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr);
+       if (!sh->tis[0]) {
+               DRV_LOG(ERR, "Failed to create TIS 0 for device %s.",
+                       sh->ibdev_name);
+               return -1;
+       }
        return 0;
 }
 
@@ -1278,12 +1264,11 @@ mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev)
  */
 struct mlx5_dev_ctx_shared *
 mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
-                          const struct mlx5_dev_config *config)
+                         const struct mlx5_dev_config *config)
 {
        struct mlx5_dev_ctx_shared *sh;
        int err = 0;
        uint32_t i;
-       struct mlx5_devx_tis_attr tis_attr = { 0 };
 
        MLX5_ASSERT(spawn);
        /* Secondary process should not create the shared context. */
@@ -1291,8 +1276,7 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
        pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
        /* Search for IB context by device name. */
        LIST_FOREACH(sh, &mlx5_dev_ctx_list, next) {
-               if (!strcmp(sh->ibdev_name,
-                       mlx5_os_get_ctx_device_name(spawn->ctx))) {
+               if (!strcmp(sh->ibdev_name, spawn->phys_dev_name)) {
                        sh->refcnt++;
                        goto exit;
                }
@@ -1313,10 +1297,9 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
        sh->numa_node = spawn->cdev->dev->numa_node;
        sh->cdev = spawn->cdev;
        sh->devx = sh->cdev->config.devx;
-       sh->ctx = spawn->ctx;
        if (spawn->bond_info)
                sh->bond = *spawn->bond_info;
-       err = mlx5_os_get_dev_attr(sh->ctx, &sh->device_attr);
+       err = mlx5_os_get_dev_attr(sh->cdev, &sh->device_attr);
        if (err) {
                DRV_LOG(DEBUG, "mlx5_os_get_dev_attr() failed");
                goto error;
@@ -1324,9 +1307,9 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
        sh->refcnt = 1;
        sh->max_port = spawn->max_port;
        sh->reclaim_mode = config->reclaim_mode;
-       strncpy(sh->ibdev_name, mlx5_os_get_ctx_device_name(sh->ctx),
+       strncpy(sh->ibdev_name, mlx5_os_get_ctx_device_name(sh->cdev->ctx),
                sizeof(sh->ibdev_name) - 1);
-       strncpy(sh->ibdev_path, mlx5_os_get_ctx_device_path(sh->ctx),
+       strncpy(sh->ibdev_path, mlx5_os_get_ctx_device_path(sh->cdev->ctx),
                sizeof(sh->ibdev_path) - 1);
        /*
         * Setting port_id to max unallowed value means
@@ -1337,27 +1320,14 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
                sh->port[i].ih_port_id = RTE_MAX_ETHPORTS;
                sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS;
        }
-       sh->pd = mlx5_os_alloc_pd(sh->ctx);
-       if (sh->pd == NULL) {
-               DRV_LOG(ERR, "PD allocation failure");
-               err = ENOMEM;
-               goto error;
-       }
        if (sh->devx) {
-               err = mlx5_os_get_pdn(sh->pd, &sh->pdn);
-               if (err) {
-                       DRV_LOG(ERR, "Fail to extract pdn from PD");
-                       goto error;
-               }
-               sh->td = mlx5_devx_cmd_create_td(sh->ctx);
+               sh->td = mlx5_devx_cmd_create_td(sh->cdev->ctx);
                if (!sh->td) {
                        DRV_LOG(ERR, "TD allocation failure");
                        err = ENOMEM;
                        goto error;
                }
-               tis_attr.transport_domain = sh->td->id;
-               sh->tis = mlx5_devx_cmd_create_tis(sh->ctx, &tis_attr);
-               if (!sh->tis) {
+               if (mlx5_setup_tis(sh)) {
                        DRV_LOG(ERR, "TIS allocation failure");
                        err = ENOMEM;
                        goto error;
@@ -1377,24 +1347,6 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
        for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++)
                rte_spinlock_init(&sh->uar_lock[i]);
 #endif
-       /*
-        * Once the device is added to the list of memory event
-        * callback, its global MR cache table cannot be expanded
-        * on the fly because of deadlock. If it overflows, lookup
-        * should be done by searching MR list linearly, which is slow.
-        *
-        * At this point the device is not added to the memory
-        * event list yet, context is just being created.
-        */
-       err = mlx5_mr_btree_init(&sh->share_cache.cache,
-                                MLX5_MR_BTREE_CACHE_N * 2,
-                                sh->numa_node);
-       if (err) {
-               err = rte_errno;
-               goto error;
-       }
-       mlx5_os_set_reg_mr_cb(&sh->share_cache.reg_mr_cb,
-                             &sh->share_cache.dereg_mr_cb);
        mlx5_os_dev_shared_handler_install(sh);
        if (LIST_EMPTY(&mlx5_dev_ctx_list)) {
                err = mlx5_flow_os_init_workspace_once();
@@ -1404,11 +1356,6 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
        mlx5_flow_aging_init(sh);
        mlx5_flow_counters_mng_init(sh);
        mlx5_flow_ipool_create(sh, config);
-       /* Add device to memory callback list. */
-       rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
-       LIST_INSERT_HEAD(&mlx5_shared_data->mem_event_cb_list,
-                        sh, mem_event_cb);
-       rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
        /* Add context to the global device list. */
        LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next);
        rte_spinlock_init(&sh->geneve_tlv_opt_sl);
@@ -1419,20 +1366,17 @@ error:
        pthread_mutex_destroy(&sh->txpp.mutex);
        pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
        MLX5_ASSERT(sh);
-       if (sh->share_cache.cache.table)
-               mlx5_mr_btree_free(&sh->share_cache.cache);
-       if (sh->tis)
-               claim_zero(mlx5_devx_cmd_destroy(sh->tis));
        if (sh->td)
                claim_zero(mlx5_devx_cmd_destroy(sh->td));
+       i = 0;
+       do {
+               if (sh->tis[i])
+                       claim_zero(mlx5_devx_cmd_destroy(sh->tis[i]));
+       } while (++i < (uint32_t)sh->bond.n_port);
        if (sh->devx_rx_uar)
                mlx5_glue->devx_free_uar(sh->devx_rx_uar);
        if (sh->tx_uar)
                mlx5_glue->devx_free_uar(sh->tx_uar);
-       if (sh->pd)
-               claim_zero(mlx5_os_dealloc_pd(sh->pd));
-       if (sh->ctx)
-               claim_zero(mlx5_glue->close_device(sh->ctx));
        mlx5_free(sh);
        MLX5_ASSERT(err > 0);
        rte_errno = err;
@@ -1450,6 +1394,7 @@ void
 mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
 {
        int ret;
+       int i = 0;
 
        pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
 #ifdef RTE_LIBRTE_MLX5_DEBUG
@@ -1472,25 +1417,20 @@ mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
        if (--sh->refcnt)
                goto exit;
        /* Stop watching for mempool events and unregister all mempools. */
-       ret = rte_mempool_event_callback_unregister
-                               (mlx5_dev_ctx_shared_mempool_event_cb, sh);
-       if (ret < 0 && rte_errno == ENOENT)
+       if (!sh->cdev->config.mr_mempool_reg_en) {
                ret = rte_mempool_event_callback_unregister
                                (mlx5_dev_ctx_shared_rx_mempool_event_cb, sh);
-       if (ret == 0)
-               rte_mempool_walk(mlx5_dev_ctx_shared_mempool_unregister_cb,
-                                sh);
-       /* Remove from memory callback device list. */
-       rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
-       LIST_REMOVE(sh, mem_event_cb);
-       rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
-       /* Release created Memory Regions. */
-       mlx5_mr_release_cache(&sh->share_cache);
+               if (ret == 0)
+                       rte_mempool_walk
+                            (mlx5_dev_ctx_shared_rx_mempool_unregister_cb, sh);
+       }
        /* Remove context from the global device list. */
        LIST_REMOVE(sh, next);
-       /* Release flow workspaces objects on the last device. */
-       if (LIST_EMPTY(&mlx5_dev_ctx_list))
+       /* Release resources on the last device removal. */
+       if (LIST_EMPTY(&mlx5_dev_ctx_list)) {
+               mlx5_os_net_cleanup();
                mlx5_flow_os_release_workspace();
+       }
        pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
        /*
         *  Ensure there is no async event handler installed.
@@ -1509,16 +1449,14 @@ mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
                mlx5_glue->devx_free_uar(sh->tx_uar);
                sh->tx_uar = NULL;
        }
-       if (sh->pd)
-               claim_zero(mlx5_os_dealloc_pd(sh->pd));
-       if (sh->tis)
-               claim_zero(mlx5_devx_cmd_destroy(sh->tis));
+       do {
+               if (sh->tis[i])
+                       claim_zero(mlx5_devx_cmd_destroy(sh->tis[i]));
+       } while (++i < sh->bond.n_port);
        if (sh->td)
                claim_zero(mlx5_devx_cmd_destroy(sh->td));
        if (sh->devx_rx_uar)
                mlx5_glue->devx_free_uar(sh->devx_rx_uar);
-       if (sh->ctx)
-               claim_zero(mlx5_glue->close_device(sh->ctx));
        MLX5_ASSERT(sh->geneve_tlv_option_resource == NULL);
        pthread_mutex_destroy(&sh->txpp.mutex);
        mlx5_free(sh);
@@ -1724,8 +1662,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
                return 0;
        DRV_LOG(DEBUG, "port %u closing device \"%s\"",
                dev->data->port_id,
-               ((priv->sh->ctx != NULL) ?
-               mlx5_os_get_ctx_device_name(priv->sh->ctx) : ""));
+               ((priv->sh->cdev->ctx != NULL) ?
+               mlx5_os_get_ctx_device_name(priv->sh->cdev->ctx) : ""));
        /*
         * If default mreg copy action is removed at the stop stage,
         * the search will return none and nothing will be done anymore.
@@ -2653,8 +2591,6 @@ static struct mlx5_class_driver mlx5_net_driver = {
        .id_table = mlx5_pci_id_map,
        .probe = mlx5_os_net_probe,
        .remove = mlx5_net_remove,
-       .dma_map = mlx5_net_dma_map,
-       .dma_unmap = mlx5_net_dma_unmap,
        .probe_again = 1,
        .intr_lsc = 1,
        .intr_rmv = 1,