diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 19e1f41..0528ed3 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -32,7 +32,6 @@
 #include <rte_bus_pci.h>
 #include <rte_common.h>
 #include <rte_config.h>
-#include <rte_eal_memconfig.h>
 #include <rte_kvargs.h>
 #include <rte_rwlock.h>
 #include <rte_spinlock.h>
@@ -269,6 +268,37 @@ mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
        memset(&sh->cmng, 0, sizeof(sh->cmng));
 }
 
+/**
+ * Extract the protection domain number (pdn) of a PD object using the DV API.
+ *
+ * @param[in] pd
+ *   Pointer to the verbs PD object.
+ * @param[out] pdn
+ *   Pointer to the PD object number variable.
+ *
+ * @return
+ *   0 on success, error value otherwise.
+ */
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+static int
+mlx5_get_pdn(struct ibv_pd *pd, uint32_t *pdn)
+{
+       struct mlx5dv_obj obj;
+       struct mlx5dv_pd pd_info;
+       int ret = 0;
+
+       obj.pd.in = pd;
+       obj.pd.out = &pd_info;
+       ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD);
+       if (ret) {
+               DRV_LOG(DEBUG, "Fail to get PD object info");
+               return ret;
+       }
+       *pdn = pd_info.pdn;
+       return 0;
+}
+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
+
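The helper above follows the generic mlx5dv object conversion pattern, which applies to other verbs objects as well. As a minimal sketch only (assuming rdma-core built with DV support; the name mlx5_get_cqn is hypothetical and not part of this patch), the same conversion can extract a CQ number:

	/* Hypothetical sibling of mlx5_get_pdn(): extract the CQ number
	 * of a verbs CQ through the same DV conversion. */
	static int
	mlx5_get_cqn(struct ibv_cq *cq, uint32_t *cqn)
	{
		struct mlx5dv_obj obj;
		struct mlx5dv_cq cq_info;

		obj.cq.in = cq;            /* Verbs CQ to convert. */
		obj.cq.out = &cq_info;     /* DV info filled on success. */
		if (mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ))
			return -EINVAL;
		*cqn = cq_info.cqn;        /* Hardware CQ number. */
		return 0;
	}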
 /**
  * Allocate shared IB device context. If there is multiport device the
  * master and representors will share this context, if there is single
@@ -357,6 +387,13 @@ mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn)
                err = ENOMEM;
                goto error;
        }
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+       err = mlx5_get_pdn(sh->pd, &sh->pdn);
+       if (err) {
+               DRV_LOG(ERR, "Fail to extract pdn from PD");
+               goto error;
+       }
+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
        /*
         * Once the device is added to the list of memory event
         * callback, its global MR cache table cannot be expanded
@@ -374,6 +411,12 @@ mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn)
                goto error;
        }
        mlx5_flow_counters_mng_init(sh);
+       /* Add device to memory callback list. */
+       rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
+       LIST_INSERT_HEAD(&mlx5_shared_data->mem_event_cb_list,
+                        sh, mem_event_cb);
+       rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
+       /* Add context to the global device list. */
        LIST_INSERT_HEAD(&mlx5_ibv_list, sh, next);
 exit:
        pthread_mutex_unlock(&mlx5_ibv_list_mutex);
@@ -423,6 +466,11 @@ mlx5_free_shared_ibctx(struct mlx5_ibv_shared *sh)
                goto exit;
        /* Release created Memory Regions. */
        mlx5_mr_release(sh);
+       /* Remove from memory callback device list. */
+       rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
+       LIST_REMOVE(sh, mem_event_cb);
+       rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
+       /* Remove context from the global device list. */
        LIST_REMOVE(sh, next);
        /*
         *  Ensure there is no async event handler installed.
@@ -500,6 +548,7 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv)
                sh->esw_drop_action = mlx5_glue->dr_create_flow_action_drop();
        }
 #endif
+       sh->pop_vlan_action = mlx5_glue->dr_create_flow_action_pop_vlan();
        sh->dv_refcnt++;
        priv->dr_shared = 1;
        return 0;
@@ -522,6 +571,10 @@ error:
                mlx5_glue->destroy_flow_action(sh->esw_drop_action);
                sh->esw_drop_action = NULL;
        }
+       if (sh->pop_vlan_action) {
+               mlx5_glue->destroy_flow_action(sh->pop_vlan_action);
+               sh->pop_vlan_action = NULL;
+       }
        return err;
 #else
        (void)priv;
@@ -567,6 +620,10 @@ mlx5_free_shared_dr(struct mlx5_priv *priv)
                sh->esw_drop_action = NULL;
        }
 #endif
+       if (sh->pop_vlan_action) {
+               mlx5_glue->destroy_flow_action(sh->pop_vlan_action);
+               sh->pop_vlan_action = NULL;
+       }
        pthread_mutex_destroy(&sh->dv_mutex);
 #else
        (void)priv;
@@ -695,6 +752,31 @@ mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
        rte_free(ptr);
 }
 
+/**
+ * DPDK callback to add a UDP tunnel port.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device structure.
+ * @param[in] udp_tunnel
+ *   Pointer to the UDP tunnel parameters.
+ *
+ * @return
+ *   0 on a supported UDP tunnel port and type, -ENOTSUP otherwise.
+ */
+int
+mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev __rte_unused,
+                        struct rte_eth_udp_tunnel *udp_tunnel)
+{
+       assert(udp_tunnel != NULL);
+       if (udp_tunnel->prot_type == RTE_TUNNEL_TYPE_VXLAN &&
+           udp_tunnel->udp_port == 4789) /* IANA-assigned VXLAN port. */
+               return 0;
+       if (udp_tunnel->prot_type == RTE_TUNNEL_TYPE_VXLAN_GPE &&
+           udp_tunnel->udp_port == 4790) /* IANA-assigned VXLAN-GPE port. */
+               return 0;
+       return -ENOTSUP;
+}
+
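From the application side this callback is reached through the generic ethdev API. A minimal sketch under stated assumptions (the helper name setup_vxlan_port is hypothetical; port_id must be an initialized mlx5 port):

	#include <rte_ethdev.h>

	static int
	setup_vxlan_port(uint16_t port_id)
	{
		struct rte_eth_udp_tunnel tunnel = {
			.udp_port = 4789,                   /* IANA VXLAN port. */
			.prot_type = RTE_TUNNEL_TYPE_VXLAN,
		};

		/* mlx5 accepts only the default ports; anything else
		 * propagates -ENOTSUP back from the PMD callback. */
		return rte_eth_dev_udp_tunnel_port_add(port_id, &tunnel);
	}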
 /**
  * Initialize process private data structure.
  *
@@ -789,11 +871,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
        }
        mlx5_proc_priv_uninit(dev);
        mlx5_mprq_free_mp(dev);
-       /* Remove from memory callback device list. */
-       rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
-       assert(priv->sh);
-       LIST_REMOVE(priv->sh, mem_event_cb);
-       rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
        mlx5_free_shared_dr(priv);
        if (priv->rss_conf.rss_key != NULL)
                rte_free(priv->rss_conf.rss_key);
@@ -805,6 +882,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
                close(priv->nl_socket_route);
        if (priv->nl_socket_rdma >= 0)
                close(priv->nl_socket_rdma);
+       if (priv->vmwa_context)
+               mlx5_vlan_vmwa_exit(priv->vmwa_context);
        if (priv->sh) {
                /*
                 * Free the shared context in last turn, because the cleanup
@@ -815,17 +894,17 @@ mlx5_dev_close(struct rte_eth_dev *dev)
                mlx5_free_shared_ibctx(priv->sh);
                priv->sh = NULL;
        }
-       ret = mlx5_hrxq_ibv_verify(dev);
+       ret = mlx5_hrxq_verify(dev);
        if (ret)
                DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
                        dev->data->port_id);
-       ret = mlx5_ind_table_ibv_verify(dev);
+       ret = mlx5_ind_table_obj_verify(dev);
        if (ret)
                DRV_LOG(WARNING, "port %u some indirection table still remain",
                        dev->data->port_id);
-       ret = mlx5_rxq_ibv_verify(dev);
+       ret = mlx5_rxq_obj_verify(dev);
        if (ret)
-               DRV_LOG(WARNING, "port %u some Verbs Rx queue still remain",
+               DRV_LOG(WARNING, "port %u some Rx queue objects still remain",
                        dev->data->port_id);
        ret = mlx5_rxq_verify(dev);
        if (ret)
@@ -916,6 +995,9 @@ const struct eth_dev_ops mlx5_dev_ops = {
        .rx_queue_intr_enable = mlx5_rx_intr_enable,
        .rx_queue_intr_disable = mlx5_rx_intr_disable,
        .is_removed = mlx5_is_removed,
+       .udp_tunnel_port_add  = mlx5_udp_tunnel_port_add,
+       .get_module_info = mlx5_get_module_info,
+       .get_module_eeprom = mlx5_get_module_eeprom,
 };
 
 /* Available operations from secondary process. */
@@ -929,6 +1011,8 @@ static const struct eth_dev_ops mlx5_dev_sec_ops = {
        .dev_infos_get = mlx5_dev_infos_get,
        .rx_descriptor_status = mlx5_rx_descriptor_status,
        .tx_descriptor_status = mlx5_tx_descriptor_status,
+       .get_module_info = mlx5_get_module_info,
+       .get_module_eeprom = mlx5_get_module_eeprom,
 };
 
 /* Available operations in flow isolated mode. */
@@ -972,6 +1056,8 @@ const struct eth_dev_ops mlx5_dev_ops_isolate = {
        .rx_queue_intr_enable = mlx5_rx_intr_enable,
        .rx_queue_intr_disable = mlx5_rx_intr_disable,
        .is_removed = mlx5_is_removed,
+       .get_module_info = mlx5_get_module_info,
+       .get_module_eeprom = mlx5_get_module_eeprom,
 };
 
 /**
@@ -1215,8 +1301,6 @@ mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn,
                switch (spawn->pci_dev->id.device_id) {
                case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
                case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
-               case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
-               case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
                        if (config->txq_inline_min <
                                       (int)MLX5_INLINE_HSIZE_L2) {
                                DRV_LOG(DEBUG,
@@ -1302,6 +1386,7 @@ mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn,
                 * and PFC control may be broken, so disable feature.
                 */
                config->hw_vlan_insert = 0;
+               config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
                break;
        default:
                config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
@@ -1311,6 +1396,126 @@ exit:
        DRV_LOG(DEBUG, "min tx inline configured: %d", config->txq_inline_min);
 }
 
+/**
+ * Allocate page of door-bells and register it using DevX API.
+ *
+ * @param [in] dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   Pointer to new page on success, NULL otherwise.
+ */
+static struct mlx5_devx_dbr_page *
+mlx5_alloc_dbr_page(struct rte_eth_dev *dev)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_devx_dbr_page *page;
+
+       /* Allocate space for door-bell page and management data. */
+       page = rte_calloc_socket(__func__, 1, sizeof(struct mlx5_devx_dbr_page),
+                                RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+       if (!page) {
+               DRV_LOG(ERR, "port %u cannot allocate dbr page",
+                       dev->data->port_id);
+               return NULL;
+       }
+       /* Register allocated memory. */
+       page->umem = mlx5_glue->devx_umem_reg(priv->sh->ctx, page->dbrs,
+                                             MLX5_DBR_PAGE_SIZE, 0);
+       if (!page->umem) {
+               DRV_LOG(ERR, "port %u cannot umem reg dbr page",
+                       dev->data->port_id);
+               rte_free(page);
+               return NULL;
+       }
+       return page;
+}
+
+/**
+ * Find the next available door-bell, allocate new page if needed.
+ *
+ * @param [in] dev
+ *   Pointer to Ethernet device.
+ * @param [out] dbr_page
+ *   Pointer used to return the door-bell page holding the new record.
+ *
+ * @return
+ *   Door-bell address offset on success, a negative error value otherwise.
+ */
+int64_t
+mlx5_get_dbr(struct rte_eth_dev *dev, struct mlx5_devx_dbr_page **dbr_page)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_devx_dbr_page *page = NULL;
+       uint32_t i, j;
+
+       LIST_FOREACH(page, &priv->dbrpgs, next)
+               if (page->dbr_count < MLX5_DBR_PER_PAGE)
+                       break;
+       if (!page) { /* No page with free door-bell exists. */
+               page = mlx5_alloc_dbr_page(dev);
+               if (!page) /* Failed to allocate new page. */
+                       return (-1);
+               LIST_INSERT_HEAD(&priv->dbrpgs, page, next);
+       }
+       /* Loop to find bitmap part with clear bit. */
+       for (i = 0;
+            i < MLX5_DBR_BITMAP_SIZE && page->dbr_bitmap[i] == UINT64_MAX;
+            i++)
+               ; /* Empty. */
+       assert(i < MLX5_DBR_BITMAP_SIZE);
+       /* Find the first clear bit. */
+       j = rte_bsf64(~page->dbr_bitmap[i]);
+       page->dbr_bitmap[i] |= (UINT64_C(1) << j);
+       page->dbr_count++;
+       *dbr_page = page;
+       return (((i * 64) + j) * sizeof(uint64_t));
+}
+
+/**
+ * Release a door-bell record.
+ *
+ * @param [in] dev
+ *   Pointer to Ethernet device.
+ * @param [in] umem_id
+ *   UMEM ID of page containing the door-bell record to release.
+ * @param [in] offset
+ *   Offset of door-bell record in page.
+ *
+ * @return
+ *   0 on success, a negative error value otherwise.
+ */
+int32_t
+mlx5_release_dbr(struct rte_eth_dev *dev, uint32_t umem_id, uint64_t offset)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_devx_dbr_page *page = NULL;
+       int ret = 0;
+
+       LIST_FOREACH(page, &priv->dbrpgs, next)
+               /* Find the page this address belongs to. */
+               if (page->umem->umem_id == umem_id)
+                       break;
+       if (!page)
+               return -EINVAL;
+       page->dbr_count--;
+       if (!page->dbr_count) {
+               /* Page not used, free it and remove from list. */
+               LIST_REMOVE(page, next);
+               if (page->umem)
+                       ret = -mlx5_glue->devx_umem_dereg(page->umem);
+               rte_free(page);
+       } else {
+               /* Mark in bitmap that this door-bell is not in use. */
+               offset /= MLX5_DBR_SIZE;
+               int i = offset / 64;
+               int j = offset % 64;
+
+               page->dbr_bitmap[i] &= ~(UINT64_C(1) << j);
+       }
+       return ret;
+}
+
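A minimal sketch of pairing the two helpers above (assumptions: the helper name dbr_smoke_test is hypothetical, "dev" is an initialized port, and the dbrs/umem fields match the mlx5_devx_dbr_page layout used by this patch). Note the returned offset is in bytes within the umem page, and release takes the page umem ID plus that byte offset:

	/* Hypothetical caller: allocate one door-bell record, use it,
	 * then release it by umem ID and byte offset. */
	static int
	dbr_smoke_test(struct rte_eth_dev *dev)
	{
		struct mlx5_devx_dbr_page *dbr_page = NULL;
		int64_t offset = mlx5_get_dbr(dev, &dbr_page);

		if (offset < 0)
			return -ENOMEM;
		/* Virtual address of the record inside the registered page. */
		uint64_t *dbr = (uint64_t *)&dbr_page->dbrs[offset];

		*dbr = 0; /* E.g. reset a new queue's counters. */
		return mlx5_release_dbr(dev, dbr_page->umem->umem_id, offset);
	}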
 /**
  * Spawn an Ethernet device from Verbs information.
  *
@@ -1683,6 +1888,36 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        } else if (config.cqe_pad) {
                DRV_LOG(INFO, "Rx CQE padding is enabled");
        }
+       if (config.devx) {
+               priv->counter_fallback = 0;
+               err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config.hca_attr);
+               if (err) {
+                       err = -err;
+                       goto error;
+               }
+               if (!config.hca_attr.flow_counters_dump)
+                       priv->counter_fallback = 1;
+#ifndef HAVE_IBV_DEVX_ASYNC
+               priv->counter_fallback = 1;
+#endif
+               if (priv->counter_fallback)
+                       DRV_LOG(INFO, "Use fall-back DV counter management");
+               /* Check for LRO support. */
+               if (config.dest_tir && config.hca_attr.lro_cap) {
+                       /* TBD check tunnel lro caps. */
+                       config.lro.supported = config.hca_attr.lro_cap;
+                       DRV_LOG(DEBUG, "Device supports LRO");
+                       /*
+                        * If LRO timeout is not configured by application,
+                        * use the minimal supported value.
+                        */
+                       if (!config.lro.timeout)
+                               config.lro.timeout =
+                               config.hca_attr.lro_timer_supported_periods[0];
+                       DRV_LOG(DEBUG, "LRO session timeout set to %d usec",
+                               config.lro.timeout);
+               }
+       }
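On the application side, LRO is then requested through the standard Rx offload flag; a hedged sketch using the ethdev names of this DPDK era (the helper name enable_lro is hypothetical):

	#include <rte_ethdev.h>

	/* Request LRO on a port; mlx5 grants the offload only when
	 * DevX reported lro_cap as checked above. */
	static int
	enable_lro(uint16_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
	{
		struct rte_eth_conf conf = { 0 };

		conf.rxmode.offloads |= DEV_RX_OFFLOAD_TCP_LRO;
		return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
	}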
        if (config.mprq.enabled && mprq) {
                if (config.mprq.stride_num_n > mprq_max_stride_num_n ||
                    config.mprq.stride_num_n < mprq_min_stride_num_n) {
@@ -1790,23 +2025,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
         * Verbs context returned by ibv_open_device().
         */
        mlx5_link_update(eth_dev, 0);
-#ifdef HAVE_IBV_DEVX_OBJ
-       if (config.devx) {
-               priv->counter_fallback = 0;
-               err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config.hca_attr);
-               if (err) {
-                       err = -err;
-                       goto error;
-               }
-               if (!config.hca_attr.flow_counters_dump)
-                       priv->counter_fallback = 1;
-#ifndef HAVE_IBV_DEVX_ASYNC
-               priv->counter_fallback = 1;
-#endif
-               if (priv->counter_fallback)
-                       DRV_LOG(INFO, "Use fall-back DV counter management\n");
-       }
-#endif
 #ifdef HAVE_MLX5DV_DR_ESWITCH
        if (!(config.hca_attr.eswitch_manager && config.dv_flow_en &&
              (switch_info->representor || switch_info->master)))
@@ -1818,6 +2036,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        mlx5_set_min_inline(spawn, &config);
        /* Store device configuration on private structure. */
        priv->config = config;
+       /* Create context for virtual machine VLAN workaround. */
+       priv->vmwa_context = mlx5_vlan_vmwa_init(eth_dev, spawn->ifindex);
        if (config.dv_flow_en) {
                err = mlx5_alloc_shared_dr(priv);
                if (err)
@@ -1830,11 +2050,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
                goto error;
        }
        priv->config.flow_prio = err;
-       /* Add device to memory callback list. */
-       rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
-       LIST_INSERT_HEAD(&mlx5_shared_data->mem_event_cb_list,
-                        sh, mem_event_cb);
-       rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
        return eth_dev;
 error:
        if (priv) {
@@ -1844,6 +2059,8 @@ error:
                        close(priv->nl_socket_route);
                if (priv->nl_socket_rdma >= 0)
                        close(priv->nl_socket_rdma);
+               if (priv->vmwa_context)
+                       mlx5_vlan_vmwa_exit(priv->vmwa_context);
                if (own_domain_id)
                        claim_zero(rte_eth_switch_domain_free(priv->domain_id));
                rte_free(priv);