net/sfc: fix initialization error path
[dpdk.git] / drivers / net / sfc / sfc.c
index a5d4736..3f5cd77 100644 (file)
@@ -1,38 +1,17 @@
-/*-
- *   BSD LICENSE
+/* SPDX-License-Identifier: BSD-3-Clause
  *
- * Copyright (c) 2016-2017 Solarflare Communications Inc.
+ * Copyright (c) 2016-2018 Solarflare Communications Inc.
  * All rights reserved.
  *
  * This software was jointly developed between OKTET Labs (under contract
  * for Solarflare) and Solarflare Communications, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /* sysconf() */
 #include <unistd.h>
 
 #include <rte_errno.h>
+#include <rte_alarm.h>
 
 #include "efx.h"
 
@@ -41,6 +20,8 @@
 #include "sfc_ev.h"
 #include "sfc_rx.h"
 #include "sfc_tx.h"
+#include "sfc_kvargs.h"
+#include "sfc_tweak.h"
 
 
 int
@@ -49,7 +30,7 @@ sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
 {
        const struct rte_memzone *mz;
 
-       sfc_log_init(sa, "name=%s id=%u len=%lu socket_id=%d",
+       sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d",
                     name, id, len, socket_id);
 
        mz = rte_eth_dma_zone_reserve(sa->eth_dev, name, id, len,
@@ -61,8 +42,8 @@ sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
                return ENOMEM;
        }
 
-       esmp->esm_addr = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr);
-       if (esmp->esm_addr == RTE_BAD_PHYS_ADDR) {
+       esmp->esm_addr = mz->iova;
+       if (esmp->esm_addr == RTE_BAD_IOVA) {
                (void)rte_memzone_free(mz);
                return EFAULT;
        }
@@ -102,14 +83,23 @@ sfc_phy_cap_from_link_speeds(uint32_t speeds)
                        phy_caps |=
                                (1 << EFX_PHY_CAP_1000FDX) |
                                (1 << EFX_PHY_CAP_10000FDX) |
-                               (1 << EFX_PHY_CAP_40000FDX);
+                               (1 << EFX_PHY_CAP_25000FDX) |
+                               (1 << EFX_PHY_CAP_40000FDX) |
+                               (1 << EFX_PHY_CAP_50000FDX) |
+                               (1 << EFX_PHY_CAP_100000FDX);
        }
        if (speeds & ETH_LINK_SPEED_1G)
                phy_caps |= (1 << EFX_PHY_CAP_1000FDX);
        if (speeds & ETH_LINK_SPEED_10G)
                phy_caps |= (1 << EFX_PHY_CAP_10000FDX);
+       if (speeds & ETH_LINK_SPEED_25G)
+               phy_caps |= (1 << EFX_PHY_CAP_25000FDX);
        if (speeds & ETH_LINK_SPEED_40G)
                phy_caps |= (1 << EFX_PHY_CAP_40000FDX);
+       if (speeds & ETH_LINK_SPEED_50G)
+               phy_caps |= (1 << EFX_PHY_CAP_50000FDX);
+       if (speeds & ETH_LINK_SPEED_100G)
+               phy_caps |= (1 << EFX_PHY_CAP_100000FDX);
 
        return phy_caps;
 }
@@ -134,10 +124,12 @@ sfc_check_conf(struct sfc_adapter *sa)
                rc = EINVAL;
        }
 
+#if !EFSYS_OPT_LOOPBACK
        if (conf->lpbk_mode != 0) {
                sfc_err(sa, "Loopback not supported");
                rc = EINVAL;
        }
+#endif
 
        if (conf->dcb_capability_en != 0) {
                sfc_err(sa, "Priority-based flow control not supported");
@@ -156,7 +148,8 @@ sfc_check_conf(struct sfc_adapter *sa)
                rc = EINVAL;
        }
 
-       if (conf->intr_conf.rxq != 0) {
+       if (conf->intr_conf.rxq != 0 &&
+           (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_INTR) == 0) {
                sfc_err(sa, "Receive queue interrupt not supported");
                rc = EINVAL;
        }
@@ -248,8 +241,8 @@ sfc_estimate_resource_limits(struct sfc_adapter *sa)
        return 0;
 
 fail_get_vi_pool:
-fail_nic_init:
        efx_nic_fini(sa->nic);
+fail_nic_init:
        return rc;
 }
 
@@ -270,27 +263,74 @@ sfc_set_drv_limits(struct sfc_adapter *sa)
        return efx_nic_set_drv_limits(sa->nic, &lim);
 }
 
-int
-sfc_start(struct sfc_adapter *sa)
+static int
+sfc_set_fw_subvariant(struct sfc_adapter *sa)
 {
+       struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
+       const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
+       uint64_t tx_offloads = sa->eth_dev->data->dev_conf.txmode.offloads;
+       unsigned int txq_index;
+       efx_nic_fw_subvariant_t req_fw_subvariant;
+       efx_nic_fw_subvariant_t cur_fw_subvariant;
        int rc;
 
-       sfc_log_init(sa, "entry");
+       if (!encp->enc_fw_subvariant_no_tx_csum_supported) {
+               sfc_info(sa, "no-Tx-checksum subvariant not supported");
+               return 0;
+       }
 
-       SFC_ASSERT(sfc_adapter_is_locked(sa));
+       for (txq_index = 0; txq_index < sas->txq_count; ++txq_index) {
+               struct sfc_txq_info *txq_info = &sas->txq_info[txq_index];
 
-       switch (sa->state) {
-       case SFC_ADAPTER_CONFIGURED:
-               break;
-       case SFC_ADAPTER_STARTED:
-               sfc_info(sa, "already started");
+               if (txq_info->state & SFC_TXQ_INITIALIZED)
+                       tx_offloads |= txq_info->offloads;
+       }
+
+       if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
+                          DEV_TX_OFFLOAD_TCP_CKSUM |
+                          DEV_TX_OFFLOAD_UDP_CKSUM |
+                          DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM))
+               req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_DEFAULT;
+       else
+               req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM;
+
+       rc = efx_nic_get_fw_subvariant(sa->nic, &cur_fw_subvariant);
+       if (rc != 0) {
+               sfc_err(sa, "failed to get FW subvariant: %d", rc);
+               return rc;
+       }
+       sfc_info(sa, "FW subvariant is %u vs required %u",
+                cur_fw_subvariant, req_fw_subvariant);
+
+       if (cur_fw_subvariant == req_fw_subvariant)
                return 0;
-       default:
-               rc = EINVAL;
-               goto fail_bad_state;
+
+       rc = efx_nic_set_fw_subvariant(sa->nic, req_fw_subvariant);
+       if (rc != 0) {
+               sfc_err(sa, "failed to set FW subvariant %u: %d",
+                       req_fw_subvariant, rc);
+               return rc;
        }
+       sfc_info(sa, "FW subvariant set to %u", req_fw_subvariant);
 
-       sa->state = SFC_ADAPTER_STARTING;
+       return 0;
+}
+
+static int
+sfc_try_start(struct sfc_adapter *sa)
+{
+       const efx_nic_cfg_t *encp;
+       int rc;
+
+       sfc_log_init(sa, "entry");
+
+       SFC_ASSERT(sfc_adapter_is_locked(sa));
+       SFC_ASSERT(sa->state == SFC_ADAPTER_STARTING);
+
+       sfc_log_init(sa, "set FW subvariant");
+       rc = sfc_set_fw_subvariant(sa);
+       if (rc != 0)
+               goto fail_set_fw_subvariant;
 
        sfc_log_init(sa, "set resource limits");
        rc = sfc_set_drv_limits(sa);
@@ -302,6 +342,23 @@ sfc_start(struct sfc_adapter *sa)
        if (rc != 0)
                goto fail_nic_init;
 
+       encp = efx_nic_cfg_get(sa->nic);
+
+       /*
+        * Refresh (since it may change on NIC reset/restart) a copy of
+        * supported tunnel encapsulations in shared memory to be used
+        * on supported Rx packet type classes get.
+        */
+       sa->priv.shared->tunnel_encaps =
+               encp->enc_tunnel_encapsulations_supported;
+
+       if (encp->enc_tunnel_encapsulations_supported != 0) {
+               sfc_log_init(sa, "apply tunnel config");
+               rc = efx_tunnel_reconfigure(sa->nic);
+               if (rc != 0)
+                       goto fail_tunnel_reconfigure;
+       }
+
        rc = sfc_intr_start(sa);
        if (rc != 0)
                goto fail_intr_start;
@@ -326,7 +383,6 @@ sfc_start(struct sfc_adapter *sa)
        if (rc != 0)
                goto fail_flows_insert;
 
-       sa->state = SFC_ADAPTER_STARTED;
        sfc_log_init(sa, "done");
        return 0;
 
@@ -346,10 +402,52 @@ fail_ev_start:
        sfc_intr_stop(sa);
 
 fail_intr_start:
+fail_tunnel_reconfigure:
        efx_nic_fini(sa->nic);
 
 fail_nic_init:
 fail_set_drv_limits:
+fail_set_fw_subvariant:
+       sfc_log_init(sa, "failed %d", rc);
+       return rc;
+}
+
+int
+sfc_start(struct sfc_adapter *sa)
+{
+       unsigned int start_tries = 3;
+       int rc;
+
+       sfc_log_init(sa, "entry");
+
+       SFC_ASSERT(sfc_adapter_is_locked(sa));
+
+       switch (sa->state) {
+       case SFC_ADAPTER_CONFIGURED:
+               break;
+       case SFC_ADAPTER_STARTED:
+               sfc_notice(sa, "already started");
+               return 0;
+       default:
+               rc = EINVAL;
+               goto fail_bad_state;
+       }
+
+       sa->state = SFC_ADAPTER_STARTING;
+
+       do {
+               rc = sfc_try_start(sa);
+       } while ((--start_tries > 0) &&
+                (rc == EIO || rc == EAGAIN || rc == ENOENT || rc == EINVAL));
+
+       if (rc != 0)
+               goto fail_try_start;
+
+       sa->state = SFC_ADAPTER_STARTED;
+       sfc_log_init(sa, "done");
+       return 0;
+
+fail_try_start:
        sa->state = SFC_ADAPTER_CONFIGURED;
 fail_bad_state:
        sfc_log_init(sa, "failed %d", rc);
@@ -367,7 +465,7 @@ sfc_stop(struct sfc_adapter *sa)
        case SFC_ADAPTER_STARTED:
                break;
        case SFC_ADAPTER_CONFIGURED:
-               sfc_info(sa, "already stopped");
+               sfc_notice(sa, "already stopped");
                return;
        default:
                sfc_err(sa, "stop in unexpected state %u", sa->state);
@@ -389,6 +487,58 @@ sfc_stop(struct sfc_adapter *sa)
        sfc_log_init(sa, "done");
 }
 
+static int
+sfc_restart(struct sfc_adapter *sa)
+{
+       int rc;
+
+       SFC_ASSERT(sfc_adapter_is_locked(sa));
+
+       if (sa->state != SFC_ADAPTER_STARTED)
+               return EINVAL;
+
+       sfc_stop(sa);
+
+       rc = sfc_start(sa);
+       if (rc != 0)
+               sfc_err(sa, "restart failed");
+
+       return rc;
+}
+
+static void
+sfc_restart_if_required(void *arg)
+{
+       struct sfc_adapter *sa = arg;
+
+       /* If restart is scheduled, clear the flag and do it */
+       if (rte_atomic32_cmpset((volatile uint32_t *)&sa->restart_required,
+                               1, 0)) {
+               sfc_adapter_lock(sa);
+               if (sa->state == SFC_ADAPTER_STARTED)
+                       (void)sfc_restart(sa);
+               sfc_adapter_unlock(sa);
+       }
+}
+
+void
+sfc_schedule_restart(struct sfc_adapter *sa)
+{
+       int rc;
+
+       /* Schedule restart alarm if it is not scheduled yet */
+       if (!rte_atomic32_test_and_set(&sa->restart_required))
+               return;
+
+       rc = rte_eal_alarm_set(1, sfc_restart_if_required, sa);
+       if (rc == -ENOTSUP)
+               sfc_warn(sa, "alarms are not supported, restart is pending");
+       else if (rc != 0)
+               sfc_err(sa, "cannot arm restart alarm (rc=%d)", rc);
+       else
+               sfc_notice(sa, "restart scheduled");
+}
+
 int
 sfc_configure(struct sfc_adapter *sa)
 {
@@ -398,7 +548,8 @@ sfc_configure(struct sfc_adapter *sa)
 
        SFC_ASSERT(sfc_adapter_is_locked(sa));
 
-       SFC_ASSERT(sa->state == SFC_ADAPTER_INITIALIZED);
+       SFC_ASSERT(sa->state == SFC_ADAPTER_INITIALIZED ||
+                  sa->state == SFC_ADAPTER_CONFIGURED);
        sa->state = SFC_ADAPTER_CONFIGURING;
 
        rc = sfc_check_conf(sa);
@@ -409,29 +560,29 @@ sfc_configure(struct sfc_adapter *sa)
        if (rc != 0)
                goto fail_intr_configure;
 
-       rc = sfc_port_init(sa);
+       rc = sfc_port_configure(sa);
        if (rc != 0)
-               goto fail_port_init;
+               goto fail_port_configure;
 
-       rc = sfc_rx_init(sa);
+       rc = sfc_rx_configure(sa);
        if (rc != 0)
-               goto fail_rx_init;
+               goto fail_rx_configure;
 
-       rc = sfc_tx_init(sa);
+       rc = sfc_tx_configure(sa);
        if (rc != 0)
-               goto fail_tx_init;
+               goto fail_tx_configure;
 
        sa->state = SFC_ADAPTER_CONFIGURED;
        sfc_log_init(sa, "done");
        return 0;
 
-fail_tx_init:
-       sfc_rx_fini(sa);
+fail_tx_configure:
+       sfc_rx_close(sa);
 
-fail_rx_init:
-       sfc_port_fini(sa);
+fail_rx_configure:
+       sfc_port_close(sa);
 
-fail_port_init:
+fail_port_configure:
        sfc_intr_close(sa);
 
 fail_intr_configure:
@@ -451,9 +602,9 @@ sfc_close(struct sfc_adapter *sa)
        SFC_ASSERT(sa->state == SFC_ADAPTER_CONFIGURED);
        sa->state = SFC_ADAPTER_CLOSING;
 
-       sfc_tx_fini(sa);
-       sfc_rx_fini(sa);
-       sfc_port_fini(sa);
+       sfc_tx_close(sa);
+       sfc_rx_close(sa);
+       sfc_port_close(sa);
        sfc_intr_close(sa);
 
        sa->state = SFC_ADAPTER_INITIALIZED;
@@ -461,27 +612,18 @@ sfc_close(struct sfc_adapter *sa)
 }
 
 static int
-sfc_mem_bar_init(struct sfc_adapter *sa)
+sfc_mem_bar_init(struct sfc_adapter *sa, unsigned int membar)
 {
        struct rte_eth_dev *eth_dev = sa->eth_dev;
-       struct rte_pci_device *pci_dev = SFC_DEV_TO_PCI(eth_dev);
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
        efsys_bar_t *ebp = &sa->mem_bar;
-       unsigned int i;
-       struct rte_mem_resource *res;
-
-       for (i = 0; i < RTE_DIM(pci_dev->mem_resource); i++) {
-               res = &pci_dev->mem_resource[i];
-               if ((res->len != 0) && (res->phys_addr != 0)) {
-                       /* Found first memory BAR */
-                       SFC_BAR_LOCK_INIT(ebp, eth_dev->data->name);
-                       ebp->esb_rid = i;
-                       ebp->esb_dev = pci_dev;
-                       ebp->esb_base = res->addr;
-                       return 0;
-               }
-       }
+       struct rte_mem_resource *res = &pci_dev->mem_resource[membar];
 
-       return EFAULT;
+       SFC_BAR_LOCK_INIT(ebp, eth_dev->data->name);
+       ebp->esb_rid = membar;
+       ebp->esb_dev = pci_dev;
+       ebp->esb_base = res->addr;
+       return 0;
 }
 
 static void
@@ -493,26 +635,24 @@ sfc_mem_bar_fini(struct sfc_adapter *sa)
        memset(ebp, 0, sizeof(*ebp));
 }
 
-#if EFSYS_OPT_RX_SCALE
 /*
  * A fixed RSS key which has a property of being symmetric
  * (symmetrical flows are distributed to the same CPU)
  * and also known to give a uniform distribution
  * (a good distribution of traffic between different CPUs)
  */
-static const uint8_t default_rss_key[SFC_RSS_KEY_SIZE] = {
+static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = {
        0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
        0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
        0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
        0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
        0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
 };
-#endif
 
 static int
-sfc_set_rss_defaults(struct sfc_adapter *sa)
+sfc_rss_attach(struct sfc_adapter *sa)
 {
-#if EFSYS_OPT_RX_SCALE
+       struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
        int rc;
 
        rc = efx_intr_init(sa->nic, sa->intr.type, NULL);
@@ -527,26 +667,31 @@ sfc_set_rss_defaults(struct sfc_adapter *sa)
        if (rc != 0)
                goto fail_rx_init;
 
-       rc = efx_rx_scale_support_get(sa->nic, &sa->rss_support);
+       rc = efx_rx_scale_default_support_get(sa->nic, &rss->context_type);
        if (rc != 0)
                goto fail_scale_support_get;
 
-       rc = efx_rx_hash_support_get(sa->nic, &sa->hash_support);
+       rc = efx_rx_hash_default_support_get(sa->nic, &rss->hash_support);
        if (rc != 0)
                goto fail_hash_support_get;
 
+       rc = sfc_rx_hash_init(sa);
+       if (rc != 0)
+               goto fail_rx_hash_init;
+
        efx_rx_fini(sa->nic);
        efx_ev_fini(sa->nic);
        efx_intr_fini(sa->nic);
 
-       sa->rss_hash_types = sfc_rte_to_efx_hash_type(SFC_RSS_OFFLOADS);
-
-       rte_memcpy(sa->rss_key, default_rss_key, sizeof(sa->rss_key));
+       rte_memcpy(rss->key, default_rss_key, sizeof(rss->key));
 
        return 0;
 
+fail_rx_hash_init:
 fail_hash_support_get:
 fail_scale_support_get:
+       efx_rx_fini(sa->nic);
+
 fail_rx_init:
        efx_ev_fini(sa->nic);
 
@@ -555,9 +700,12 @@ fail_ev_init:
 
 fail_intr_init:
        return rc;
-#else
-       return 0;
-#endif
+}
+
+static void
+sfc_rss_detach(struct sfc_adapter *sa)
+{
+       sfc_rx_hash_fini(sa);
 }
 
 int
@@ -578,13 +726,38 @@ sfc_attach(struct sfc_adapter *sa)
        if (rc != 0)
                goto fail_nic_reset;
 
+       /*
+        * Probed NIC is sufficient for tunnel init.
+        * Initialize tunnel support to be able to use libefx
+        * efx_tunnel_config_udp_{add,remove}() in any state and
+        * efx_tunnel_reconfigure() on start up.
+        */
+       rc = efx_tunnel_init(enp);
+       if (rc != 0)
+               goto fail_tunnel_init;
+
        encp = efx_nic_cfg_get(sa->nic);
 
-       if (sa->dp_tx->features & SFC_DP_TX_FEAT_TSO) {
+       /*
+        * Make a copy of supported tunnel encapsulations in shared
+        * memory to be used on supported Rx packet type classes get.
+        */
+       sa->priv.shared->tunnel_encaps =
+               encp->enc_tunnel_encapsulations_supported;
+
+       if (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & DEV_TX_OFFLOAD_TCP_TSO) {
                sa->tso = encp->enc_fw_assisted_tso_v2_enabled;
                if (!sa->tso)
-                       sfc_warn(sa,
-                                "TSO support isn't available on this adapter");
+                       sfc_info(sa, "TSO support isn't available on this adapter");
+       }
+
+       if (sa->tso &&
+           (sfc_dp_tx_offload_capa(sa->priv.dp_tx) &
+            (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
+             DEV_TX_OFFLOAD_GENEVE_TNL_TSO)) != 0) {
+               sa->tso_encap = encp->enc_fw_assisted_tso_v2_encap_enabled;
+               if (!sa->tso_encap)
+                       sfc_info(sa, "Encapsulated TSO support isn't available on this adapter");
        }
 
        sfc_log_init(sa, "estimate resource limits");
@@ -592,9 +765,24 @@ sfc_attach(struct sfc_adapter *sa)
        if (rc != 0)
                goto fail_estimate_rsrc_limits;
 
+       sa->evq_max_entries = encp->enc_evq_max_nevs;
+       SFC_ASSERT(rte_is_power_of_2(sa->evq_max_entries));
+
+       sa->evq_min_entries = encp->enc_evq_min_nevs;
+       SFC_ASSERT(rte_is_power_of_2(sa->evq_min_entries));
+
+       sa->rxq_max_entries = encp->enc_rxq_max_ndescs;
+       SFC_ASSERT(rte_is_power_of_2(sa->rxq_max_entries));
+
+       sa->rxq_min_entries = encp->enc_rxq_min_ndescs;
+       SFC_ASSERT(rte_is_power_of_2(sa->rxq_min_entries));
+
        sa->txq_max_entries = encp->enc_txq_max_ndescs;
        SFC_ASSERT(rte_is_power_of_2(sa->txq_max_entries));
 
+       sa->txq_min_entries = encp->enc_txq_min_ndescs;
+       SFC_ASSERT(rte_is_power_of_2(sa->txq_min_entries));
+
        rc = sfc_intr_attach(sa);
        if (rc != 0)
                goto fail_intr_attach;
@@ -603,12 +791,13 @@ sfc_attach(struct sfc_adapter *sa)
        if (rc != 0)
                goto fail_ev_attach;
 
-       efx_phy_adv_cap_get(sa->nic, EFX_PHY_CAP_PERM,
-                           &sa->port.phy_adv_cap_mask);
+       rc = sfc_port_attach(sa);
+       if (rc != 0)
+               goto fail_port_attach;
 
-       rc = sfc_set_rss_defaults(sa);
+       rc = sfc_rss_attach(sa);
        if (rc != 0)
-               goto fail_set_rss_defaults;
+               goto fail_rss_attach;
 
        rc = sfc_filter_attach(sa);
        if (rc != 0)
@@ -625,7 +814,12 @@ sfc_attach(struct sfc_adapter *sa)
        return 0;
 
 fail_filter_attach:
-fail_set_rss_defaults:
+       sfc_rss_detach(sa);
+
+fail_rss_attach:
+       sfc_port_detach(sa);
+
+fail_port_attach:
        sfc_ev_detach(sa);
 
 fail_ev_attach:
@@ -635,6 +829,9 @@ fail_intr_attach:
        efx_nic_fini(sa->nic);
 
 fail_estimate_rsrc_limits:
+fail_tunnel_init:
+       efx_tunnel_fini(sa->nic);
+
 fail_nic_reset:
 
        sfc_log_init(sa, "failed %d", rc);
@@ -651,16 +848,178 @@ sfc_detach(struct sfc_adapter *sa)
        sfc_flow_fini(sa);
 
        sfc_filter_detach(sa);
+       sfc_rss_detach(sa);
+       sfc_port_detach(sa);
        sfc_ev_detach(sa);
        sfc_intr_detach(sa);
+       efx_tunnel_fini(sa->nic);
 
        sa->state = SFC_ADAPTER_UNINITIALIZED;
 }
 
+static int
+sfc_kvarg_fv_variant_handler(__rte_unused const char *key,
+                            const char *value_str, void *opaque)
+{
+       uint32_t *value = opaque;
+
+       if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DONT_CARE) == 0)
+               *value = EFX_FW_VARIANT_DONT_CARE;
+       else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_FULL_FEATURED) == 0)
+               *value = EFX_FW_VARIANT_FULL_FEATURED;
+       else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_LOW_LATENCY) == 0)
+               *value = EFX_FW_VARIANT_LOW_LATENCY;
+       else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_PACKED_STREAM) == 0)
+               *value = EFX_FW_VARIANT_PACKED_STREAM;
+       else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DPDK) == 0)
+               *value = EFX_FW_VARIANT_DPDK;
+       else
+               return -EINVAL;
+
+       return 0;
+}
+
+static int
+sfc_get_fw_variant(struct sfc_adapter *sa, efx_fw_variant_t *efv)
+{
+       efx_nic_fw_info_t enfi;
+       int rc;
+
+       rc = efx_nic_get_fw_version(sa->nic, &enfi);
+       if (rc != 0)
+               return rc;
+       else if (!enfi.enfi_dpcpu_fw_ids_valid)
+               return ENOTSUP;
+
+       /*
+        * Firmware variant can be uniquely identified by the RxDPCPU
+        * firmware id
+        */
+       switch (enfi.enfi_rx_dpcpu_fw_id) {
+       case EFX_RXDP_FULL_FEATURED_FW_ID:
+               *efv = EFX_FW_VARIANT_FULL_FEATURED;
+               break;
+
+       case EFX_RXDP_LOW_LATENCY_FW_ID:
+               *efv = EFX_FW_VARIANT_LOW_LATENCY;
+               break;
+
+       case EFX_RXDP_PACKED_STREAM_FW_ID:
+               *efv = EFX_FW_VARIANT_PACKED_STREAM;
+               break;
+
+       case EFX_RXDP_DPDK_FW_ID:
+               *efv = EFX_FW_VARIANT_DPDK;
+               break;
+
+       default:
+               /*
+                * Other firmware variants are not considered, since they are
+                * not supported in the device parameters
+                */
+               *efv = EFX_FW_VARIANT_DONT_CARE;
+               break;
+       }
+
+       return 0;
+}
+
+static const char *
+sfc_fw_variant2str(efx_fw_variant_t efv)
+{
+       switch (efv) {
+       case EFX_RXDP_FULL_FEATURED_FW_ID:
+               return SFC_KVARG_FW_VARIANT_FULL_FEATURED;
+       case EFX_RXDP_LOW_LATENCY_FW_ID:
+               return SFC_KVARG_FW_VARIANT_LOW_LATENCY;
+       case EFX_RXDP_PACKED_STREAM_FW_ID:
+               return SFC_KVARG_FW_VARIANT_PACKED_STREAM;
+       case EFX_RXDP_DPDK_FW_ID:
+               return SFC_KVARG_FW_VARIANT_DPDK;
+       default:
+               return "unknown";
+       }
+}
+
+static int
+sfc_kvarg_rxd_wait_timeout_ns(struct sfc_adapter *sa)
+{
+       int rc;
+       long value;
+
+       value = SFC_RXD_WAIT_TIMEOUT_NS_DEF;
+
+       rc = sfc_kvargs_process(sa, SFC_KVARG_RXD_WAIT_TIMEOUT_NS,
+                               sfc_kvarg_long_handler, &value);
+       if (rc != 0)
+               return rc;
+
+       if (value < 0 ||
+           (unsigned long)value > EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX) {
+               sfc_err(sa, "wrong '" SFC_KVARG_RXD_WAIT_TIMEOUT_NS "' "
+                           "was set (%ld);", value);
+               sfc_err(sa, "it must not be less than 0 or greater than %u",
+                           EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX);
+               return EINVAL;
+       }
+
+       sa->rxd_wait_timeout_ns = value;
+       return 0;
+}
+
+static int
+sfc_nic_probe(struct sfc_adapter *sa)
+{
+       efx_nic_t *enp = sa->nic;
+       efx_fw_variant_t preferred_efv;
+       efx_fw_variant_t efv;
+       int rc;
+
+       preferred_efv = EFX_FW_VARIANT_DONT_CARE;
+       rc = sfc_kvargs_process(sa, SFC_KVARG_FW_VARIANT,
+                               sfc_kvarg_fv_variant_handler,
+                               &preferred_efv);
+       if (rc != 0) {
+               sfc_err(sa, "invalid %s parameter value", SFC_KVARG_FW_VARIANT);
+               return rc;
+       }
+
+       rc = sfc_kvarg_rxd_wait_timeout_ns(sa);
+       if (rc != 0)
+               return rc;
+
+       rc = efx_nic_probe(enp, preferred_efv);
+       if (rc == EACCES) {
+               /* Unprivileged functions cannot set FW variant */
+               rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
+       }
+       if (rc != 0)
+               return rc;
+
+       rc = sfc_get_fw_variant(sa, &efv);
+       if (rc == ENOTSUP) {
+               sfc_warn(sa, "FW variant can not be obtained");
+               return 0;
+       }
+       if (rc != 0)
+               return rc;
+
+       /* Check that firmware variant was changed to the requested one */
+       if (preferred_efv != EFX_FW_VARIANT_DONT_CARE && preferred_efv != efv) {
+               sfc_warn(sa, "FW variant has not changed to the requested %s",
+                        sfc_fw_variant2str(preferred_efv));
+       }
+
+       sfc_notice(sa, "running FW variant is %s", sfc_fw_variant2str(efv));
+
+       return 0;
+}
+
 int
 sfc_probe(struct sfc_adapter *sa)
 {
-       struct rte_pci_device *pci_dev = SFC_DEV_TO_PCI(sa->eth_dev);
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(sa->eth_dev);
+       unsigned int membar;
        efx_nic_t *enp;
        int rc;
 
@@ -669,18 +1028,19 @@ sfc_probe(struct sfc_adapter *sa)
        SFC_ASSERT(sfc_adapter_is_locked(sa));
 
        sa->socket_id = rte_socket_id();
-
-       sfc_log_init(sa, "init mem bar");
-       rc = sfc_mem_bar_init(sa);
-       if (rc != 0)
-               goto fail_mem_bar_init;
+       rte_atomic32_init(&sa->restart_required);
 
        sfc_log_init(sa, "get family");
        rc = efx_family(pci_dev->id.vendor_id, pci_dev->id.device_id,
-                       &sa->family);
+                       &sa->family, &membar);
        if (rc != 0)
                goto fail_family;
-       sfc_log_init(sa, "family is %u", sa->family);
+       sfc_log_init(sa, "family is %u, membar is %u", sa->family, membar);
+
+       sfc_log_init(sa, "init mem bar");
+       rc = sfc_mem_bar_init(sa, membar);
+       if (rc != 0)
+               goto fail_mem_bar_init;
 
        sfc_log_init(sa, "create nic");
        rte_spinlock_init(&sa->nic_lock);
@@ -695,7 +1055,7 @@ sfc_probe(struct sfc_adapter *sa)
                goto fail_mcdi_init;
 
        sfc_log_init(sa, "probe nic");
-       rc = efx_nic_probe(enp);
+       rc = sfc_nic_probe(sa);
        if (rc != 0)
                goto fail_nic_probe;
 
@@ -711,10 +1071,10 @@ fail_mcdi_init:
        efx_nic_destroy(enp);
 
 fail_nic_create:
-fail_family:
        sfc_mem_bar_fini(sa);
 
 fail_mem_bar_init:
+fail_family:
        sfc_log_init(sa, "failed %d", rc);
        return rc;
 }
@@ -733,6 +1093,14 @@ sfc_unprobe(struct sfc_adapter *sa)
 
        sfc_mcdi_fini(sa);
 
+       /*
+        * Make sure there is no pending alarm to restart since we are
+        * going to free device private which is passed as the callback
+        * opaque data. A new alarm cannot be scheduled since MCDI is
+        * shut down.
+        */
+       rte_eal_alarm_cancel(sfc_restart_if_required, sa);
+
        sfc_log_init(sa, "destroy nic");
        sa->nic = NULL;
        efx_nic_destroy(enp);
@@ -742,3 +1110,38 @@ sfc_unprobe(struct sfc_adapter *sa)
        sfc_flow_fini(sa);
        sa->state = SFC_ADAPTER_UNINITIALIZED;
 }
+
+uint32_t
+sfc_register_logtype(const struct rte_pci_addr *pci_addr,
+                    const char *lt_prefix_str, uint32_t ll_default)
+{
+       size_t lt_prefix_str_size = strlen(lt_prefix_str);
+       size_t lt_str_size_max;
+       char *lt_str = NULL;
+       int ret;
+
+       if (SIZE_MAX - PCI_PRI_STR_SIZE - 1 > lt_prefix_str_size) {
+               ++lt_prefix_str_size; /* Reserve space for prefix separator */
+               lt_str_size_max = lt_prefix_str_size + PCI_PRI_STR_SIZE + 1;
+       } else {
+               return sfc_logtype_driver;
+       }
+
+       lt_str = rte_zmalloc("logtype_str", lt_str_size_max, 0);
+       if (lt_str == NULL)
+               return sfc_logtype_driver;
+
+       strncpy(lt_str, lt_prefix_str, lt_prefix_str_size);
+       lt_str[lt_prefix_str_size - 1] = '.';
+       rte_pci_device_name(pci_addr, lt_str + lt_prefix_str_size,
+                           lt_str_size_max - lt_prefix_str_size);
+       lt_str[lt_str_size_max - 1] = '\0';
+
+       ret = rte_log_register_type_and_pick_level(lt_str, ll_default);
+       rte_free(lt_str);
+
+       if (ret < 0)
+               return sfc_logtype_driver;
+
+       return ret;
+}