X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fsfc%2Fsfc.c;h=ed714fe02f3d7fca0e0c7de5af50e337bdf7a1ba;hb=4c6005999e41d71fb449652ee7f1ff4924e0affe;hp=dcf5eb866c80661d1ef9838a2c708064761b51d7;hpb=00b94c1c21d4e8300c51acdd5a0e43bed4552bae;p=dpdk.git diff --git a/drivers/net/sfc/sfc.c b/drivers/net/sfc/sfc.c index dcf5eb866c..ed714fe02f 100644 --- a/drivers/net/sfc/sfc.c +++ b/drivers/net/sfc/sfc.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause * - * Copyright (c) 2016-2018 Solarflare Communications Inc. - * All rights reserved. + * Copyright(c) 2019-2021 Xilinx, Inc. + * Copyright(c) 2016-2019 Solarflare Communications Inc. * * This software was jointly developed between OKTET Labs (under contract * for Solarflare) and Solarflare Communications, Inc. @@ -16,20 +16,52 @@ #include "efx.h" #include "sfc.h" +#include "sfc_debug.h" #include "sfc_log.h" #include "sfc_ev.h" #include "sfc_rx.h" +#include "sfc_mae_counter.h" #include "sfc_tx.h" #include "sfc_kvargs.h" +#include "sfc_tweak.h" +#include "sfc_sw_stats.h" +#include "sfc_switch.h" +#include "sfc_nic_dma.h" +bool +sfc_repr_supported(const struct sfc_adapter *sa) +{ + if (!sa->switchdev) + return false; + + /* + * Representor proxy should use service lcore on PF's socket + * (sa->socket_id) to be efficient. But the proxy will fall back + * to any socket if it is not possible to get the service core + * on the same socket. Check that at least service core on any + * socket is available. + */ + if (sfc_get_service_lcore(SOCKET_ID_ANY) == RTE_MAX_LCORE) + return false; + + return true; +} + +bool +sfc_repr_available(const struct sfc_adapter_shared *sas) +{ + return sas->nb_repr_rxq > 0 && sas->nb_repr_txq > 0; +} int -sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id, - size_t len, int socket_id, efsys_mem_t *esmp) +sfc_dma_alloc(struct sfc_adapter *sa, const char *name, uint16_t id, + efx_nic_dma_addr_type_t addr_type, size_t len, int socket_id, + efsys_mem_t *esmp) { const struct rte_memzone *mz; + int rc; - sfc_log_init(sa, "name=%s id=%u len=%lu socket_id=%d", + sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d", name, id, len, socket_id); mz = rte_eth_dma_zone_reserve(sa->eth_dev, name, id, len, @@ -40,16 +72,25 @@ sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id, rte_strerror(rte_errno)); return ENOMEM; } - - esmp->esm_addr = mz->iova; - if (esmp->esm_addr == RTE_BAD_IOVA) { + if (mz->iova == RTE_BAD_IOVA) { (void)rte_memzone_free(mz); return EFAULT; } + rc = sfc_nic_dma_mz_map(sa, mz, addr_type, &esmp->esm_addr); + if (rc != 0) { + (void)rte_memzone_free(mz); + return rc; + } + esmp->esm_mz = mz; esmp->esm_base = mz->addr; + sfc_info(sa, + "DMA name=%s id=%u len=%lu socket_id=%d => virt=%p iova=%lx", + name, id, len, socket_id, esmp->esm_base, + (unsigned long)esmp->esm_addr); + return 0; } @@ -72,13 +113,13 @@ sfc_phy_cap_from_link_speeds(uint32_t speeds) { uint32_t phy_caps = 0; - if (~speeds & ETH_LINK_SPEED_FIXED) { + if (~speeds & RTE_ETH_LINK_SPEED_FIXED) { phy_caps |= (1 << EFX_PHY_CAP_AN); /* * If no speeds are specified in the mask, any supported * may be negotiated */ - if (speeds == ETH_LINK_SPEED_AUTONEG) + if (speeds == RTE_ETH_LINK_SPEED_AUTONEG) phy_caps |= (1 << EFX_PHY_CAP_1000FDX) | (1 << EFX_PHY_CAP_10000FDX) | @@ -87,17 +128,17 @@ sfc_phy_cap_from_link_speeds(uint32_t speeds) (1 << EFX_PHY_CAP_50000FDX) | (1 << EFX_PHY_CAP_100000FDX); } - if (speeds & ETH_LINK_SPEED_1G) + if (speeds & RTE_ETH_LINK_SPEED_1G) phy_caps |= (1 << EFX_PHY_CAP_1000FDX); - if (speeds & ETH_LINK_SPEED_10G) + if (speeds & RTE_ETH_LINK_SPEED_10G) phy_caps |= (1 << EFX_PHY_CAP_10000FDX); - if (speeds & ETH_LINK_SPEED_25G) + if (speeds & RTE_ETH_LINK_SPEED_25G) phy_caps |= (1 << EFX_PHY_CAP_25000FDX); - if (speeds & ETH_LINK_SPEED_40G) + if (speeds & RTE_ETH_LINK_SPEED_40G) phy_caps |= (1 << EFX_PHY_CAP_40000FDX); - if (speeds & ETH_LINK_SPEED_50G) + if (speeds & RTE_ETH_LINK_SPEED_50G) phy_caps |= (1 << EFX_PHY_CAP_50000FDX); - if (speeds & ETH_LINK_SPEED_100G) + if (speeds & RTE_ETH_LINK_SPEED_100G) phy_caps |= (1 << EFX_PHY_CAP_100000FDX); return phy_caps; @@ -147,7 +188,8 @@ sfc_check_conf(struct sfc_adapter *sa) rc = EINVAL; } - if (conf->intr_conf.rxq != 0) { + if (conf->intr_conf.rxq != 0 && + (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_INTR) == 0) { sfc_err(sa, "Receive queue interrupt not supported"); rc = EINVAL; } @@ -166,6 +208,7 @@ static int sfc_estimate_resource_limits(struct sfc_adapter *sa) { const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); + struct sfc_adapter_shared *sas = sfc_sa2shared(sa); efx_drv_limits_t limits; int rc; uint32_t evq_allocated; @@ -197,7 +240,7 @@ sfc_estimate_resource_limits(struct sfc_adapter *sa) MIN(encp->enc_txq_limit, limits.edl_max_evq_count - 1 - limits.edl_max_rxq_count); - if (sa->tso) + if (sa->tso && encp->enc_fw_assisted_tso_v2_enabled) limits.edl_max_txq_count = MIN(limits.edl_max_txq_count, encp->enc_fw_assisted_tso_v2_n_contexts / @@ -227,36 +270,117 @@ sfc_estimate_resource_limits(struct sfc_adapter *sa) rxq_allocated = MIN(rxq_allocated, limits.edl_max_rxq_count); txq_allocated = MIN(txq_allocated, limits.edl_max_txq_count); - /* Subtract management EVQ not used for traffic */ - SFC_ASSERT(evq_allocated > 0); + /* + * Subtract management EVQ not used for traffic + * The resource allocation strategy is as follows: + * - one EVQ for management + * - one EVQ for each ethdev RXQ + * - one EVQ for each ethdev TXQ + * - one EVQ and one RXQ for optional MAE counters. + */ + if (evq_allocated == 0) { + sfc_err(sa, "count of allocated EvQ is 0"); + rc = ENOMEM; + goto fail_allocate_evq; + } evq_allocated--; - /* Right now we use separate EVQ for Rx and Tx */ - sa->rxq_max = MIN(rxq_allocated, evq_allocated / 2); - sa->txq_max = MIN(txq_allocated, evq_allocated - sa->rxq_max); + /* + * Reserve absolutely required minimum. + * Right now we use separate EVQ for Rx and Tx. + */ + if (rxq_allocated > 0 && evq_allocated > 0) { + sa->rxq_max = 1; + rxq_allocated--; + evq_allocated--; + } + if (txq_allocated > 0 && evq_allocated > 0) { + sa->txq_max = 1; + txq_allocated--; + evq_allocated--; + } + + if (sfc_mae_counter_rxq_required(sa) && + rxq_allocated > 0 && evq_allocated > 0) { + rxq_allocated--; + evq_allocated--; + sas->counters_rxq_allocated = true; + } else { + sas->counters_rxq_allocated = false; + } + + if (sfc_repr_supported(sa) && + evq_allocated >= SFC_REPR_PROXY_NB_RXQ_MIN + + SFC_REPR_PROXY_NB_TXQ_MIN && + rxq_allocated >= SFC_REPR_PROXY_NB_RXQ_MIN && + txq_allocated >= SFC_REPR_PROXY_NB_TXQ_MIN) { + unsigned int extra; + + txq_allocated -= SFC_REPR_PROXY_NB_TXQ_MIN; + rxq_allocated -= SFC_REPR_PROXY_NB_RXQ_MIN; + evq_allocated -= SFC_REPR_PROXY_NB_RXQ_MIN + + SFC_REPR_PROXY_NB_TXQ_MIN; + + sas->nb_repr_rxq = SFC_REPR_PROXY_NB_RXQ_MIN; + sas->nb_repr_txq = SFC_REPR_PROXY_NB_TXQ_MIN; + + /* Allocate extra representor RxQs up to the maximum */ + extra = MIN(evq_allocated, rxq_allocated); + extra = MIN(extra, + SFC_REPR_PROXY_NB_RXQ_MAX - sas->nb_repr_rxq); + evq_allocated -= extra; + rxq_allocated -= extra; + sas->nb_repr_rxq += extra; + + /* Allocate extra representor TxQs up to the maximum */ + extra = MIN(evq_allocated, txq_allocated); + extra = MIN(extra, + SFC_REPR_PROXY_NB_TXQ_MAX - sas->nb_repr_txq); + evq_allocated -= extra; + txq_allocated -= extra; + sas->nb_repr_txq += extra; + } else { + sas->nb_repr_rxq = 0; + sas->nb_repr_txq = 0; + } + + /* Add remaining allocated queues */ + sa->rxq_max += MIN(rxq_allocated, evq_allocated / 2); + sa->txq_max += MIN(txq_allocated, evq_allocated - sa->rxq_max); /* Keep NIC initialized */ return 0; +fail_allocate_evq: fail_get_vi_pool: -fail_nic_init: efx_nic_fini(sa->nic); +fail_nic_init: return rc; } static int sfc_set_drv_limits(struct sfc_adapter *sa) { + struct sfc_adapter_shared *sas = sfc_sa2shared(sa); const struct rte_eth_dev_data *data = sa->eth_dev->data; + uint32_t rxq_reserved = sfc_nb_reserved_rxq(sas); + uint32_t txq_reserved = sfc_nb_txq_reserved(sas); efx_drv_limits_t lim; memset(&lim, 0, sizeof(lim)); - /* Limits are strict since take into account initial estimation */ + /* + * Limits are strict since take into account initial estimation. + * Resource allocation stategy is described in + * sfc_estimate_resource_limits(). + */ lim.edl_min_evq_count = lim.edl_max_evq_count = - 1 + data->nb_rx_queues + data->nb_tx_queues; - lim.edl_min_rxq_count = lim.edl_max_rxq_count = data->nb_rx_queues; - lim.edl_min_txq_count = lim.edl_max_txq_count = data->nb_tx_queues; + 1 + data->nb_rx_queues + data->nb_tx_queues + + rxq_reserved + txq_reserved; + lim.edl_min_rxq_count = lim.edl_max_rxq_count = + data->nb_rx_queues + rxq_reserved; + lim.edl_min_txq_count = lim.edl_max_txq_count = + data->nb_tx_queues + txq_reserved; return efx_nic_set_drv_limits(sa->nic, &lim); } @@ -264,6 +388,7 @@ sfc_set_drv_limits(struct sfc_adapter *sa) static int sfc_set_fw_subvariant(struct sfc_adapter *sa) { + struct sfc_adapter_shared *sas = sfc_sa2shared(sa); const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); uint64_t tx_offloads = sa->eth_dev->data->dev_conf.txmode.offloads; unsigned int txq_index; @@ -276,17 +401,17 @@ sfc_set_fw_subvariant(struct sfc_adapter *sa) return 0; } - for (txq_index = 0; txq_index < sa->txq_count; ++txq_index) { - struct sfc_txq_info *txq_info = &sa->txq_info[txq_index]; + for (txq_index = 0; txq_index < sas->txq_count; ++txq_index) { + struct sfc_txq_info *txq_info = &sas->txq_info[txq_index]; - if (txq_info->txq != NULL) - tx_offloads |= txq_info->txq->offloads; + if (txq_info->state & SFC_TXQ_INITIALIZED) + tx_offloads |= txq_info->offloads; } - if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM | - DEV_TX_OFFLOAD_TCP_CKSUM | - DEV_TX_OFFLOAD_UDP_CKSUM | - DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) + if (tx_offloads & (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | + RTE_ETH_TX_OFFLOAD_TCP_CKSUM | + RTE_ETH_TX_OFFLOAD_UDP_CKSUM | + RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM)) req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_DEFAULT; else req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM; @@ -322,7 +447,7 @@ sfc_try_start(struct sfc_adapter *sa) sfc_log_init(sa, "entry"); SFC_ASSERT(sfc_adapter_is_locked(sa)); - SFC_ASSERT(sa->state == SFC_ADAPTER_STARTING); + SFC_ASSERT(sa->state == SFC_ETHDEV_STARTING); sfc_log_init(sa, "set FW subvariant"); rc = sfc_set_fw_subvariant(sa); @@ -339,7 +464,23 @@ sfc_try_start(struct sfc_adapter *sa) if (rc != 0) goto fail_nic_init; + sfc_log_init(sa, "reconfigure NIC DMA"); + rc = efx_nic_dma_reconfigure(sa->nic); + if (rc != 0) { + sfc_err(sa, "cannot reconfigure NIC DMA: %s", rte_strerror(rc)); + goto fail_nic_dma_reconfigure; + } + encp = efx_nic_cfg_get(sa->nic); + + /* + * Refresh (since it may change on NIC reset/restart) a copy of + * supported tunnel encapsulations in shared memory to be used + * on supported Rx packet type classes get. + */ + sa->priv.shared->tunnel_encaps = + encp->enc_tunnel_encapsulations_supported; + if (encp->enc_tunnel_encapsulations_supported != 0) { sfc_log_init(sa, "apply tunnel config"); rc = efx_tunnel_reconfigure(sa->nic); @@ -371,9 +512,16 @@ sfc_try_start(struct sfc_adapter *sa) if (rc != 0) goto fail_flows_insert; + rc = sfc_repr_proxy_start(sa); + if (rc != 0) + goto fail_repr_proxy_start; + sfc_log_init(sa, "done"); return 0; +fail_repr_proxy_start: + sfc_flow_stop(sa); + fail_flows_insert: sfc_tx_stop(sa); @@ -391,6 +539,7 @@ fail_ev_start: fail_intr_start: fail_tunnel_reconfigure: +fail_nic_dma_reconfigure: efx_nic_fini(sa->nic); fail_nic_init: @@ -411,9 +560,9 @@ sfc_start(struct sfc_adapter *sa) SFC_ASSERT(sfc_adapter_is_locked(sa)); switch (sa->state) { - case SFC_ADAPTER_CONFIGURED: + case SFC_ETHDEV_CONFIGURED: break; - case SFC_ADAPTER_STARTED: + case SFC_ETHDEV_STARTED: sfc_notice(sa, "already started"); return 0; default: @@ -421,9 +570,22 @@ sfc_start(struct sfc_adapter *sa) goto fail_bad_state; } - sa->state = SFC_ADAPTER_STARTING; + sa->state = SFC_ETHDEV_STARTING; + rc = 0; do { + /* + * FIXME Try to recreate vSwitch on start retry. + * vSwitch is absent after MC reboot like events and + * we should recreate it. May be we need proper + * indication instead of guessing. + */ + if (rc != 0) { + sfc_sriov_vswitch_destroy(sa); + rc = sfc_sriov_vswitch_create(sa); + if (rc != 0) + goto fail_sriov_vswitch_create; + } rc = sfc_try_start(sa); } while ((--start_tries > 0) && (rc == EIO || rc == EAGAIN || rc == ENOENT || rc == EINVAL)); @@ -431,12 +593,13 @@ sfc_start(struct sfc_adapter *sa) if (rc != 0) goto fail_try_start; - sa->state = SFC_ADAPTER_STARTED; + sa->state = SFC_ETHDEV_STARTED; sfc_log_init(sa, "done"); return 0; fail_try_start: - sa->state = SFC_ADAPTER_CONFIGURED; +fail_sriov_vswitch_create: + sa->state = SFC_ETHDEV_CONFIGURED; fail_bad_state: sfc_log_init(sa, "failed %d", rc); return rc; @@ -450,9 +613,9 @@ sfc_stop(struct sfc_adapter *sa) SFC_ASSERT(sfc_adapter_is_locked(sa)); switch (sa->state) { - case SFC_ADAPTER_STARTED: + case SFC_ETHDEV_STARTED: break; - case SFC_ADAPTER_CONFIGURED: + case SFC_ETHDEV_CONFIGURED: sfc_notice(sa, "already stopped"); return; default: @@ -461,8 +624,9 @@ sfc_stop(struct sfc_adapter *sa) return; } - sa->state = SFC_ADAPTER_STOPPING; + sa->state = SFC_ETHDEV_STOPPING; + sfc_repr_proxy_stop(sa); sfc_flow_stop(sa); sfc_tx_stop(sa); sfc_rx_stop(sa); @@ -471,7 +635,7 @@ sfc_stop(struct sfc_adapter *sa) sfc_intr_stop(sa); efx_nic_fini(sa->nic); - sa->state = SFC_ADAPTER_CONFIGURED; + sa->state = SFC_ETHDEV_CONFIGURED; sfc_log_init(sa, "done"); } @@ -482,7 +646,7 @@ sfc_restart(struct sfc_adapter *sa) SFC_ASSERT(sfc_adapter_is_locked(sa)); - if (sa->state != SFC_ADAPTER_STARTED) + if (sa->state != SFC_ETHDEV_STARTED) return EINVAL; sfc_stop(sa); @@ -503,7 +667,7 @@ sfc_restart_if_required(void *arg) if (rte_atomic32_cmpset((volatile uint32_t *)&sa->restart_required, 1, 0)) { sfc_adapter_lock(sa); - if (sa->state == SFC_ADAPTER_STARTED) + if (sa->state == SFC_ETHDEV_STARTED) (void)sfc_restart(sa); sfc_adapter_unlock(sa); } @@ -536,9 +700,9 @@ sfc_configure(struct sfc_adapter *sa) SFC_ASSERT(sfc_adapter_is_locked(sa)); - SFC_ASSERT(sa->state == SFC_ADAPTER_INITIALIZED || - sa->state == SFC_ADAPTER_CONFIGURED); - sa->state = SFC_ADAPTER_CONFIGURING; + SFC_ASSERT(sa->state == SFC_ETHDEV_INITIALIZED || + sa->state == SFC_ETHDEV_CONFIGURED); + sa->state = SFC_ETHDEV_CONFIGURING; rc = sfc_check_conf(sa); if (rc != 0) @@ -560,10 +724,17 @@ sfc_configure(struct sfc_adapter *sa) if (rc != 0) goto fail_tx_configure; - sa->state = SFC_ADAPTER_CONFIGURED; + rc = sfc_sw_xstats_configure(sa); + if (rc != 0) + goto fail_sw_xstats_configure; + + sa->state = SFC_ETHDEV_CONFIGURED; sfc_log_init(sa, "done"); return 0; +fail_sw_xstats_configure: + sfc_tx_close(sa); + fail_tx_configure: sfc_rx_close(sa); @@ -575,7 +746,7 @@ fail_port_configure: fail_intr_configure: fail_check_conf: - sa->state = SFC_ADAPTER_INITIALIZED; + sa->state = SFC_ETHDEV_INITIALIZED; sfc_log_init(sa, "failed %d", rc); return rc; } @@ -587,30 +758,35 @@ sfc_close(struct sfc_adapter *sa) SFC_ASSERT(sfc_adapter_is_locked(sa)); - SFC_ASSERT(sa->state == SFC_ADAPTER_CONFIGURED); - sa->state = SFC_ADAPTER_CLOSING; + SFC_ASSERT(sa->state == SFC_ETHDEV_CONFIGURED); + sa->state = SFC_ETHDEV_CLOSING; + sfc_sw_xstats_close(sa); sfc_tx_close(sa); sfc_rx_close(sa); sfc_port_close(sa); sfc_intr_close(sa); - sa->state = SFC_ADAPTER_INITIALIZED; + sa->state = SFC_ETHDEV_INITIALIZED; sfc_log_init(sa, "done"); } static int -sfc_mem_bar_init(struct sfc_adapter *sa, unsigned int membar) +sfc_mem_bar_init(struct sfc_adapter *sa, const efx_bar_region_t *mem_ebrp) { struct rte_eth_dev *eth_dev = sa->eth_dev; struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); efsys_bar_t *ebp = &sa->mem_bar; - struct rte_mem_resource *res = &pci_dev->mem_resource[membar]; + struct rte_mem_resource *res = + &pci_dev->mem_resource[mem_ebrp->ebr_index]; SFC_BAR_LOCK_INIT(ebp, eth_dev->data->name); - ebp->esb_rid = membar; + ebp->esb_rid = mem_ebrp->ebr_index; ebp->esb_dev = pci_dev; ebp->esb_base = res->addr; + + sa->fcw_offset = mem_ebrp->ebr_offset; + return 0; } @@ -623,7 +799,6 @@ sfc_mem_bar_fini(struct sfc_adapter *sa) memset(ebp, 0, sizeof(*ebp)); } -#if EFSYS_OPT_RX_SCALE /* * A fixed RSS key which has a property of being symmetric * (symmetrical flows are distributed to the same CPU) @@ -637,12 +812,11 @@ static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = { 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, }; -#endif -#if EFSYS_OPT_RX_SCALE static int -sfc_set_rss_defaults(struct sfc_adapter *sa) +sfc_rss_attach(struct sfc_adapter *sa) { + struct sfc_rss *rss = &sfc_sa2shared(sa)->rss; int rc; rc = efx_intr_init(sa->nic, sa->intr.type, NULL); @@ -657,24 +831,28 @@ sfc_set_rss_defaults(struct sfc_adapter *sa) if (rc != 0) goto fail_rx_init; - rc = efx_rx_scale_default_support_get(sa->nic, &sa->rss_support); + rc = efx_rx_scale_default_support_get(sa->nic, &rss->context_type); if (rc != 0) goto fail_scale_support_get; - rc = efx_rx_hash_default_support_get(sa->nic, &sa->hash_support); + rc = efx_rx_hash_default_support_get(sa->nic, &rss->hash_support); if (rc != 0) goto fail_hash_support_get; + rc = sfc_rx_hash_init(sa); + if (rc != 0) + goto fail_rx_hash_init; + efx_rx_fini(sa->nic); efx_ev_fini(sa->nic); efx_intr_fini(sa->nic); - sa->rss_hash_types = sfc_rte_to_efx_hash_type(SFC_RSS_OFFLOADS); - - rte_memcpy(sa->rss_key, default_rss_key, sizeof(sa->rss_key)); + rte_memcpy(rss->key, default_rss_key, sizeof(rss->key)); + rss->dummy_rss_context = EFX_RSS_CONTEXT_DEFAULT; return 0; +fail_rx_hash_init: fail_hash_support_get: fail_scale_support_get: efx_rx_fini(sa->nic); @@ -688,13 +866,12 @@ fail_ev_init: fail_intr_init: return rc; } -#else -static int -sfc_set_rss_defaults(__rte_unused struct sfc_adapter *sa) + +static void +sfc_rss_detach(struct sfc_adapter *sa) { - return 0; + sfc_rx_hash_fini(sa); } -#endif int sfc_attach(struct sfc_adapter *sa) @@ -714,6 +891,10 @@ sfc_attach(struct sfc_adapter *sa) if (rc != 0) goto fail_nic_reset; + rc = sfc_sriov_attach(sa); + if (rc != 0) + goto fail_sriov_attach; + /* * Probed NIC is sufficient for tunnel init. * Initialize tunnel support to be able to use libefx @@ -726,11 +907,28 @@ sfc_attach(struct sfc_adapter *sa) encp = efx_nic_cfg_get(sa->nic); - if (sa->dp_tx->features & SFC_DP_TX_FEAT_TSO) { - sa->tso = encp->enc_fw_assisted_tso_v2_enabled; + /* + * Make a copy of supported tunnel encapsulations in shared + * memory to be used on supported Rx packet type classes get. + */ + sa->priv.shared->tunnel_encaps = + encp->enc_tunnel_encapsulations_supported; + + if (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & RTE_ETH_TX_OFFLOAD_TCP_TSO) { + sa->tso = encp->enc_fw_assisted_tso_v2_enabled || + encp->enc_tso_v3_enabled; if (!sa->tso) - sfc_warn(sa, - "TSO support isn't available on this adapter"); + sfc_info(sa, "TSO support isn't available on this adapter"); + } + + if (sa->tso && + (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & + (RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO | + RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO)) != 0) { + sa->tso_encap = encp->enc_fw_assisted_tso_v2_encap_enabled || + encp->enc_tso_v3_enabled; + if (!sa->tso_encap) + sfc_info(sa, "Encapsulated TSO support isn't available on this adapter"); } sfc_log_init(sa, "estimate resource limits"); @@ -738,9 +936,24 @@ sfc_attach(struct sfc_adapter *sa) if (rc != 0) goto fail_estimate_rsrc_limits; + sa->evq_max_entries = encp->enc_evq_max_nevs; + SFC_ASSERT(rte_is_power_of_2(sa->evq_max_entries)); + + sa->evq_min_entries = encp->enc_evq_min_nevs; + SFC_ASSERT(rte_is_power_of_2(sa->evq_min_entries)); + + sa->rxq_max_entries = encp->enc_rxq_max_ndescs; + SFC_ASSERT(rte_is_power_of_2(sa->rxq_max_entries)); + + sa->rxq_min_entries = encp->enc_rxq_min_ndescs; + SFC_ASSERT(rte_is_power_of_2(sa->rxq_min_entries)); + sa->txq_max_entries = encp->enc_txq_max_ndescs; SFC_ASSERT(rte_is_power_of_2(sa->txq_max_entries)); + sa->txq_min_entries = encp->enc_txq_min_ndescs; + SFC_ASSERT(rte_is_power_of_2(sa->txq_min_entries)); + rc = sfc_intr_attach(sa); if (rc != 0) goto fail_intr_attach; @@ -753,26 +966,76 @@ sfc_attach(struct sfc_adapter *sa) if (rc != 0) goto fail_port_attach; - rc = sfc_set_rss_defaults(sa); + rc = sfc_rss_attach(sa); if (rc != 0) - goto fail_set_rss_defaults; + goto fail_rss_attach; rc = sfc_filter_attach(sa); if (rc != 0) goto fail_filter_attach; + rc = sfc_mae_counter_rxq_attach(sa); + if (rc != 0) + goto fail_mae_counter_rxq_attach; + + rc = sfc_mae_attach(sa); + if (rc != 0) + goto fail_mae_attach; + + rc = sfc_mae_switchdev_init(sa); + if (rc != 0) + goto fail_mae_switchdev_init; + + rc = sfc_repr_proxy_attach(sa); + if (rc != 0) + goto fail_repr_proxy_attach; + sfc_log_init(sa, "fini nic"); efx_nic_fini(enp); sfc_flow_init(sa); - sa->state = SFC_ADAPTER_INITIALIZED; + rc = sfc_sw_xstats_init(sa); + if (rc != 0) + goto fail_sw_xstats_init; + + /* + * Create vSwitch to be able to use VFs when PF is not started yet + * as DPDK port. VFs should be able to talk to each other even + * if PF is down. + */ + rc = sfc_sriov_vswitch_create(sa); + if (rc != 0) + goto fail_sriov_vswitch_create; + + sa->state = SFC_ETHDEV_INITIALIZED; sfc_log_init(sa, "done"); return 0; +fail_sriov_vswitch_create: + sfc_sw_xstats_close(sa); + +fail_sw_xstats_init: + sfc_flow_fini(sa); + sfc_repr_proxy_detach(sa); + +fail_repr_proxy_attach: + sfc_mae_switchdev_fini(sa); + +fail_mae_switchdev_init: + sfc_mae_detach(sa); + +fail_mae_attach: + sfc_mae_counter_rxq_detach(sa); + +fail_mae_counter_rxq_attach: + sfc_filter_detach(sa); + fail_filter_attach: -fail_set_rss_defaults: + sfc_rss_detach(sa); + +fail_rss_attach: sfc_port_detach(sa); fail_port_attach: @@ -787,13 +1050,27 @@ fail_intr_attach: fail_estimate_rsrc_limits: fail_tunnel_init: efx_tunnel_fini(sa->nic); + sfc_sriov_detach(sa); +fail_sriov_attach: fail_nic_reset: sfc_log_init(sa, "failed %d", rc); return rc; } +void +sfc_pre_detach(struct sfc_adapter *sa) +{ + sfc_log_init(sa, "entry"); + + SFC_ASSERT(!sfc_adapter_is_locked(sa)); + + sfc_repr_proxy_pre_detach(sa); + + sfc_log_init(sa, "done"); +} + void sfc_detach(struct sfc_adapter *sa) { @@ -801,15 +1078,23 @@ sfc_detach(struct sfc_adapter *sa) SFC_ASSERT(sfc_adapter_is_locked(sa)); + sfc_sriov_vswitch_destroy(sa); + sfc_flow_fini(sa); + sfc_repr_proxy_detach(sa); + sfc_mae_switchdev_fini(sa); + sfc_mae_detach(sa); + sfc_mae_counter_rxq_detach(sa); sfc_filter_detach(sa); + sfc_rss_detach(sa); sfc_port_detach(sa); sfc_ev_detach(sa); sfc_intr_detach(sa); efx_tunnel_fini(sa->nic); + sfc_sriov_detach(sa); - sa->state = SFC_ADAPTER_UNINITIALIZED; + sa->state = SFC_ETHDEV_UNINITIALIZED; } static int @@ -826,6 +1111,8 @@ sfc_kvarg_fv_variant_handler(__rte_unused const char *key, *value = EFX_FW_VARIANT_LOW_LATENCY; else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_PACKED_STREAM) == 0) *value = EFX_FW_VARIANT_PACKED_STREAM; + else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DPDK) == 0) + *value = EFX_FW_VARIANT_DPDK; else return -EINVAL; @@ -861,6 +1148,10 @@ sfc_get_fw_variant(struct sfc_adapter *sa, efx_fw_variant_t *efv) *efv = EFX_FW_VARIANT_PACKED_STREAM; break; + case EFX_RXDP_DPDK_FW_ID: + *efv = EFX_FW_VARIANT_DPDK; + break; + default: /* * Other firmware variants are not considered, since they are @@ -883,11 +1174,39 @@ sfc_fw_variant2str(efx_fw_variant_t efv) return SFC_KVARG_FW_VARIANT_LOW_LATENCY; case EFX_RXDP_PACKED_STREAM_FW_ID: return SFC_KVARG_FW_VARIANT_PACKED_STREAM; + case EFX_RXDP_DPDK_FW_ID: + return SFC_KVARG_FW_VARIANT_DPDK; default: return "unknown"; } } +static int +sfc_kvarg_rxd_wait_timeout_ns(struct sfc_adapter *sa) +{ + int rc; + long value; + + value = SFC_RXD_WAIT_TIMEOUT_NS_DEF; + + rc = sfc_kvargs_process(sa, SFC_KVARG_RXD_WAIT_TIMEOUT_NS, + sfc_kvarg_long_handler, &value); + if (rc != 0) + return rc; + + if (value < 0 || + (unsigned long)value > EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX) { + sfc_err(sa, "wrong '" SFC_KVARG_RXD_WAIT_TIMEOUT_NS "' " + "was set (%ld);", value); + sfc_err(sa, "it must not be less than 0 or greater than %u", + EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX); + return EINVAL; + } + + sa->rxd_wait_timeout_ns = value; + return 0; +} + static int sfc_nic_probe(struct sfc_adapter *sa) { @@ -905,6 +1224,10 @@ sfc_nic_probe(struct sfc_adapter *sa) return rc; } + rc = sfc_kvarg_rxd_wait_timeout_ns(sa); + if (rc != 0) + return rc; + rc = efx_nic_probe(enp, preferred_efv); if (rc == EACCES) { /* Unprivileged functions cannot set FW variant */ @@ -935,8 +1258,9 @@ sfc_nic_probe(struct sfc_adapter *sa) int sfc_probe(struct sfc_adapter *sa) { - struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(sa->eth_dev); - unsigned int membar; + efx_bar_region_t mem_ebrp; + struct rte_eth_dev *eth_dev = sa->eth_dev; + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); efx_nic_t *enp; int rc; @@ -948,21 +1272,24 @@ sfc_probe(struct sfc_adapter *sa) rte_atomic32_init(&sa->restart_required); sfc_log_init(sa, "get family"); - rc = efx_family(pci_dev->id.vendor_id, pci_dev->id.device_id, - &sa->family, &membar); + rc = sfc_efx_family(pci_dev, &mem_ebrp, &sa->family); + if (rc != 0) goto fail_family; - sfc_log_init(sa, "family is %u, membar is %u", sa->family, membar); + sfc_log_init(sa, + "family is %u, membar is %u, function control window offset is %lu", + sa->family, mem_ebrp.ebr_index, mem_ebrp.ebr_offset); sfc_log_init(sa, "init mem bar"); - rc = sfc_mem_bar_init(sa, membar); + rc = sfc_mem_bar_init(sa, &mem_ebrp); if (rc != 0) goto fail_mem_bar_init; sfc_log_init(sa, "create nic"); rte_spinlock_init(&sa->nic_lock); rc = efx_nic_create(sa->family, (efsys_identifier_t *)sa, - &sa->mem_bar, &sa->nic_lock, &enp); + &sa->mem_bar, mem_ebrp.ebr_offset, + &sa->nic_lock, &enp); if (rc != 0) goto fail_nic_create; sa->nic = enp; @@ -1018,6 +1345,9 @@ sfc_unprobe(struct sfc_adapter *sa) */ rte_eal_alarm_cancel(sfc_restart_if_required, sa); + sfc_mae_clear_switch_port(sa->mae.switch_domain_id, + sa->mae.switch_port_id); + sfc_log_init(sa, "destroy nic"); sa->nic = NULL; efx_nic_destroy(enp); @@ -1025,12 +1355,12 @@ sfc_unprobe(struct sfc_adapter *sa) sfc_mem_bar_fini(sa); sfc_flow_fini(sa); - sa->state = SFC_ADAPTER_UNINITIALIZED; + sa->state = SFC_ETHDEV_UNINITIALIZED; } uint32_t -sfc_register_logtype(struct sfc_adapter *sa, const char *lt_prefix_str, - uint32_t ll_default) +sfc_register_logtype(const struct rte_pci_addr *pci_addr, + const char *lt_prefix_str, uint32_t ll_default) { size_t lt_prefix_str_size = strlen(lt_prefix_str); size_t lt_str_size_max; @@ -1041,21 +1371,69 @@ sfc_register_logtype(struct sfc_adapter *sa, const char *lt_prefix_str, ++lt_prefix_str_size; /* Reserve space for prefix separator */ lt_str_size_max = lt_prefix_str_size + PCI_PRI_STR_SIZE + 1; } else { - return RTE_LOGTYPE_PMD; + return sfc_logtype_driver; } lt_str = rte_zmalloc("logtype_str", lt_str_size_max, 0); if (lt_str == NULL) - return RTE_LOGTYPE_PMD; + return sfc_logtype_driver; strncpy(lt_str, lt_prefix_str, lt_prefix_str_size); lt_str[lt_prefix_str_size - 1] = '.'; - rte_pci_device_name(&sa->pci_addr, lt_str + lt_prefix_str_size, + rte_pci_device_name(pci_addr, lt_str + lt_prefix_str_size, lt_str_size_max - lt_prefix_str_size); lt_str[lt_str_size_max - 1] = '\0'; ret = rte_log_register_type_and_pick_level(lt_str, ll_default); rte_free(lt_str); - return (ret < 0) ? RTE_LOGTYPE_PMD : ret; + if (ret < 0) + return sfc_logtype_driver; + + return ret; +} + +struct sfc_hw_switch_id { + char board_sn[RTE_SIZEOF_FIELD(efx_nic_board_info_t, enbi_serial)]; +}; + +int +sfc_hw_switch_id_init(struct sfc_adapter *sa, + struct sfc_hw_switch_id **idp) +{ + efx_nic_board_info_t board_info; + struct sfc_hw_switch_id *id; + int rc; + + if (idp == NULL) + return EINVAL; + + id = rte_zmalloc("sfc_hw_switch_id", sizeof(*id), 0); + if (id == NULL) + return ENOMEM; + + rc = efx_nic_get_board_info(sa->nic, &board_info); + if (rc != 0) + return rc; + + memcpy(id->board_sn, board_info.enbi_serial, sizeof(id->board_sn)); + + *idp = id; + + return 0; +} + +void +sfc_hw_switch_id_fini(__rte_unused struct sfc_adapter *sa, + struct sfc_hw_switch_id *id) +{ + rte_free(id); +} + +bool +sfc_hw_switch_ids_equal(const struct sfc_hw_switch_id *left, + const struct sfc_hw_switch_id *right) +{ + return strncmp(left->board_sn, right->board_sn, + sizeof(left->board_sn)) == 0; }