1 /* SPDX-License-Identifier: BSD-3-Clause
3 * Copyright(c) 2019-2021 Xilinx, Inc.
4 * Copyright(c) 2016-2019 Solarflare Communications Inc.
6 * This software was jointly developed between OKTET Labs (under contract
7 * for Solarflare) and Solarflare Communications, Inc.
13 #include <rte_errno.h>
14 #include <rte_alarm.h>
19 #include "sfc_debug.h"
23 #include "sfc_mae_counter.h"
25 #include "sfc_kvargs.h"
26 #include "sfc_tweak.h"
27 #include "sfc_sw_stats.h"
/*
 * Reserve a page-aligned DMA memzone for NIC use and fill in the
 * efsys_mem_t descriptor (virtual base and IOVA).
 * NOTE(review): lines are missing from this excerpt (return type,
 * error-path checks, return statements); only visible lines are kept
 * and only the visible defect is corrected.
 */
31 sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
32 size_t len, int socket_id, efsys_mem_t *esmp)
34 const struct rte_memzone *mz;
36 sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d",
37 name, id, len, socket_id);
/* Page-size alignment so the zone is usable for device DMA */
39 mz = rte_eth_dma_zone_reserve(sa->eth_dev, name, id, len,
40 sysconf(_SC_PAGESIZE), socket_id);
42 sfc_err(sa, "cannot reserve DMA zone for %s:%u %#x@%d: %s",
43 name, (unsigned int)id, (unsigned int)len, socket_id,
44 rte_strerror(rte_errno));
48 esmp->esm_addr = mz->iova;
49 if (esmp->esm_addr == RTE_BAD_IOVA) {
/* No usable IOVA for the zone - free it to avoid a memzone leak */
50 (void)rte_memzone_free(mz);
55 esmp->esm_base = mz->addr;
/* Fixed format: len is size_t, so use %zu (was %lu, wrong on ILP32 targets) */
58 "DMA name=%s id=%u len=%zu socket_id=%d => virt=%p iova=%lx",
59 name, id, len, socket_id, esmp->esm_base,
60 (unsigned long)esmp->esm_addr);
/*
 * Release DMA memory previously reserved by sfc_dma_alloc() and clear
 * the descriptor so a stale base pointer or IOVA cannot be reused.
 * NOTE(review): the 'int rc;' declaration and the 'if (rc != 0)' guard
 * are among the lines missing from this excerpt.
 */
66 sfc_dma_free(const struct sfc_adapter *sa, efsys_mem_t *esmp)
70 sfc_log_init(sa, "name=%s", esmp->esm_mz->name);
72 rc = rte_memzone_free(esmp->esm_mz);
/* Fixed log typo: "rte_memzone_free((" -> "rte_memzone_free()" */
74 sfc_err(sa, "rte_memzone_free() failed: %d", rc);
76 memset(esmp, 0, sizeof(*esmp));
/*
 * Translate an ethdev link_speeds bitmask into libefx PHY capability bits.
 * When the FIXED bit is clear, autonegotiation capability is advertised;
 * a bare AUTONEG mask enables every full-duplex speed capability.
 */
80 sfc_phy_cap_from_link_speeds(uint32_t speeds)
82 uint32_t phy_caps = 0;
/* FIXED bit clear => autoneg allowed */
84 if (~speeds & ETH_LINK_SPEED_FIXED) {
85 phy_caps |= (1 << EFX_PHY_CAP_AN);
87 * If no speeds are specified in the mask, any supported
90 if (speeds == ETH_LINK_SPEED_AUTONEG)
92 (1 << EFX_PHY_CAP_1000FDX) |
93 (1 << EFX_PHY_CAP_10000FDX) |
94 (1 << EFX_PHY_CAP_25000FDX) |
95 (1 << EFX_PHY_CAP_40000FDX) |
96 (1 << EFX_PHY_CAP_50000FDX) |
97 (1 << EFX_PHY_CAP_100000FDX);
/* Map each explicitly requested speed to its capability bit */
99 if (speeds & ETH_LINK_SPEED_1G)
100 phy_caps |= (1 << EFX_PHY_CAP_1000FDX);
101 if (speeds & ETH_LINK_SPEED_10G)
102 phy_caps |= (1 << EFX_PHY_CAP_10000FDX);
103 if (speeds & ETH_LINK_SPEED_25G)
104 phy_caps |= (1 << EFX_PHY_CAP_25000FDX);
105 if (speeds & ETH_LINK_SPEED_40G)
106 phy_caps |= (1 << EFX_PHY_CAP_40000FDX);
107 if (speeds & ETH_LINK_SPEED_50G)
108 phy_caps |= (1 << EFX_PHY_CAP_50000FDX);
109 if (speeds & ETH_LINK_SPEED_100G)
110 phy_caps |= (1 << EFX_PHY_CAP_100000FDX);
116  * Check requested device level configuration.
117  * Receive and transmit configuration is checked in corresponding
/*
 * Validate device-level rte_eth_conf against adapter capabilities:
 * link speeds, loopback, DCB, flow director and interrupt support.
 * NOTE(review): the 'rc = EINVAL'/'return rc' error-path lines are
 * missing from this excerpt.
 */
121 sfc_check_conf(struct sfc_adapter *sa)
123 const struct rte_eth_conf *conf = &sa->eth_dev->data->dev_conf;
/* Intersect requested speeds with what the PHY actually supports */
126 sa->port.phy_adv_cap =
127 sfc_phy_cap_from_link_speeds(conf->link_speeds) &
128 sa->port.phy_adv_cap_mask;
/* At least one real speed (beyond bare autoneg) must remain */
129 if ((sa->port.phy_adv_cap & ~(1 << EFX_PHY_CAP_AN)) == 0) {
130 sfc_err(sa, "No link speeds from mask %#x are supported",
/* Loopback requires libefx loopback support compiled in */
135 #if !EFSYS_OPT_LOOPBACK
136 if (conf->lpbk_mode != 0) {
137 sfc_err(sa, "Loopback not supported");
142 if (conf->dcb_capability_en != 0) {
143 sfc_err(sa, "Priority-based flow control not supported");
147 if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
148 sfc_err(sa, "Flow Director not supported");
/* LSC interrupt needs line or message interrupts on the adapter */
152 if ((conf->intr_conf.lsc != 0) &&
153 (sa->intr.type != EFX_INTR_LINE) &&
154 (sa->intr.type != EFX_INTR_MESSAGE)) {
155 sfc_err(sa, "Link status change interrupt not supported");
/* Rx queue interrupts need datapath support */
159 if (conf->intr_conf.rxq != 0 &&
160 (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_INTR) == 0) {
161 sfc_err(sa, "Receive queue interrupt not supported");
169  * Find out maximum number of receive and transmit queues which could be
172  * NIC is kept initialized on success to allow other modules acquire
173  * defaults and capabilities.
/*
 * Ask firmware for a VI pool sized by provisional driver limits, then
 * derive sa->rxq_max/sa->txq_max from what was actually granted.
 * On success the NIC stays initialized (efx_nic_init() is not undone).
 */
176 sfc_estimate_resource_limits(struct sfc_adapter *sa)
178 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
179 struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
180 efx_drv_limits_t limits;
182 uint32_t evq_allocated;
183 uint32_t rxq_allocated;
184 uint32_t txq_allocated;
186 memset(&limits, 0, sizeof(limits));
188 /* Request at least one Rx and Tx queue */
189 limits.edl_min_rxq_count = 1;
190 limits.edl_min_txq_count = 1;
191 /* Management event queue plus event queue for each Tx and Rx queue */
192 limits.edl_min_evq_count =
193 1 + limits.edl_min_rxq_count + limits.edl_min_txq_count;
195 /* Divide by number of functions to guarantee that all functions
196  * will get promised resources
198 /* FIXME Divide by number of functions (not 2) below */
199 limits.edl_max_evq_count = encp->enc_evq_limit / 2;
200 SFC_ASSERT(limits.edl_max_evq_count >= limits.edl_min_rxq_count);
202 /* Split equally between receive and transmit */
203 limits.edl_max_rxq_count =
204 MIN(encp->enc_rxq_limit, (limits.edl_max_evq_count - 1) / 2);
205 SFC_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);
207 limits.edl_max_txq_count =
208 MIN(encp->enc_txq_limit,
209 limits.edl_max_evq_count - 1 - limits.edl_max_rxq_count);
/* FATSOv2 contexts are shared across PFs; cap TxQ count accordingly */
211 if (sa->tso && encp->enc_fw_assisted_tso_v2_enabled)
212 limits.edl_max_txq_count =
213 MIN(limits.edl_max_txq_count,
214 encp->enc_fw_assisted_tso_v2_n_contexts /
215 encp->enc_hw_pf_count);
217 SFC_ASSERT(limits.edl_max_txq_count >= limits.edl_min_rxq_count);
219 /* Configure the minimum required resources needed for the
220  * driver to operate, and the maximum desired resources that the
221  * driver is capable of using.
223 efx_nic_set_drv_limits(sa->nic, &limits);
225 sfc_log_init(sa, "init nic");
226 rc = efx_nic_init(sa->nic);
230 /* Find resource dimensions assigned by firmware to this function */
231 rc = efx_nic_get_vi_pool(sa->nic, &evq_allocated, &rxq_allocated,
234 goto fail_get_vi_pool;
236 /* It still may allocate more than maximum, ensure limit */
237 evq_allocated = MIN(evq_allocated, limits.edl_max_evq_count);
238 rxq_allocated = MIN(rxq_allocated, limits.edl_max_rxq_count);
239 txq_allocated = MIN(txq_allocated, limits.edl_max_txq_count);
242  * Subtract management EVQ not used for traffic
243  * The resource allocation strategy is as follows:
244  * - one EVQ for management
245  * - one EVQ for each ethdev RXQ
246  * - one EVQ for each ethdev TXQ
247  * - one EVQ and one RXQ for optional MAE counters.
249 if (evq_allocated == 0) {
250 sfc_err(sa, "count of allocated EvQ is 0");
252 goto fail_allocate_evq;
257  * Reserve absolutely required minimum.
258  * Right now we use separate EVQ for Rx and Tx.
260 if (rxq_allocated > 0 && evq_allocated > 0) {
265 if (txq_allocated > 0 && evq_allocated > 0) {
/* Dedicated RxQ+EVQ pair for MAE counters, if the feature is needed */
271 if (sfc_mae_counter_rxq_required(sa) &&
272 rxq_allocated > 0 && evq_allocated > 0) {
275 sas->counters_rxq_allocated = true;
277 sas->counters_rxq_allocated = false;
280 /* Add remaining allocated queues */
281 sa->rxq_max += MIN(rxq_allocated, evq_allocated / 2);
282 sa->txq_max += MIN(txq_allocated, evq_allocated - sa->rxq_max);
284 /* Keep NIC initialized */
/* Error path: undo efx_nic_init() */
289 efx_nic_fini(sa->nic);
/*
 * Push strict (min == max) driver limits for the configured queue counts,
 * including reserved RxQs, to libefx before NIC (re)initialization.
 */
295 sfc_set_drv_limits(struct sfc_adapter *sa)
297 const struct rte_eth_dev_data *data = sa->eth_dev->data;
298 uint32_t rxq_reserved = sfc_nb_reserved_rxq(sfc_sa2shared(sa));
299 efx_drv_limits_t lim;
301 memset(&lim, 0, sizeof(lim));
304  * Limits are strict since take into account initial estimation.
305  * Resource allocation stategy is described in
306  * sfc_estimate_resource_limits().
/* One management EVQ plus one EVQ per Rx/Tx queue (incl. reserved) */
308 lim.edl_min_evq_count = lim.edl_max_evq_count =
309 1 + data->nb_rx_queues + data->nb_tx_queues + rxq_reserved;
310 lim.edl_min_rxq_count = lim.edl_max_rxq_count =
311 data->nb_rx_queues + rxq_reserved;
312 lim.edl_min_txq_count = lim.edl_max_txq_count = data->nb_tx_queues;
314 return efx_nic_set_drv_limits(sa->nic, &lim);
/*
 * Select the firmware subvariant matching the requested Tx offloads:
 * if no checksum offloads are needed, switch to the no-Tx-csum
 * subvariant (when supported); otherwise use the default one.
 */
318 sfc_set_fw_subvariant(struct sfc_adapter *sa)
320 struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
321 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
322 uint64_t tx_offloads = sa->eth_dev->data->dev_conf.txmode.offloads;
323 unsigned int txq_index;
324 efx_nic_fw_subvariant_t req_fw_subvariant;
325 efx_nic_fw_subvariant_t cur_fw_subvariant;
328 if (!encp->enc_fw_subvariant_no_tx_csum_supported) {
329 sfc_info(sa, "no-Tx-checksum subvariant not supported");
/* Accumulate per-queue offloads on top of device-level ones */
333 for (txq_index = 0; txq_index < sas->txq_count; ++txq_index) {
334 struct sfc_txq_info *txq_info = &sas->txq_info[txq_index];
336 if (txq_info->state & SFC_TXQ_INITIALIZED)
337 tx_offloads |= txq_info->offloads;
/* Any checksum offload forces the default subvariant */
340 if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
341 DEV_TX_OFFLOAD_TCP_CKSUM |
342 DEV_TX_OFFLOAD_UDP_CKSUM |
343 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM))
344 req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_DEFAULT;
346 req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM;
348 rc = efx_nic_get_fw_subvariant(sa->nic, &cur_fw_subvariant);
350 sfc_err(sa, "failed to get FW subvariant: %d", rc);
353 sfc_info(sa, "FW subvariant is %u vs required %u",
354 cur_fw_subvariant, req_fw_subvariant);
/* Nothing to do when firmware already runs the required subvariant */
356 if (cur_fw_subvariant == req_fw_subvariant)
359 rc = efx_nic_set_fw_subvariant(sa->nic, req_fw_subvariant);
361 sfc_err(sa, "failed to set FW subvariant %u: %d",
362 req_fw_subvariant, rc);
365 sfc_info(sa, "FW subvariant set to %u", req_fw_subvariant);
/*
 * Single attempt to bring the adapter up: FW subvariant, driver limits,
 * NIC init, tunnel config, then interrupts, events, port, Rx, Tx and
 * flow rules, in that order. Unwinds in reverse on failure.
 * NOTE(review): several intermediate fail_* labels are missing from
 * this excerpt.
 */
371 sfc_try_start(struct sfc_adapter *sa)
373 const efx_nic_cfg_t *encp;
376 sfc_log_init(sa, "entry");
378 SFC_ASSERT(sfc_adapter_is_locked(sa));
379 SFC_ASSERT(sa->state == SFC_ADAPTER_STARTING);
381 sfc_log_init(sa, "set FW subvariant");
382 rc = sfc_set_fw_subvariant(sa);
384 goto fail_set_fw_subvariant;
386 sfc_log_init(sa, "set resource limits");
387 rc = sfc_set_drv_limits(sa);
389 goto fail_set_drv_limits;
391 sfc_log_init(sa, "init nic");
392 rc = efx_nic_init(sa->nic);
396 encp = efx_nic_cfg_get(sa->nic);
399  * Refresh (since it may change on NIC reset/restart) a copy of
400  * supported tunnel encapsulations in shared memory to be used
401  * on supported Rx packet type classes get.
403 sa->priv.shared->tunnel_encaps =
404 encp->enc_tunnel_encapsulations_supported;
406 if (encp->enc_tunnel_encapsulations_supported != 0) {
407 sfc_log_init(sa, "apply tunnel config");
408 rc = efx_tunnel_reconfigure(sa->nic);
410 goto fail_tunnel_reconfigure;
/* Start datapath-facing modules in dependency order */
413 rc = sfc_intr_start(sa);
415 goto fail_intr_start;
417 rc = sfc_ev_start(sa);
421 rc = sfc_port_start(sa);
423 goto fail_port_start;
425 rc = sfc_rx_start(sa);
429 rc = sfc_tx_start(sa);
433 rc = sfc_flow_start(sa);
435 goto fail_flows_insert;
437 sfc_log_init(sa, "done");
/* Error unwind (reverse order of initialization) */
456 fail_tunnel_reconfigure:
457 efx_nic_fini(sa->nic);
461 fail_set_fw_subvariant:
462 sfc_log_init(sa, "failed %d", rc);
/*
 * Public start entry point: transitions CONFIGURED -> STARTING ->
 * STARTED, retrying sfc_try_start() a few times on transient errors
 * (e.g. after MC reboot). Recreates the SR-IOV vSwitch per attempt.
 */
467 sfc_start(struct sfc_adapter *sa)
469 unsigned int start_tries = 3;
472 sfc_log_init(sa, "entry");
474 SFC_ASSERT(sfc_adapter_is_locked(sa));
477 case SFC_ADAPTER_CONFIGURED:
479 case SFC_ADAPTER_STARTED:
480 sfc_notice(sa, "already started");
487 sa->state = SFC_ADAPTER_STARTING;
492  * FIXME Try to recreate vSwitch on start retry.
493  * vSwitch is absent after MC reboot like events and
494  * we should recreate it. May be we need proper
495  * indication instead of guessing.
498 sfc_sriov_vswitch_destroy(sa);
499 rc = sfc_sriov_vswitch_create(sa);
501 goto fail_sriov_vswitch_create;
/* Retry on errors that may be caused by an in-progress MC reboot */
503 rc = sfc_try_start(sa);
504 } while ((--start_tries > 0) &&
505 (rc == EIO || rc == EAGAIN || rc == ENOENT || rc == EINVAL));
510 sa->state = SFC_ADAPTER_STARTED;
511 sfc_log_init(sa, "done");
515 fail_sriov_vswitch_create:
/* Roll the state machine back on failure */
516 sa->state = SFC_ADAPTER_CONFIGURED;
518 sfc_log_init(sa, "failed %d", rc);
/*
 * Stop the adapter: STARTED -> STOPPING -> CONFIGURED. Stopping while
 * already stopped is a no-op; other states are logged as unexpected.
 * NOTE(review): the per-module *_stop() calls between the state change
 * and efx_nic_fini() are missing from this excerpt.
 */
523 sfc_stop(struct sfc_adapter *sa)
525 sfc_log_init(sa, "entry");
527 SFC_ASSERT(sfc_adapter_is_locked(sa));
530 case SFC_ADAPTER_STARTED:
532 case SFC_ADAPTER_CONFIGURED:
533 sfc_notice(sa, "already stopped");
536 sfc_err(sa, "stop in unexpected state %u", sa->state);
541 sa->state = SFC_ADAPTER_STOPPING;
549 efx_nic_fini(sa->nic);
551 sa->state = SFC_ADAPTER_CONFIGURED;
552 sfc_log_init(sa, "done");
/*
 * Restart a started adapter (stop + start under the adapter lock).
 * Only valid in the STARTED state.
 */
556 sfc_restart(struct sfc_adapter *sa)
560 SFC_ASSERT(sfc_adapter_is_locked(sa));
562 if (sa->state != SFC_ADAPTER_STARTED)
569 sfc_err(sa, "restart failed");
/*
 * Alarm callback: perform a deferred restart if one was scheduled.
 * Atomically consumes the restart_required flag so concurrent
 * schedulers cannot trigger a second restart for the same request.
 */
575 sfc_restart_if_required(void *arg)
577 struct sfc_adapter *sa = arg;
579 /* If restart is scheduled, clear the flag and do it */
580 if (rte_atomic32_cmpset((volatile uint32_t *)&sa->restart_required,
582 sfc_adapter_lock(sa);
/* Restart only makes sense while the adapter is running */
583 if (sa->state == SFC_ADAPTER_STARTED)
584 (void)sfc_restart(sa);
585 sfc_adapter_unlock(sa);
/*
 * Request an asynchronous adapter restart via an EAL alarm.
 * Idempotent: if a restart is already pending, nothing is re-armed.
 */
590 sfc_schedule_restart(struct sfc_adapter *sa)
594 /* Schedule restart alarm if it is not scheduled yet */
595 if (!rte_atomic32_test_and_set(&sa->restart_required))
598 rc = rte_eal_alarm_set(1, sfc_restart_if_required, sa)
600 sfc_warn(sa, "alarms are not supported, restart is pending");
602 sfc_err(sa, "cannot arm restart alarm (rc=%d)", rc);
604 sfc_notice(sa, "restart scheduled");
/*
 * Apply device configuration: INITIALIZED/CONFIGURED -> CONFIGURING ->
 * CONFIGURED. Configures interrupts, port, Rx, Tx and SW xstats in
 * order; on failure unwinds already-configured modules and returns to
 * the INITIALIZED state.
 */
608 sfc_configure(struct sfc_adapter *sa)
612 sfc_log_init(sa, "entry");
614 SFC_ASSERT(sfc_adapter_is_locked(sa));
/* Reconfiguration from CONFIGURED is allowed as well */
616 SFC_ASSERT(sa->state == SFC_ADAPTER_INITIALIZED ||
617 sa->state == SFC_ADAPTER_CONFIGURED);
618 sa->state = SFC_ADAPTER_CONFIGURING;
620 rc = sfc_check_conf(sa);
622 goto fail_check_conf;
624 rc = sfc_intr_configure(sa);
626 goto fail_intr_configure;
628 rc = sfc_port_configure(sa);
630 goto fail_port_configure;
632 rc = sfc_rx_configure(sa);
634 goto fail_rx_configure;
636 rc = sfc_tx_configure(sa);
638 goto fail_tx_configure;
640 rc = sfc_sw_xstats_configure(sa);
642 goto fail_sw_xstats_configure;
644 sa->state = SFC_ADAPTER_CONFIGURED;
645 sfc_log_init(sa, "done");
/* Error unwind (reverse order of configuration) */
648 fail_sw_xstats_configure:
662 sa->state = SFC_ADAPTER_INITIALIZED;
663 sfc_log_init(sa, "failed %d", rc);
/*
 * Undo sfc_configure(): CONFIGURED -> CLOSING -> INITIALIZED.
 * NOTE(review): the other per-module *_close() calls between
 * sfc_sw_xstats_close() and the final state change are missing
 * from this excerpt.
 */
668 sfc_close(struct sfc_adapter *sa)
670 sfc_log_init(sa, "entry");
672 SFC_ASSERT(sfc_adapter_is_locked(sa));
674 SFC_ASSERT(sa->state == SFC_ADAPTER_CONFIGURED);
675 sa->state = SFC_ADAPTER_CLOSING;
677 sfc_sw_xstats_close(sa);
683 sa->state = SFC_ADAPTER_INITIALIZED;
684 sfc_log_init(sa, "done");
/*
 * Map the PCI memory BAR described by mem_ebrp into sa->mem_bar and
 * remember the function control window offset for later register access.
 */
688 sfc_mem_bar_init(struct sfc_adapter *sa, const efx_bar_region_t *mem_ebrp)
690 struct rte_eth_dev *eth_dev = sa->eth_dev;
691 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
692 efsys_bar_t *ebp = &sa->mem_bar;
693 struct rte_mem_resource *res =
694 &pci_dev->mem_resource[mem_ebrp->ebr_index];
696 SFC_BAR_LOCK_INIT(ebp, eth_dev->data->name);
697 ebp->esb_rid = mem_ebrp->ebr_index;
698 ebp->esb_dev = pci_dev;
699 ebp->esb_base = res->addr;
701 sa->fcw_offset = mem_ebrp->ebr_offset;
/*
 * Tear down the memory BAR mapping state: destroy its lock and clear
 * the descriptor so stale pointers cannot be reused.
 */
707 sfc_mem_bar_fini(struct sfc_adapter *sa)
709 efsys_bar_t *ebp = &sa->mem_bar;
711 SFC_BAR_LOCK_DESTROY(ebp);
712 memset(ebp, 0, sizeof(*ebp));
716  * A fixed RSS key which has a property of being symmetric
717  * (symmetrical flows are distributed to the same CPU)
718  * and also known to give a uniform distribution
719  * (a good distribution of traffic between different CPUs)
/* Repeating 0x6d5a pattern - the well-known symmetric RSS key */
721 static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = {
722 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
723 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
724 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
725 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
726 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
/*
 * Discover RSS capabilities (scale context and hash support) by briefly
 * bringing up intr/ev/rx in libefx, then tearing them down again.
 * Seeds the RSS key with the symmetric default.
 */
730 sfc_rss_attach(struct sfc_adapter *sa)
732 struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
/* Temporary bring-up: needed only to query RSS capabilities */
735 rc = efx_intr_init(sa->nic, sa->intr.type, NULL);
739 rc = efx_ev_init(sa->nic);
743 rc = efx_rx_init(sa->nic);
747 rc = efx_rx_scale_default_support_get(sa->nic, &rss->context_type);
749 goto fail_scale_support_get;
751 rc = efx_rx_hash_default_support_get(sa->nic, &rss->hash_support);
753 goto fail_hash_support_get;
755 rc = sfc_rx_hash_init(sa);
757 goto fail_rx_hash_init;
/* Capabilities captured - undo the temporary bring-up */
759 efx_rx_fini(sa->nic);
760 efx_ev_fini(sa->nic);
761 efx_intr_fini(sa->nic);
763 rte_memcpy(rss->key, default_rss_key, sizeof(rss->key));
764 rss->dummy_rss_context = EFX_RSS_CONTEXT_DEFAULT;
/* Error unwind (reverse order of initialization) */
769 fail_hash_support_get:
770 fail_scale_support_get:
771 efx_rx_fini(sa->nic);
774 efx_ev_fini(sa->nic);
777 efx_intr_fini(sa->nic);
/* Undo sfc_rss_attach(): release Rx hash state */
784 sfc_rss_detach(struct sfc_adapter *sa)
786 sfc_rx_hash_fini(sa);
/*
 * One-time adapter attach after probe: reset the NIC, attach SR-IOV,
 * tunnels, estimate resource limits, cache queue size bounds, then
 * attach all sub-modules (intr, ev, port, rss, filter, MAE, xstats)
 * and create the vSwitch. Leaves the adapter in INITIALIZED state.
 * Unwinds everything in reverse order on failure.
 */
790 sfc_attach(struct sfc_adapter *sa)
792 const efx_nic_cfg_t *encp;
793 efx_nic_t *enp = sa->nic;
796 sfc_log_init(sa, "entry");
798 SFC_ASSERT(sfc_adapter_is_locked(sa));
/* Start a new MCDI epoch so stale responses are rejected */
800 efx_mcdi_new_epoch(enp);
802 sfc_log_init(sa, "reset nic");
803 rc = efx_nic_reset(enp);
807 rc = sfc_sriov_attach(sa);
809 goto fail_sriov_attach;
812  * Probed NIC is sufficient for tunnel init.
813  * Initialize tunnel support to be able to use libefx
814  * efx_tunnel_config_udp_{add,remove}() in any state and
815  * efx_tunnel_reconfigure() on start up.
817 rc = efx_tunnel_init(enp);
819 goto fail_tunnel_init;
821 encp = efx_nic_cfg_get(sa->nic);
824  * Make a copy of supported tunnel encapsulations in shared
825  * memory to be used on supported Rx packet type classes get.
827 sa->priv.shared->tunnel_encaps =
828 encp->enc_tunnel_encapsulations_supported;
/* TSO is offered only if the Tx datapath supports it */
830 if (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & DEV_TX_OFFLOAD_TCP_TSO) {
831 sa->tso = encp->enc_fw_assisted_tso_v2_enabled ||
832 encp->enc_tso_v3_enabled;
834 sfc_info(sa, "TSO support isn't available on this adapter");
838 (sfc_dp_tx_offload_capa(sa->priv.dp_tx) &
839 (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
840 DEV_TX_OFFLOAD_GENEVE_TNL_TSO)) != 0) {
841 sa->tso_encap = encp->enc_fw_assisted_tso_v2_encap_enabled ||
842 encp->enc_tso_v3_enabled;
844 sfc_info(sa, "Encapsulated TSO support isn't available on this adapter");
847 sfc_log_init(sa, "estimate resource limits");
848 rc = sfc_estimate_resource_limits(sa);
850 goto fail_estimate_rsrc_limits;
/* Cache queue depth bounds; libefx guarantees powers of two */
852 sa->evq_max_entries = encp->enc_evq_max_nevs;
853 SFC_ASSERT(rte_is_power_of_2(sa->evq_max_entries));
855 sa->evq_min_entries = encp->enc_evq_min_nevs;
856 SFC_ASSERT(rte_is_power_of_2(sa->evq_min_entries));
858 sa->rxq_max_entries = encp->enc_rxq_max_ndescs;
859 SFC_ASSERT(rte_is_power_of_2(sa->rxq_max_entries));
861 sa->rxq_min_entries = encp->enc_rxq_min_ndescs;
862 SFC_ASSERT(rte_is_power_of_2(sa->rxq_min_entries));
864 sa->txq_max_entries = encp->enc_txq_max_ndescs;
865 SFC_ASSERT(rte_is_power_of_2(sa->txq_max_entries));
867 sa->txq_min_entries = encp->enc_txq_min_ndescs;
868 SFC_ASSERT(rte_is_power_of_2(sa->txq_min_entries));
/* Attach sub-modules in dependency order */
870 rc = sfc_intr_attach(sa);
872 goto fail_intr_attach;
874 rc = sfc_ev_attach(sa);
878 rc = sfc_port_attach(sa);
880 goto fail_port_attach;
882 rc = sfc_rss_attach(sa);
884 goto fail_rss_attach;
886 rc = sfc_filter_attach(sa);
888 goto fail_filter_attach;
890 rc = sfc_mae_counter_rxq_attach(sa);
892 goto fail_mae_counter_rxq_attach;
894 rc = sfc_mae_attach(sa);
896 goto fail_mae_attach;
898 rc = sfc_mae_switchdev_init(sa);
900 goto fail_mae_switchdev_init;
902 sfc_log_init(sa, "fini nic");
907 rc = sfc_sw_xstats_init(sa);
909 goto fail_sw_xstats_init;
912  * Create vSwitch to be able to use VFs when PF is not started yet
913  * as DPDK port. VFs should be able to talk to each other even
916 rc = sfc_sriov_vswitch_create(sa);
918 goto fail_sriov_vswitch_create;
920 sa->state = SFC_ADAPTER_INITIALIZED;
922 sfc_log_init(sa, "done");
/* Error unwind (reverse order of attach) */
925 fail_sriov_vswitch_create:
926 sfc_sw_xstats_close(sa);
930 sfc_mae_switchdev_fini(sa);
932 fail_mae_switchdev_init:
936 sfc_mae_counter_rxq_detach(sa);
938 fail_mae_counter_rxq_attach:
939 sfc_filter_detach(sa);
954 efx_nic_fini(sa->nic);
956 fail_estimate_rsrc_limits:
958 efx_tunnel_fini(sa->nic);
959 sfc_sriov_detach(sa);
964 sfc_log_init(sa, "failed %d", rc);
/*
 * Undo sfc_attach(): tear down sub-modules in reverse attach order and
 * return the adapter to the UNINITIALIZED state.
 * NOTE(review): several intermediate *_detach() calls are missing from
 * this excerpt.
 */
969 sfc_detach(struct sfc_adapter *sa)
971 sfc_log_init(sa, "entry");
973 SFC_ASSERT(sfc_adapter_is_locked(sa));
975 sfc_sriov_vswitch_destroy(sa);
979 sfc_mae_switchdev_fini(sa);
981 sfc_mae_counter_rxq_detach(sa);
982 sfc_filter_detach(sa);
987 efx_tunnel_fini(sa->nic);
988 sfc_sriov_detach(sa);
990 sa->state = SFC_ADAPTER_UNINITIALIZED;
/*
 * Kvargs handler: parse the fw_variant device argument string
 * (case-insensitive) into the corresponding efx_fw_variant_t value
 * stored via the opaque pointer.
 */
994 sfc_kvarg_fv_variant_handler(__rte_unused const char *key,
995 const char *value_str, void *opaque)
997 uint32_t *value = opaque;
999 if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DONT_CARE) == 0)
1000 *value = EFX_FW_VARIANT_DONT_CARE;
1001 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_FULL_FEATURED) == 0)
1002 *value = EFX_FW_VARIANT_FULL_FEATURED;
1003 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_LOW_LATENCY) == 0)
1004 *value = EFX_FW_VARIANT_LOW_LATENCY;
1005 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_PACKED_STREAM) == 0)
1006 *value = EFX_FW_VARIANT_PACKED_STREAM;
1007 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DPDK) == 0)
1008 *value = EFX_FW_VARIANT_DPDK;
/*
 * Determine the firmware variant currently running on the NIC from the
 * RxDPCPU firmware id reported by efx_nic_get_fw_version(). Unknown ids
 * map to DONT_CARE.
 */
1016 sfc_get_fw_variant(struct sfc_adapter *sa, efx_fw_variant_t *efv)
1018 efx_nic_fw_info_t enfi;
1021 rc = efx_nic_get_fw_version(sa->nic, &enfi);
/* Without valid DPCPU firmware ids the variant cannot be identified */
1024 else if (!enfi.enfi_dpcpu_fw_ids_valid)
1028  * Firmware variant can be uniquely identified by the RxDPCPU
1031 switch (enfi.enfi_rx_dpcpu_fw_id) {
1032 case EFX_RXDP_FULL_FEATURED_FW_ID:
1033 *efv = EFX_FW_VARIANT_FULL_FEATURED;
1036 case EFX_RXDP_LOW_LATENCY_FW_ID:
1037 *efv = EFX_FW_VARIANT_LOW_LATENCY;
1040 case EFX_RXDP_PACKED_STREAM_FW_ID:
1041 *efv = EFX_FW_VARIANT_PACKED_STREAM;
1044 case EFX_RXDP_DPDK_FW_ID:
1045 *efv = EFX_FW_VARIANT_DPDK;
1050  * Other firmware variants are not considered, since they are
1051  * not supported in the device parameters
1053 *efv = EFX_FW_VARIANT_DONT_CARE;
/*
 * Map a firmware variant to its kvargs string for logging.
 * NOTE(review): the switch argument is an efx_fw_variant_t but the case
 * labels are EFX_RXDP_*_FW_ID values - verify these enumerations share
 * the intended numeric values, or whether EFX_FW_VARIANT_* labels were
 * meant here.
 */
1061 sfc_fw_variant2str(efx_fw_variant_t efv)
1064 case EFX_RXDP_FULL_FEATURED_FW_ID:
1065 return SFC_KVARG_FW_VARIANT_FULL_FEATURED;
1066 case EFX_RXDP_LOW_LATENCY_FW_ID:
1067 return SFC_KVARG_FW_VARIANT_LOW_LATENCY;
1068 case EFX_RXDP_PACKED_STREAM_FW_ID:
1069 return SFC_KVARG_FW_VARIANT_PACKED_STREAM;
1070 case EFX_RXDP_DPDK_FW_ID:
1071 return SFC_KVARG_FW_VARIANT_DPDK;
/*
 * Parse the rxd_wait_timeout_ns device argument (defaulting to
 * SFC_RXD_WAIT_TIMEOUT_NS_DEF) and validate its range before storing it
 * in sa->rxd_wait_timeout_ns.
 */
1078 sfc_kvarg_rxd_wait_timeout_ns(struct sfc_adapter *sa)
1083 value = SFC_RXD_WAIT_TIMEOUT_NS_DEF;
1085 rc = sfc_kvargs_process(sa, SFC_KVARG_RXD_WAIT_TIMEOUT_NS,
1086 sfc_kvarg_long_handler, &value);
/* Reject negative values and values above the HW-imposed maximum */
1091 (unsigned long)value > EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX) {
1092 sfc_err(sa, "wrong '" SFC_KVARG_RXD_WAIT_TIMEOUT_NS "' "
1093 "was set (%ld);", value);
1094 sfc_err(sa, "it must not be less than 0 or greater than %u",
1095 EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX);
1099 sa->rxd_wait_timeout_ns = value;
/*
 * Probe the NIC with the firmware variant requested via kvargs, falling
 * back to DONT_CARE for unprivileged functions, and warn if the running
 * variant does not match the request.
 */
1104 sfc_nic_probe(struct sfc_adapter *sa)
1106 efx_nic_t *enp = sa->nic;
1107 efx_fw_variant_t preferred_efv;
1108 efx_fw_variant_t efv;
1111 preferred_efv = EFX_FW_VARIANT_DONT_CARE;
1112 rc = sfc_kvargs_process(sa, SFC_KVARG_FW_VARIANT,
1113 sfc_kvarg_fv_variant_handler,
1116 sfc_err(sa, "invalid %s parameter value", SFC_KVARG_FW_VARIANT);
1120 rc = sfc_kvarg_rxd_wait_timeout_ns(sa);
1124 rc = efx_nic_probe(enp, preferred_efv);
1126 /* Unprivileged functions cannot set FW variant */
1127 rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
1132 rc = sfc_get_fw_variant(sa, &efv);
/* Not being able to read the variant is non-fatal - just warn */
1133 if (rc == ENOTSUP) {
1134 sfc_warn(sa, "FW variant can not be obtained");
1140 /* Check that firmware variant was changed to the requested one */
1141 if (preferred_efv != EFX_FW_VARIANT_DONT_CARE && preferred_efv != efv) {
1142 sfc_warn(sa, "FW variant has not changed to the requested %s",
1143 sfc_fw_variant2str(preferred_efv));
1146 sfc_notice(sa, "running FW variant is %s", sfc_fw_variant2str(efv));
/*
 * Device probe: detect the adapter family, map the memory BAR, create
 * the libefx NIC object, initialize MCDI and probe the NIC.
 * Unwinds in reverse order on failure.
 */
1152 sfc_probe(struct sfc_adapter *sa)
1154 efx_bar_region_t mem_ebrp;
1155 struct rte_eth_dev *eth_dev = sa->eth_dev;
1156 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
1160 sfc_log_init(sa, "entry");
1162 SFC_ASSERT(sfc_adapter_is_locked(sa));
1164 sa->socket_id = rte_socket_id();
1165 rte_atomic32_init(&sa->restart_required);
1167 sfc_log_init(sa, "get family");
1168 rc = sfc_efx_family(pci_dev, &mem_ebrp, &sa->family);
/*
 * NOTE(review): %lu for ebr_offset assumes it is unsigned long;
 * if it is a fixed 64-bit type, PRIu64 would be the portable choice.
 */
1173 "family is %u, membar is %u, function control window offset is %lu",
1174 sa->family, mem_ebrp.ebr_index, mem_ebrp.ebr_offset);
1176 sfc_log_init(sa, "init mem bar");
1177 rc = sfc_mem_bar_init(sa, &mem_ebrp);
1179 goto fail_mem_bar_init;
1181 sfc_log_init(sa, "create nic");
1182 rte_spinlock_init(&sa->nic_lock);
1183 rc = efx_nic_create(sa->family, (efsys_identifier_t *)sa,
1184 &sa->mem_bar, mem_ebrp.ebr_offset,
1185 &sa->nic_lock, &enp);
1187 goto fail_nic_create;
1190 rc = sfc_mcdi_init(sa);
1192 goto fail_mcdi_init;
1194 sfc_log_init(sa, "probe nic");
1195 rc = sfc_nic_probe(sa);
1197 goto fail_nic_probe;
1199 sfc_log_init(sa, "done");
/* Error unwind (reverse order of probe) */
1206 sfc_log_init(sa, "destroy nic");
1208 efx_nic_destroy(enp);
1211 sfc_mem_bar_fini(sa);
1215 sfc_log_init(sa, "failed %d", rc);
/*
 * Undo sfc_probe(): unprobe the NIC, cancel any pending restart alarm
 * (its callback opaque is the soon-to-be-freed private data), destroy
 * the NIC object and unmap the memory BAR.
 */
1220 sfc_unprobe(struct sfc_adapter *sa)
1222 efx_nic_t *enp = sa->nic;
1224 sfc_log_init(sa, "entry");
1226 SFC_ASSERT(sfc_adapter_is_locked(sa));
1228 sfc_log_init(sa, "unprobe nic");
1229 efx_nic_unprobe(enp);
1234  * Make sure there is no pending alarm to restart since we are
1235  * going to free device private which is passed as the callback
1236  * opaque data. A new alarm cannot be scheduled since MCDI is
1239 rte_eal_alarm_cancel(sfc_restart_if_required, sa);
1241 sfc_log_init(sa, "destroy nic");
1243 efx_nic_destroy(enp);
1245 sfc_mem_bar_fini(sa);
1248 sa->state = SFC_ADAPTER_UNINITIALIZED;
/*
 * Register a per-device log type named "<prefix>.<pci-address>" with the
 * given default level. On any failure (overflow guard, allocation,
 * registration) falls back to the generic driver log type.
 */
1252 sfc_register_logtype(const struct rte_pci_addr *pci_addr,
1253 const char *lt_prefix_str, uint32_t ll_default)
1255 size_t lt_prefix_str_size = strlen(lt_prefix_str);
1256 size_t lt_str_size_max;
1257 char *lt_str = NULL;
/* Guard the size arithmetic below against size_t overflow */
1260 if (SIZE_MAX - PCI_PRI_STR_SIZE - 1 > lt_prefix_str_size) {
1261 ++lt_prefix_str_size; /* Reserve space for prefix separator */
1262 lt_str_size_max = lt_prefix_str_size + PCI_PRI_STR_SIZE + 1;
1264 return sfc_logtype_driver;
1267 lt_str = rte_zmalloc("logtype_str", lt_str_size_max, 0);
1269 return sfc_logtype_driver;
/* Copy prefix, overwrite the reserved extra byte with the separator */
1271 strncpy(lt_str, lt_prefix_str, lt_prefix_str_size);
1272 lt_str[lt_prefix_str_size - 1] = '.';
1273 rte_pci_device_name(pci_addr, lt_str + lt_prefix_str_size,
1274 lt_str_size_max - lt_prefix_str_size);
1275 lt_str[lt_str_size_max - 1] = '\0';
1277 ret = rte_log_register_type_and_pick_level(lt_str, ll_default);
/* Negative return means registration failed - use the fallback type */
1281 return sfc_logtype_driver;
/*
 * HW switch identity: adapters on the same board (same serial number)
 * share one HW switch.
 */
1286 struct sfc_hw_switch_id {
/* Board serial number, sized to match efx_nic_board_info_t */
1287 char board_sn[RTE_SIZEOF_FIELD(efx_nic_board_info_t, enbi_serial)];
/*
 * Allocate a HW switch id for the adapter, filled from the NIC board
 * serial number. Ownership of *idp passes to the caller (freed via
 * sfc_hw_switch_id_fini()).
 */
1291 sfc_hw_switch_id_init(struct sfc_adapter *sa,
1292 struct sfc_hw_switch_id **idp)
1294 efx_nic_board_info_t board_info;
1295 struct sfc_hw_switch_id *id;
1301 id = rte_zmalloc("sfc_hw_switch_id", sizeof(*id), 0);
1305 rc = efx_nic_get_board_info(sa->nic, &board_info);
1309 memcpy(id->board_sn, board_info.enbi_serial, sizeof(id->board_sn));
/* Release a HW switch id obtained from sfc_hw_switch_id_init() */
1317 sfc_hw_switch_id_fini(__rte_unused struct sfc_adapter *sa,
1318 struct sfc_hw_switch_id *id)
/* Two adapters belong to the same HW switch iff board serials match */
1324 sfc_hw_switch_ids_equal(const struct sfc_hw_switch_id *left,
1325 const struct sfc_hw_switch_id *right)
1327 return strncmp(left->board_sn, right->board_sn,
1328 sizeof(left->board_sn)) == 0;