1 /* SPDX-License-Identifier: BSD-3-Clause
3 * Copyright(c) 2019-2021 Xilinx, Inc.
4 * Copyright(c) 2016-2019 Solarflare Communications Inc.
6 * This software was jointly developed between OKTET Labs (under contract
7 * for Solarflare) and Solarflare Communications, Inc.
13 #include <rte_errno.h>
14 #include <rte_alarm.h>
19 #include "sfc_debug.h"
23 #include "sfc_mae_counter.h"
25 #include "sfc_kvargs.h"
26 #include "sfc_tweak.h"
27 #include "sfc_sw_stats.h"
28 #include "sfc_switch.h"
29 #include "sfc_nic_dma.h"
/*
 * Check whether port representors can be supported on this adapter.
 * NOTE(review): this view is missing interior lines of the function
 * (return statements etc.); only visible lines are documented.
 */
32 sfc_repr_supported(const struct sfc_adapter *sa)
38 * Representor proxy should use service lcore on PF's socket
39 * (sa->socket_id) to be efficient. But the proxy will fall back
40 * to any socket if it is not possible to get the service core
41 * on the same socket. Check that at least service core on any
42 * socket is available.
/* No service lcore on any socket => representor proxy cannot run */
44 if (sfc_get_service_lcore(SOCKET_ID_ANY) == RTE_MAX_LCORE)
/*
 * Representors are usable only if both representor proxy Rx and Tx
 * queues were successfully reserved (see sfc_estimate_resource_limits()).
 */
51 sfc_repr_available(const struct sfc_adapter_shared *sas)
53 return sas->nb_repr_rxq > 0 && sas->nb_repr_txq > 0;
/*
 * Allocate a page-aligned DMA memzone for the adapter and map it for
 * NIC DMA (sfc_nic_dma_mz_map() fills in esmp->esm_addr).
 * On failure the memzone is released before returning.
 * NOTE(review): interior lines of this function are not visible in this
 * view; only the visible lines are changed.
 */
57 sfc_dma_alloc(struct sfc_adapter *sa, const char *name, uint16_t id,
58 efx_nic_dma_addr_type_t addr_type, size_t len, int socket_id,
61 const struct rte_memzone *mz;
64 sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d",
65 name, id, len, socket_id);
/* Page alignment is required for DMA-able memory */
67 mz = rte_eth_dma_zone_reserve(sa->eth_dev, name, id, len,
68 sysconf(_SC_PAGESIZE), socket_id);
70 sfc_err(sa, "cannot reserve DMA zone for %s:%u %#x@%d: %s",
71 name, (unsigned int)id, (unsigned int)len, socket_id,
72 rte_strerror(rte_errno));
/* An IOVA is mandatory for DMA; release the zone if it has none */
75 if (mz->iova == RTE_BAD_IOVA) {
76 (void)rte_memzone_free(mz);
80 rc = sfc_nic_dma_mz_map(sa, mz, addr_type, &esmp->esm_addr);
82 (void)rte_memzone_free(mz);
87 esmp->esm_base = mz->addr;
/*
 * Fixed format specifier: len is size_t, so use %zu (as the log at the
 * top of this function already does) instead of non-portable %lu.
 */
90 "DMA name=%s id=%u len=%zu socket_id=%d => virt=%p iova=%lx",
91 name, id, len, socket_id, esmp->esm_base,
92 (unsigned long)esmp->esm_addr);
/*
 * Free a DMA memzone previously obtained via sfc_dma_alloc() and clear
 * the descriptor so stale base/address values cannot be reused.
 */
98 sfc_dma_free(const struct sfc_adapter *sa, efsys_mem_t *esmp)
102 sfc_log_init(sa, "name=%s", esmp->esm_mz->name);
104 rc = rte_memzone_free(esmp->esm_mz);
/* Fixed typo in the error message: "free((" -> "free()" */
106 sfc_err(sa, "rte_memzone_free() failed: %d", rc);
108 memset(esmp, 0, sizeof(*esmp));
/*
 * Translate an ethdev RTE_ETH_LINK_SPEED_* mask into a libefx PHY
 * capability mask (EFX_PHY_CAP_*). Autonegotiation is advertised unless
 * the FIXED bit is set in the mask.
 */
112 sfc_phy_cap_from_link_speeds(uint32_t speeds)
114 uint32_t phy_caps = 0;
116 if (~speeds & RTE_ETH_LINK_SPEED_FIXED) {
117 phy_caps |= (1 << EFX_PHY_CAP_AN);
119 * If no speeds are specified in the mask, any supported
/* AUTONEG alone means "advertise every supported full-duplex speed" */
122 if (speeds == RTE_ETH_LINK_SPEED_AUTONEG)
124 (1 << EFX_PHY_CAP_1000FDX) |
125 (1 << EFX_PHY_CAP_10000FDX) |
126 (1 << EFX_PHY_CAP_25000FDX) |
127 (1 << EFX_PHY_CAP_40000FDX) |
128 (1 << EFX_PHY_CAP_50000FDX) |
129 (1 << EFX_PHY_CAP_100000FDX);
/* Map each explicitly requested speed to its PHY capability bit */
131 if (speeds & RTE_ETH_LINK_SPEED_1G)
132 phy_caps |= (1 << EFX_PHY_CAP_1000FDX);
133 if (speeds & RTE_ETH_LINK_SPEED_10G)
134 phy_caps |= (1 << EFX_PHY_CAP_10000FDX);
135 if (speeds & RTE_ETH_LINK_SPEED_25G)
136 phy_caps |= (1 << EFX_PHY_CAP_25000FDX);
137 if (speeds & RTE_ETH_LINK_SPEED_40G)
138 phy_caps |= (1 << EFX_PHY_CAP_40000FDX);
139 if (speeds & RTE_ETH_LINK_SPEED_50G)
140 phy_caps |= (1 << EFX_PHY_CAP_50000FDX);
141 if (speeds & RTE_ETH_LINK_SPEED_100G)
142 phy_caps |= (1 << EFX_PHY_CAP_100000FDX);
148 * Check requested device level configuration.
149 * Receive and transmit configuration is checked in corresponding
153 sfc_check_conf(struct sfc_adapter *sa)
155 const struct rte_eth_conf *conf = &sa->eth_dev->data->dev_conf;
/* Intersect requested speeds with what the PHY actually supports */
158 sa->port.phy_adv_cap =
159 sfc_phy_cap_from_link_speeds(conf->link_speeds) &
160 sa->port.phy_adv_cap_mask;
/* AN bit alone means no usable speed survived the intersection */
161 if ((sa->port.phy_adv_cap & ~(1 << EFX_PHY_CAP_AN)) == 0) {
162 sfc_err(sa, "No link speeds from mask %#x are supported",
/* Loopback is only configurable when libefx is built with it */
167 #if !EFSYS_OPT_LOOPBACK
168 if (conf->lpbk_mode != 0) {
169 sfc_err(sa, "Loopback not supported");
174 if (conf->dcb_capability_en != 0) {
175 sfc_err(sa, "Priority-based flow control not supported");
179 if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
180 sfc_err(sa, "Flow Director not supported");
/* LSC interrupt requires line or message interrupts on the NIC */
184 if ((conf->intr_conf.lsc != 0) &&
185 (sa->intr.type != EFX_INTR_LINE) &&
186 (sa->intr.type != EFX_INTR_MESSAGE)) {
187 sfc_err(sa, "Link status change interrupt not supported");
/* Rx queue interrupts require datapath support (SFC_DP_RX_FEAT_INTR) */
191 if (conf->intr_conf.rxq != 0 &&
192 (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_INTR) == 0) {
193 sfc_err(sa, "Receive queue interrupt not supported");
201 * Find out maximum number of receive and transmit queues which could be
204 * NIC is kept initialized on success to allow other modules acquire
205 * defaults and capabilities.
208 sfc_estimate_resource_limits(struct sfc_adapter *sa)
210 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
211 struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
212 efx_drv_limits_t limits;
214 uint32_t evq_allocated;
215 uint32_t rxq_allocated;
216 uint32_t txq_allocated;
218 memset(&limits, 0, sizeof(limits));
220 /* Request at least one Rx and Tx queue */
221 limits.edl_min_rxq_count = 1;
222 limits.edl_min_txq_count = 1;
223 /* Management event queue plus event queue for each Tx and Rx queue */
224 limits.edl_min_evq_count =
225 1 + limits.edl_min_rxq_count + limits.edl_min_txq_count;
227 /* Divide by number of functions to guarantee that all functions
228 * will get promised resources
230 /* FIXME Divide by number of functions (not 2) below */
231 limits.edl_max_evq_count = encp->enc_evq_limit / 2;
/* NOTE(review): assert compares max EVQ count against *min Rx* count,
 * matching the assert style below — presumably intentional sanity
 * checking against the smallest requirement; confirm against history. */
232 SFC_ASSERT(limits.edl_max_evq_count >= limits.edl_min_rxq_count);
234 /* Split equally between receive and transmit */
235 limits.edl_max_rxq_count =
236 MIN(encp->enc_rxq_limit, (limits.edl_max_evq_count - 1) / 2);
237 SFC_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);
239 limits.edl_max_txq_count =
240 MIN(encp->enc_txq_limit,
241 limits.edl_max_evq_count - 1 - limits.edl_max_rxq_count);
/* FATSOv2 contexts are shared between PFs; bound TxQ count by our share */
243 if (sa->tso && encp->enc_fw_assisted_tso_v2_enabled)
244 limits.edl_max_txq_count =
245 MIN(limits.edl_max_txq_count,
246 encp->enc_fw_assisted_tso_v2_n_contexts /
247 encp->enc_hw_pf_count);
249 SFC_ASSERT(limits.edl_max_txq_count >= limits.edl_min_rxq_count);
251 /* Configure the minimum required resources needed for the
252 * driver to operate, and the maximum desired resources that the
253 * driver is capable of using.
255 efx_nic_set_drv_limits(sa->nic, &limits);
257 sfc_log_init(sa, "init nic");
258 rc = efx_nic_init(sa->nic);
262 /* Find resource dimensions assigned by firmware to this function */
263 rc = efx_nic_get_vi_pool(sa->nic, &evq_allocated, &rxq_allocated,
266 goto fail_get_vi_pool;
268 /* It still may allocate more than maximum, ensure limit */
269 evq_allocated = MIN(evq_allocated, limits.edl_max_evq_count);
270 rxq_allocated = MIN(rxq_allocated, limits.edl_max_rxq_count);
271 txq_allocated = MIN(txq_allocated, limits.edl_max_txq_count);
274 * Subtract management EVQ not used for traffic
275 * The resource allocation strategy is as follows:
276 * - one EVQ for management
277 * - one EVQ for each ethdev RXQ
278 * - one EVQ for each ethdev TXQ
279 * - one EVQ and one RXQ for optional MAE counters.
281 if (evq_allocated == 0) {
282 sfc_err(sa, "count of allocated EvQ is 0");
284 goto fail_allocate_evq;
289 * Reserve absolutely required minimum.
290 * Right now we use separate EVQ for Rx and Tx.
292 if (rxq_allocated > 0 && evq_allocated > 0) {
297 if (txq_allocated > 0 && evq_allocated > 0) {
/* Reserve one RxQ + EVQ for MAE counters if the feature is required */
303 if (sfc_mae_counter_rxq_required(sa) &&
304 rxq_allocated > 0 && evq_allocated > 0) {
307 sas->counters_rxq_allocated = true;
309 sas->counters_rxq_allocated = false;
/* Reserve representor proxy queues only if the full minimum fits */
312 if (sfc_repr_supported(sa) &&
313 evq_allocated >= SFC_REPR_PROXY_NB_RXQ_MIN +
314 SFC_REPR_PROXY_NB_TXQ_MIN &&
315 rxq_allocated >= SFC_REPR_PROXY_NB_RXQ_MIN &&
316 txq_allocated >= SFC_REPR_PROXY_NB_TXQ_MIN) {
319 txq_allocated -= SFC_REPR_PROXY_NB_TXQ_MIN;
320 rxq_allocated -= SFC_REPR_PROXY_NB_RXQ_MIN;
321 evq_allocated -= SFC_REPR_PROXY_NB_RXQ_MIN +
322 SFC_REPR_PROXY_NB_TXQ_MIN;
324 sas->nb_repr_rxq = SFC_REPR_PROXY_NB_RXQ_MIN;
325 sas->nb_repr_txq = SFC_REPR_PROXY_NB_TXQ_MIN;
327 /* Allocate extra representor RxQs up to the maximum */
328 extra = MIN(evq_allocated, rxq_allocated);
330 SFC_REPR_PROXY_NB_RXQ_MAX - sas->nb_repr_rxq);
331 evq_allocated -= extra;
332 rxq_allocated -= extra;
333 sas->nb_repr_rxq += extra;
335 /* Allocate extra representor TxQs up to the maximum */
336 extra = MIN(evq_allocated, txq_allocated);
338 SFC_REPR_PROXY_NB_TXQ_MAX - sas->nb_repr_txq);
339 evq_allocated -= extra;
340 txq_allocated -= extra;
341 sas->nb_repr_txq += extra;
343 sas->nb_repr_rxq = 0;
344 sas->nb_repr_txq = 0;
347 /* Add remaining allocated queues */
348 sa->rxq_max += MIN(rxq_allocated, evq_allocated / 2);
349 sa->txq_max += MIN(txq_allocated, evq_allocated - sa->rxq_max);
351 /* Keep NIC initialized */
/* Failure path: undo efx_nic_init() done above */
356 efx_nic_fini(sa->nic);
/*
 * Push exact (min == max) driver resource limits to libefx based on the
 * configured ethdev queue counts plus internally reserved queues.
 */
362 sfc_set_drv_limits(struct sfc_adapter *sa)
364 struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
365 const struct rte_eth_dev_data *data = sa->eth_dev->data;
366 uint32_t rxq_reserved = sfc_nb_reserved_rxq(sas);
367 uint32_t txq_reserved = sfc_nb_txq_reserved(sas);
368 efx_drv_limits_t lim;
370 memset(&lim, 0, sizeof(lim));
373 * Limits are strict since take into account initial estimation.
374 * Resource allocation strategy is described in
375 * sfc_estimate_resource_limits().
/* One management EVQ + one EVQ per Rx/Tx queue (ethdev and reserved) */
377 lim.edl_min_evq_count = lim.edl_max_evq_count =
378 1 + data->nb_rx_queues + data->nb_tx_queues +
379 rxq_reserved + txq_reserved;
380 lim.edl_min_rxq_count = lim.edl_max_rxq_count =
381 data->nb_rx_queues + rxq_reserved;
382 lim.edl_min_txq_count = lim.edl_max_txq_count =
383 data->nb_tx_queues + txq_reserved;
385 return efx_nic_set_drv_limits(sa->nic, &lim);
/*
 * Select the firmware subvariant (default vs no-Tx-checksum) matching the
 * Tx checksum offloads requested at device and queue level, and apply it
 * if it differs from the current one.
 */
389 sfc_set_fw_subvariant(struct sfc_adapter *sa)
391 struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
392 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
393 uint64_t tx_offloads = sa->eth_dev->data->dev_conf.txmode.offloads;
394 unsigned int txq_index;
395 efx_nic_fw_subvariant_t req_fw_subvariant;
396 efx_nic_fw_subvariant_t cur_fw_subvariant;
/* Nothing to do if firmware has no no-Tx-checksum subvariant */
399 if (!encp->enc_fw_subvariant_no_tx_csum_supported) {
400 sfc_info(sa, "no-Tx-checksum subvariant not supported");
/* Aggregate per-queue offloads from all initialized TxQs */
404 for (txq_index = 0; txq_index < sas->txq_count; ++txq_index) {
405 struct sfc_txq_info *txq_info = &sas->txq_info[txq_index];
407 if (txq_info->state & SFC_TXQ_INITIALIZED)
408 tx_offloads |= txq_info->offloads;
/* Any checksum offload in use requires the default subvariant */
411 if (tx_offloads & (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
412 RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
413 RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
414 RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM))
415 req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_DEFAULT;
417 req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM;
419 rc = efx_nic_get_fw_subvariant(sa->nic, &cur_fw_subvariant);
421 sfc_err(sa, "failed to get FW subvariant: %d", rc);
424 sfc_info(sa, "FW subvariant is %u vs required %u",
425 cur_fw_subvariant, req_fw_subvariant);
/* Already running the required subvariant */
427 if (cur_fw_subvariant == req_fw_subvariant)
430 rc = efx_nic_set_fw_subvariant(sa->nic, req_fw_subvariant);
432 sfc_err(sa, "failed to set FW subvariant %u: %d",
433 req_fw_subvariant, rc);
436 sfc_info(sa, "FW subvariant set to %u", req_fw_subvariant);
/*
 * One attempt to bring the adapter up: FW subvariant, driver limits, NIC
 * init, DMA/tunnel reconfiguration, then interrupts, events, port, Rx,
 * Tx, flows and representor proxy, in order. Unwinds via goto labels on
 * failure. Caller (sfc_start()) retries on transient errors.
 */
442 sfc_try_start(struct sfc_adapter *sa)
444 const efx_nic_cfg_t *encp;
447 sfc_log_init(sa, "entry");
449 SFC_ASSERT(sfc_adapter_is_locked(sa));
450 SFC_ASSERT(sa->state == SFC_ETHDEV_STARTING);
452 sfc_log_init(sa, "set FW subvariant");
453 rc = sfc_set_fw_subvariant(sa);
455 goto fail_set_fw_subvariant;
457 sfc_log_init(sa, "set resource limits");
458 rc = sfc_set_drv_limits(sa);
460 goto fail_set_drv_limits;
462 sfc_log_init(sa, "init nic");
463 rc = efx_nic_init(sa->nic);
467 sfc_log_init(sa, "reconfigure NIC DMA");
468 rc = efx_nic_dma_reconfigure(sa->nic);
470 sfc_err(sa, "cannot reconfigure NIC DMA: %s", rte_strerror(rc));
471 goto fail_nic_dma_reconfigure;
474 encp = efx_nic_cfg_get(sa->nic);
477 * Refresh (since it may change on NIC reset/restart) a copy of
478 * supported tunnel encapsulations in shared memory to be used
479 * on supported Rx packet type classes get.
481 sa->priv.shared->tunnel_encaps =
482 encp->enc_tunnel_encapsulations_supported;
484 if (encp->enc_tunnel_encapsulations_supported != 0) {
485 sfc_log_init(sa, "apply tunnel config");
486 rc = efx_tunnel_reconfigure(sa->nic);
488 goto fail_tunnel_reconfigure;
/* Start datapath-facing modules in dependency order */
491 rc = sfc_intr_start(sa);
493 goto fail_intr_start;
495 rc = sfc_ev_start(sa);
499 rc = sfc_port_start(sa);
501 goto fail_port_start;
503 rc = sfc_rx_start(sa);
507 rc = sfc_tx_start(sa);
511 rc = sfc_flow_start(sa);
513 goto fail_flows_insert;
515 rc = sfc_repr_proxy_start(sa);
517 goto fail_repr_proxy_start;
519 sfc_log_init(sa, "done");
/* Failure unwind: stop modules in reverse order of start */
522 fail_repr_proxy_start:
541 fail_tunnel_reconfigure:
542 fail_nic_dma_reconfigure:
543 efx_nic_fini(sa->nic);
547 fail_set_fw_subvariant:
548 sfc_log_init(sa, "failed %d", rc);
/*
 * Start the adapter, retrying sfc_try_start() a few times on transient
 * errors (EIO/EAGAIN/ENOENT/EINVAL, e.g. after MC reboot). The vSwitch
 * is recreated before each attempt.
 */
553 sfc_start(struct sfc_adapter *sa)
555 unsigned int start_tries = 3;
558 sfc_log_init(sa, "entry");
560 SFC_ASSERT(sfc_adapter_is_locked(sa));
563 case SFC_ETHDEV_CONFIGURED:
565 case SFC_ETHDEV_STARTED:
566 sfc_notice(sa, "already started");
573 sa->state = SFC_ETHDEV_STARTING;
578 * FIXME Try to recreate vSwitch on start retry.
579 * vSwitch is absent after MC reboot like events and
580 * we should recreate it. May be we need proper
581 * indication instead of guessing.
584 sfc_sriov_vswitch_destroy(sa);
585 rc = sfc_sriov_vswitch_create(sa);
587 goto fail_sriov_vswitch_create;
589 rc = sfc_try_start(sa);
/* Retry only on error codes that may be transient */
590 } while ((--start_tries > 0) &&
591 (rc == EIO || rc == EAGAIN || rc == ENOENT || rc == EINVAL));
596 sa->state = SFC_ETHDEV_STARTED;
597 sfc_log_init(sa, "done");
601 fail_sriov_vswitch_create:
/* Roll back to CONFIGURED on failure */
602 sa->state = SFC_ETHDEV_CONFIGURED;
604 sfc_log_init(sa, "failed %d", rc);
/*
 * Stop the adapter: tear down started modules (representor proxy first)
 * and finalize the NIC, returning to the CONFIGURED state.
 */
609 sfc_stop(struct sfc_adapter *sa)
611 sfc_log_init(sa, "entry");
613 SFC_ASSERT(sfc_adapter_is_locked(sa));
616 case SFC_ETHDEV_STARTED:
618 case SFC_ETHDEV_CONFIGURED:
619 sfc_notice(sa, "already stopped");
622 sfc_err(sa, "stop in unexpected state %u", sa->state);
627 sa->state = SFC_ETHDEV_STOPPING;
/* Stop in reverse order of sfc_try_start() */
629 sfc_repr_proxy_stop(sa);
636 efx_nic_fini(sa->nic);
638 sa->state = SFC_ETHDEV_CONFIGURED;
639 sfc_log_init(sa, "done");
/*
 * Restart a started adapter (stop + start). Only valid in the STARTED
 * state; the adapter lock must be held by the caller.
 */
643 sfc_restart(struct sfc_adapter *sa)
647 SFC_ASSERT(sfc_adapter_is_locked(sa));
649 if (sa->state != SFC_ETHDEV_STARTED)
656 sfc_err(sa, "restart failed");
/*
 * Alarm callback scheduled by sfc_schedule_restart(): atomically consume
 * the restart_required flag and restart the adapter under its lock.
 */
662 sfc_restart_if_required(void *arg)
664 struct sfc_adapter *sa = arg;
666 /* If restart is scheduled, clear the flag and do it */
/* Compare-and-set guards against concurrent restart requests */
667 if (rte_atomic32_cmpset((volatile uint32_t *)&sa->restart_required,
669 sfc_adapter_lock(sa);
670 if (sa->state == SFC_ETHDEV_STARTED)
671 (void)sfc_restart(sa);
672 sfc_adapter_unlock(sa);
/*
 * Request an asynchronous adapter restart via an EAL alarm; idempotent
 * while a previous request is still pending (test-and-set on the flag).
 */
677 sfc_schedule_restart(struct sfc_adapter *sa)
681 /* Schedule restart alarm if it is not scheduled yet */
682 if (!rte_atomic32_test_and_set(&sa->restart_required))
685 rc = rte_eal_alarm_set(1, sfc_restart_if_required, sa);
687 sfc_warn(sa, "alarms are not supported, restart is pending")\u003b
689 sfc_err(sa, "cannot arm restart alarm (rc=%d)", rc);
691 sfc_notice(sa, "restart scheduled");
/*
 * Apply device-level configuration: validate it, then configure
 * interrupts, port, Rx, Tx and SW xstats in order, unwinding on failure.
 */
695 sfc_configure(struct sfc_adapter *sa)
699 sfc_log_init(sa, "entry");
701 SFC_ASSERT(sfc_adapter_is_locked(sa));
703 SFC_ASSERT(sa->state == SFC_ETHDEV_INITIALIZED ||
704 sa->state == SFC_ETHDEV_CONFIGURED);
705 sa->state = SFC_ETHDEV_CONFIGURING;
707 rc = sfc_check_conf(sa);
709 goto fail_check_conf;
711 rc = sfc_intr_configure(sa);
713 goto fail_intr_configure;
715 rc = sfc_port_configure(sa);
717 goto fail_port_configure;
719 rc = sfc_rx_configure(sa);
721 goto fail_rx_configure;
723 rc = sfc_tx_configure(sa);
725 goto fail_tx_configure;
727 rc = sfc_sw_xstats_configure(sa);
729 goto fail_sw_xstats_configure;
731 sa->state = SFC_ETHDEV_CONFIGURED;
732 sfc_log_init(sa, "done");
/* Failure unwind in reverse configuration order */
735 fail_sw_xstats_configure:
749 sa->state = SFC_ETHDEV_INITIALIZED;
750 sfc_log_init(sa, "failed %d", rc);
/*
 * Close a configured (but stopped) adapter, releasing configured
 * modules and returning to the INITIALIZED state.
 */
755 sfc_close(struct sfc_adapter *sa)
757 sfc_log_init(sa, "entry");
759 SFC_ASSERT(sfc_adapter_is_locked(sa));
761 SFC_ASSERT(sa->state == SFC_ETHDEV_CONFIGURED);
762 sa->state = SFC_ETHDEV_CLOSING;
764 sfc_sw_xstats_close(sa);
770 sa->state = SFC_ETHDEV_INITIALIZED;
771 sfc_log_init(sa, "done");
/*
 * Initialize the adapter's memory BAR descriptor from the PCI memory
 * resource selected by mem_ebrp, and record the function control window
 * offset within that BAR.
 */
775 sfc_mem_bar_init(struct sfc_adapter *sa, const efx_bar_region_t *mem_ebrp)
777 struct rte_eth_dev *eth_dev = sa->eth_dev;
778 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
779 efsys_bar_t *ebp = &sa->mem_bar;
780 struct rte_mem_resource *res =
781 &pci_dev->mem_resource[mem_ebrp->ebr_index];
783 SFC_BAR_LOCK_INIT(ebp, eth_dev->data->name);
784 ebp->esb_rid = mem_ebrp->ebr_index;
785 ebp->esb_dev = pci_dev;
786 ebp->esb_base = res->addr;
788 sa->fcw_offset = mem_ebrp->ebr_offset;
/*
 * Release the memory BAR descriptor (destroy its lock and clear it).
 */
794 sfc_mem_bar_fini(struct sfc_adapter *sa)
796 efsys_bar_t *ebp = &sa->mem_bar;
798 SFC_BAR_LOCK_DESTROY(ebp);
799 memset(ebp, 0, sizeof(*ebp));
803 * A fixed RSS key which has a property of being symmetric
804 * (symmetrical flows are distributed to the same CPU)
805 * and also known to give a uniform distribution
806 * (a good distribution of traffic between different CPUs)
/* Repeating 0x6d5a pattern; copied into rss->key in sfc_rss_attach() */
808 static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = {
809 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
810 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
811 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
812 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
813 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
/*
 * Query RSS capabilities (scale/hash support) from the NIC. Interrupts,
 * events and Rx are brought up temporarily just for the queries and torn
 * down again before returning. Seeds the RSS key with default_rss_key
 * and initializes the dummy RSS context.
 */
817 sfc_rss_attach(struct sfc_adapter *sa)
819 struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
822 rc = efx_intr_init(sa->nic, sa->intr.type, NULL);
826 rc = efx_ev_init(sa->nic);
830 rc = efx_rx_init(sa->nic);
834 rc = efx_rx_scale_default_support_get(sa->nic, &rss->context_type);
836 goto fail_scale_support_get;
838 rc = efx_rx_hash_default_support_get(sa->nic, &rss->hash_support);
840 goto fail_hash_support_get;
842 rc = sfc_rx_hash_init(sa);
844 goto fail_rx_hash_init;
/* Queries done: tear down the temporary Rx/event/interrupt state */
846 efx_rx_fini(sa->nic);
847 efx_ev_fini(sa->nic);
848 efx_intr_fini(sa->nic);
850 rte_memcpy(rss->key, default_rss_key, sizeof(rss->key));
851 memset(&rss->dummy_ctx, 0, sizeof(rss->dummy_ctx));
852 rss->dummy_ctx.conf.qid_span = 1;
853 rss->dummy_ctx.dummy = true;
/* Failure unwind in reverse init order */
858 fail_hash_support_get:
859 fail_scale_support_get:
860 efx_rx_fini(sa->nic);
863 efx_ev_fini(sa->nic);
866 efx_intr_fini(sa->nic);
/* Undo sfc_rss_attach(): release Rx hash state. */
873 sfc_rss_detach(struct sfc_adapter *sa)
875 sfc_rx_hash_fini(sa);
/*
 * Attach the adapter: reset the NIC, probe capabilities, estimate
 * resource limits and attach all driver modules (intr, ev, port, RSS,
 * filters, MAE, representor proxy, SW xstats), finally creating the
 * SR-IOV vSwitch. Unwinds via goto labels in reverse order on failure.
 */
879 sfc_attach(struct sfc_adapter *sa)
881 const efx_nic_cfg_t *encp;
882 efx_nic_t *enp = sa->nic;
885 sfc_log_init(sa, "entry");
887 SFC_ASSERT(sfc_adapter_is_locked(sa));
/* Start a new MCDI epoch after (re)attach */
889 efx_mcdi_new_epoch(enp);
891 sfc_log_init(sa, "reset nic");
892 rc = efx_nic_reset(enp);
896 rc = sfc_sriov_attach(sa);
898 goto fail_sriov_attach;
901 * Probed NIC is sufficient for tunnel init.
902 * Initialize tunnel support to be able to use libefx
903 * efx_tunnel_config_udp_{add,remove}() in any state and
904 * efx_tunnel_reconfigure() on start up.
906 rc = efx_tunnel_init(enp);
908 goto fail_tunnel_init;
910 encp = efx_nic_cfg_get(sa->nic);
913 * Make a copy of supported tunnel encapsulations in shared
914 * memory to be used on supported Rx packet type classes get.
916 sa->priv.shared->tunnel_encaps =
917 encp->enc_tunnel_encapsulations_supported;
/* TSO is usable only if the Tx datapath offers the offload and FW
 * provides FATSOv2 or TSOv3 */
919 if (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
920 sa->tso = encp->enc_fw_assisted_tso_v2_enabled ||
921 encp->enc_tso_v3_enabled;
923 sfc_info(sa, "TSO support isn't available on this adapter");
927 (sfc_dp_tx_offload_capa(sa->priv.dp_tx) &
928 (RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO |
929 RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO)) != 0) {
930 sa->tso_encap = encp->enc_fw_assisted_tso_v2_encap_enabled ||
931 encp->enc_tso_v3_enabled;
933 sfc_info(sa, "Encapsulated TSO support isn't available on this adapter");
936 sfc_log_init(sa, "estimate resource limits");
937 rc = sfc_estimate_resource_limits(sa);
939 goto fail_estimate_rsrc_limits;
/* Cache queue size bounds; libefx guarantees powers of two */
941 sa->evq_max_entries = encp->enc_evq_max_nevs;
942 SFC_ASSERT(rte_is_power_of_2(sa->evq_max_entries));
944 sa->evq_min_entries = encp->enc_evq_min_nevs;
945 SFC_ASSERT(rte_is_power_of_2(sa->evq_min_entries));
947 sa->rxq_max_entries = encp->enc_rxq_max_ndescs;
948 SFC_ASSERT(rte_is_power_of_2(sa->rxq_max_entries));
950 sa->rxq_min_entries = encp->enc_rxq_min_ndescs;
951 SFC_ASSERT(rte_is_power_of_2(sa->rxq_min_entries));
953 sa->txq_max_entries = encp->enc_txq_max_ndescs;
954 SFC_ASSERT(rte_is_power_of_2(sa->txq_max_entries));
956 sa->txq_min_entries = encp->enc_txq_min_ndescs;
957 SFC_ASSERT(rte_is_power_of_2(sa->txq_min_entries));
/* Attach driver modules in dependency order */
959 rc = sfc_intr_attach(sa);
961 goto fail_intr_attach;
963 rc = sfc_ev_attach(sa);
967 rc = sfc_port_attach(sa);
969 goto fail_port_attach;
971 rc = sfc_rss_attach(sa);
973 goto fail_rss_attach;
975 rc = sfc_flow_rss_attach(sa);
977 goto fail_flow_rss_attach;
979 rc = sfc_filter_attach(sa);
981 goto fail_filter_attach;
983 rc = sfc_mae_counter_rxq_attach(sa);
985 goto fail_mae_counter_rxq_attach;
987 rc = sfc_mae_attach(sa);
989 goto fail_mae_attach;
991 rc = sfc_mae_switchdev_init(sa);
993 goto fail_mae_switchdev_init;
995 rc = sfc_repr_proxy_attach(sa);
997 goto fail_repr_proxy_attach;
999 sfc_log_init(sa, "fini nic");
1004 rc = sfc_sw_xstats_init(sa);
1006 goto fail_sw_xstats_init;
1009 * Create vSwitch to be able to use VFs when PF is not started yet
1010 * as DPDK port. VFs should be able to talk to each other even
1013 rc = sfc_sriov_vswitch_create(sa);
1015 goto fail_sriov_vswitch_create;
1017 sa->state = SFC_ETHDEV_INITIALIZED;
1019 sfc_log_init(sa, "done");
/* Failure unwind: detach modules in reverse order of attach */
1022 fail_sriov_vswitch_create:
1023 sfc_sw_xstats_close(sa);
1025 fail_sw_xstats_init:
1027 sfc_repr_proxy_detach(sa);
1029 fail_repr_proxy_attach:
1030 sfc_mae_switchdev_fini(sa);
1032 fail_mae_switchdev_init:
1036 sfc_mae_counter_rxq_detach(sa);
1038 fail_mae_counter_rxq_attach:
1039 sfc_filter_detach(sa);
1042 sfc_flow_rss_detach(sa);
1044 fail_flow_rss_attach:
1048 sfc_port_detach(sa);
1054 sfc_intr_detach(sa);
1057 efx_nic_fini(sa->nic);
1059 fail_estimate_rsrc_limits:
1061 efx_tunnel_fini(sa->nic);
1062 sfc_sriov_detach(sa);
1067 sfc_log_init(sa, "failed %d", rc);
/*
 * Pre-detach work that must run WITHOUT the adapter lock held
 * (representor proxy teardown may synchronize with its service core).
 */
1072 sfc_pre_detach(struct sfc_adapter *sa)
1074 sfc_log_init(sa, "entry");
1076 SFC_ASSERT(!sfc_adapter_is_locked(sa));
1078 sfc_repr_proxy_pre_detach(sa);
1080 sfc_log_init(sa, "done");
/*
 * Detach the adapter: tear down everything done by sfc_attach() in
 * reverse order and return to the UNINITIALIZED state.
 */
1084 sfc_detach(struct sfc_adapter *sa)
1086 sfc_log_init(sa, "entry");
1088 SFC_ASSERT(sfc_adapter_is_locked(sa));
1090 sfc_sriov_vswitch_destroy(sa);
1094 sfc_repr_proxy_detach(sa);
1095 sfc_mae_switchdev_fini(sa);
1097 sfc_mae_counter_rxq_detach(sa);
1098 sfc_filter_detach(sa);
1099 sfc_flow_rss_detach(sa);
1101 sfc_port_detach(sa);
1103 sfc_intr_detach(sa);
1104 efx_tunnel_fini(sa->nic);
1105 sfc_sriov_detach(sa);
1107 sa->state = SFC_ETHDEV_UNINITIALIZED;
/*
 * kvargs handler: parse the fw_variant device argument string into the
 * corresponding efx_fw_variant_t value (stored via opaque pointer).
 * Comparison is case-insensitive.
 */
1111 sfc_kvarg_fv_variant_handler(__rte_unused const char *key,
1112 const char *value_str, void *opaque)
1114 uint32_t *value = opaque;
1116 if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DONT_CARE) == 0)
1117 *value = EFX_FW_VARIANT_DONT_CARE;
1118 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_FULL_FEATURED) == 0)
1119 *value = EFX_FW_VARIANT_FULL_FEATURED;
1120 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_LOW_LATENCY) == 0)
1121 *value = EFX_FW_VARIANT_LOW_LATENCY;
1122 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_PACKED_STREAM) == 0)
1123 *value = EFX_FW_VARIANT_PACKED_STREAM;
1124 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DPDK) == 0)
1125 *value = EFX_FW_VARIANT_DPDK;
/*
 * Determine the running firmware variant from the RxDPCPU firmware ID
 * reported by the NIC. Variants not distinguishable this way map to
 * EFX_FW_VARIANT_DONT_CARE.
 */
1133 sfc_get_fw_variant(struct sfc_adapter *sa, efx_fw_variant_t *efv)
1135 efx_nic_fw_info_t enfi;
1138 rc = efx_nic_get_fw_version(sa->nic, &enfi);
/* Without valid DPCPU FW IDs the variant cannot be identified */
1141 else if (!enfi.enfi_dpcpu_fw_ids_valid)
1145 * Firmware variant can be uniquely identified by the RxDPCPU
1148 switch (enfi.enfi_rx_dpcpu_fw_id) {
1149 case EFX_RXDP_FULL_FEATURED_FW_ID:
1150 *efv = EFX_FW_VARIANT_FULL_FEATURED;
1153 case EFX_RXDP_LOW_LATENCY_FW_ID:
1154 *efv = EFX_FW_VARIANT_LOW_LATENCY;
1157 case EFX_RXDP_PACKED_STREAM_FW_ID:
1158 *efv = EFX_FW_VARIANT_PACKED_STREAM;
1161 case EFX_RXDP_DPDK_FW_ID:
1162 *efv = EFX_FW_VARIANT_DPDK;
1167 * Other firmware variants are not considered, since they are
1168 * not supported in the device parameters
1170 *efv = EFX_FW_VARIANT_DONT_CARE;
/*
 * Map a firmware variant to its device-argument string for logging.
 * NOTE(review): the parameter is efx_fw_variant_t but the case labels
 * are EFX_RXDP_*_FW_ID constants — verify the two enumerations share
 * values, or whether callers actually pass RxDP FW IDs here.
 */
1178 sfc_fw_variant2str(efx_fw_variant_t efv)
1181 case EFX_RXDP_FULL_FEATURED_FW_ID:
1182 return SFC_KVARG_FW_VARIANT_FULL_FEATURED;
1183 case EFX_RXDP_LOW_LATENCY_FW_ID:
1184 return SFC_KVARG_FW_VARIANT_LOW_LATENCY;
1185 case EFX_RXDP_PACKED_STREAM_FW_ID:
1186 return SFC_KVARG_FW_VARIANT_PACKED_STREAM;
1187 case EFX_RXDP_DPDK_FW_ID:
1188 return SFC_KVARG_FW_VARIANT_DPDK;
/*
 * Parse and validate the rxd_wait_timeout_ns device argument, falling
 * back to SFC_RXD_WAIT_TIMEOUT_NS_DEF. Valid range is
 * [0, EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX].
 */
1195 sfc_kvarg_rxd_wait_timeout_ns(struct sfc_adapter *sa)
1200 value = SFC_RXD_WAIT_TIMEOUT_NS_DEF;
1202 rc = sfc_kvargs_process(sa, SFC_KVARG_RXD_WAIT_TIMEOUT_NS,
1203 sfc_kvarg_long_handler, &value);
1208 (unsigned long)value > EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX) {
1209 sfc_err(sa, "wrong '" SFC_KVARG_RXD_WAIT_TIMEOUT_NS "' "
1210 "was set (%ld);", value)\u003b
1211 sfc_err(sa, "it must not be less than 0 or greater than %u",
1212 EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX);
1216 sa->rxd_wait_timeout_ns = value;
/*
 * Probe the NIC with the firmware variant requested via device
 * arguments, retrying with DONT_CARE for unprivileged functions, then
 * verify/log the variant that is actually running.
 */
1221 sfc_nic_probe(struct sfc_adapter *sa)
1223 efx_nic_t *enp = sa->nic;
1224 efx_fw_variant_t preferred_efv;
1225 efx_fw_variant_t efv;
1228 preferred_efv = EFX_FW_VARIANT_DONT_CARE;
1229 rc = sfc_kvargs_process(sa, SFC_KVARG_FW_VARIANT,
1230 sfc_kvarg_fv_variant_handler,
1233 sfc_err(sa, "invalid %s parameter value", SFC_KVARG_FW_VARIANT);
1237 rc = sfc_kvarg_rxd_wait_timeout_ns(sa);
1241 rc = efx_nic_probe(enp, preferred_efv);
1243 /* Unprivileged functions cannot set FW variant */
1244 rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
1249 rc = sfc_get_fw_variant(sa, &efv);
/* Not being able to read the variant is non-fatal */
1250 if (rc == ENOTSUP) {
1251 sfc_warn(sa, "FW variant can not be obtained");
1257 /* Check that firmware variant was changed to the requested one */
1258 if (preferred_efv != EFX_FW_VARIANT_DONT_CARE && preferred_efv != efv) {
1259 sfc_warn(sa, "FW variant has not changed to the requested %s",
1260 sfc_fw_variant2str(preferred_efv));
1263 sfc_notice(sa, "running FW variant is %s", sfc_fw_variant2str(efv));
/*
 * Probe the device: detect the EFX family, map the memory BAR, create
 * the libefx NIC object, initialize MCDI and probe the NIC. Unwinds via
 * goto labels on failure.
 */
1269 sfc_probe(struct sfc_adapter *sa)
1271 efx_bar_region_t mem_ebrp;
1272 struct rte_eth_dev *eth_dev = sa->eth_dev;
1273 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
1277 sfc_log_init(sa, "entry");
1279 SFC_ASSERT(sfc_adapter_is_locked(sa));
1281 sa->socket_id = rte_socket_id();
1282 rte_atomic32_init(&sa->restart_required);
1284 sfc_log_init(sa, "get family");
1285 rc = sfc_efx_family(pci_dev, &mem_ebrp, &sa->family);
/* NOTE(review): %lu for ebr_offset is non-portable if the field is a
 * 64-bit type on 32-bit targets — consider PRIx64; confirm field type */
1290 "family is %u, membar is %u, function control window offset is %lu",
1291 sa->family, mem_ebrp.ebr_index, mem_ebrp.ebr_offset);
1293 sfc_log_init(sa, "init mem bar");
1294 rc = sfc_mem_bar_init(sa, &mem_ebrp);
1296 goto fail_mem_bar_init;
1298 sfc_log_init(sa, "create nic");
1299 rte_spinlock_init(&sa->nic_lock);
1300 rc = efx_nic_create(sa->family, (efsys_identifier_t *)sa,
1301 &sa->mem_bar, mem_ebrp.ebr_offset,
1302 &sa->nic_lock, &enp);
1304 goto fail_nic_create;
1307 rc = sfc_mcdi_init(sa);
1309 goto fail_mcdi_init;
1311 sfc_log_init(sa, "probe nic");
1312 rc = sfc_nic_probe(sa);
1314 goto fail_nic_probe;
1316 sfc_log_init(sa, "done");
/* Failure unwind in reverse order */
1323 sfc_log_init(sa, "destroy nic");
1325 efx_nic_destroy(enp);
1328 sfc_mem_bar_fini(sa);
1332 sfc_log_init(sa, "failed %d", rc);
/*
 * Undo sfc_probe(): unprobe and destroy the NIC, cancel any pending
 * restart alarm (its callback uses the soon-to-be-freed private data),
 * clear the MAE switch port and release the memory BAR.
 */
1337 sfc_unprobe(struct sfc_adapter *sa)
1339 efx_nic_t *enp = sa->nic;
1341 sfc_log_init(sa, "entry");
1343 SFC_ASSERT(sfc_adapter_is_locked(sa));
1345 sfc_log_init(sa, "unprobe nic");
1346 efx_nic_unprobe(enp);
1351 * Make sure there is no pending alarm to restart since we are
1352 * going to free device private which is passed as the callback
1353 * opaque data. A new alarm cannot be scheduled since MCDI is
1356 rte_eal_alarm_cancel(sfc_restart_if_required, sa);
1358 sfc_mae_clear_switch_port(sa->mae.switch_domain_id,
1359 sa->mae.switch_port_id);
1361 sfc_log_init(sa, "destroy nic");
1363 efx_nic_destroy(enp);
1365 sfc_mem_bar_fini(sa);
1368 sa->state = SFC_ETHDEV_UNINITIALIZED;
/*
 * Register a per-device log type named "<prefix>.<PCI address>" with the
 * given default level. Falls back to the generic driver logtype on size
 * overflow or allocation failure.
 */
1372 sfc_register_logtype(const struct rte_pci_addr *pci_addr,
1373 const char *lt_prefix_str, uint32_t ll_default)
1375 size_t lt_prefix_str_size = strlen(lt_prefix_str);
1376 size_t lt_str_size_max;
1377 char *lt_str = NULL;
/* Guard against size_t overflow when sizing the combined name */
1380 if (SIZE_MAX - PCI_PRI_STR_SIZE - 1 > lt_prefix_str_size) {
1381 ++lt_prefix_str_size; /* Reserve space for prefix separator */
1382 lt_str_size_max = lt_prefix_str_size + PCI_PRI_STR_SIZE + 1;
1384 return sfc_logtype_driver;
1387 lt_str = rte_zmalloc("logtype_str", lt_str_size_max, 0);
1389 return sfc_logtype_driver;
/* Copy prefix, replace its trailing NUL with '.', append PCI name */
1391 strncpy(lt_str, lt_prefix_str, lt_prefix_str_size);
1392 lt_str[lt_prefix_str_size - 1] = '.';
1393 rte_pci_device_name(pci_addr, lt_str + lt_prefix_str_size,
1394 lt_str_size_max - lt_prefix_str_size);
1395 lt_str[lt_str_size_max - 1] = '\0';
1397 ret = rte_log_register_type_and_pick_level(lt_str, ll_default);
1401 return sfc_logtype_driver;
/* Identifies the physical switch a port belongs to by board serial. */
1406 struct sfc_hw_switch_id {
1407 char board_sn[RTE_SIZEOF_FIELD(efx_nic_board_info_t, enbi_serial)];
/*
 * Allocate a HW switch ID and fill it with the board serial number read
 * from the NIC. Caller releases it with sfc_hw_switch_id_fini().
 */
1411 sfc_hw_switch_id_init(struct sfc_adapter *sa,
1412 struct sfc_hw_switch_id **idp)
1414 efx_nic_board_info_t board_info;
1415 struct sfc_hw_switch_id *id;
1421 id = rte_zmalloc("sfc_hw_switch_id", sizeof(*id), 0);
1425 rc = efx_nic_get_board_info(sa->nic, &board_info);
1429 memcpy(id->board_sn, board_info.enbi_serial, sizeof(id->board_sn));
/* Release a HW switch ID obtained from sfc_hw_switch_id_init(). */
1437 sfc_hw_switch_id_fini(__rte_unused struct sfc_adapter *sa,
1438 struct sfc_hw_switch_id *id)
/* Two ports are on the same HW switch iff their board serials match. */
1444 sfc_hw_switch_ids_equal(const struct sfc_hw_switch_id *left,
1445 const struct sfc_hw_switch_id *right)
1447 return strncmp(left->board_sn, right->board_sn,
1448 sizeof(left->board_sn)) == 0;