1 /* SPDX-License-Identifier: BSD-3-Clause
3 * Copyright(c) 2019-2021 Xilinx, Inc.
4 * Copyright(c) 2016-2019 Solarflare Communications Inc.
6 * This software was jointly developed between OKTET Labs (under contract
7 * for Solarflare) and Solarflare Communications, Inc.
13 #include <rte_errno.h>
14 #include <rte_alarm.h>
19 #include "sfc_debug.h"
23 #include "sfc_mae_counter.h"
25 #include "sfc_kvargs.h"
26 #include "sfc_tweak.h"
27 #include "sfc_sw_stats.h"
28 #include "sfc_switch.h"
/*
 * Check whether port representors can be supported on this adapter:
 * at least one service lcore must be available for the representor proxy.
 */
31 sfc_repr_supported(const struct sfc_adapter *sa)
37 * Representor proxy should use service lcore on PF's socket
38 * (sa->socket_id) to be efficient. But the proxy will fall back
39 * to any socket if it is not possible to get the service core
40 * on the same socket. Check that at least service core on any
41 * socket is available.
/* RTE_MAX_LCORE is the "no lcore found" sentinel from sfc_get_service_lcore() */
43 if (sfc_get_service_lcore(SOCKET_ID_ANY) == RTE_MAX_LCORE)
/*
 * Representors are usable only if the resource-estimation step managed to
 * reserve at least one proxy RxQ and one proxy TxQ (see
 * sfc_estimate_resource_limits()).
 */
50 sfc_repr_available(const struct sfc_adapter_shared *sas)
52 return sas->nb_repr_rxq > 0 && sas->nb_repr_txq > 0;
56 sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
57 size_t len, int socket_id, efsys_mem_t *esmp)
59 const struct rte_memzone *mz;
61 sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d",
62 name, id, len, socket_id);
64 mz = rte_eth_dma_zone_reserve(sa->eth_dev, name, id, len,
65 sysconf(_SC_PAGESIZE), socket_id);
67 sfc_err(sa, "cannot reserve DMA zone for %s:%u %#x@%d: %s",
68 name, (unsigned int)id, (unsigned int)len, socket_id,
69 rte_strerror(rte_errno));
73 esmp->esm_addr = mz->iova;
74 if (esmp->esm_addr == RTE_BAD_IOVA) {
75 (void)rte_memzone_free(mz);
80 esmp->esm_base = mz->addr;
83 "DMA name=%s id=%u len=%lu socket_id=%d => virt=%p iova=%lx",
84 name, id, len, socket_id, esmp->esm_base,
85 (unsigned long)esmp->esm_addr);
91 sfc_dma_free(const struct sfc_adapter *sa, efsys_mem_t *esmp)
95 sfc_log_init(sa, "name=%s", esmp->esm_mz->name);
97 rc = rte_memzone_free(esmp->esm_mz);
99 sfc_err(sa, "rte_memzone_free(() failed: %d", rc);
101 memset(esmp, 0, sizeof(*esmp));
/*
 * Translate an ethdev link-speed bitmask (ETH_LINK_SPEED_*) into the
 * libefx PHY capability bitmask (EFX_PHY_CAP_*).  Only full-duplex
 * capabilities are produced.
 */
105 sfc_phy_cap_from_link_speeds(uint32_t speeds)
107 uint32_t phy_caps = 0;
/* FIXED bit clear means autonegotiation is allowed */
109 if (~speeds & ETH_LINK_SPEED_FIXED) {
110 phy_caps |= (1 << EFX_PHY_CAP_AN);
112 * If no speeds are specified in the mask, any supported
/* AUTONEG alone (mask == 0 besides AN) advertises every supported speed */
115 if (speeds == ETH_LINK_SPEED_AUTONEG)
117 (1 << EFX_PHY_CAP_1000FDX) |
118 (1 << EFX_PHY_CAP_10000FDX) |
119 (1 << EFX_PHY_CAP_25000FDX) |
120 (1 << EFX_PHY_CAP_40000FDX) |
121 (1 << EFX_PHY_CAP_50000FDX) |
122 (1 << EFX_PHY_CAP_100000FDX);
/* Otherwise map each requested speed bit individually */
124 if (speeds & ETH_LINK_SPEED_1G)
125 phy_caps |= (1 << EFX_PHY_CAP_1000FDX);
126 if (speeds & ETH_LINK_SPEED_10G)
127 phy_caps |= (1 << EFX_PHY_CAP_10000FDX);
128 if (speeds & ETH_LINK_SPEED_25G)
129 phy_caps |= (1 << EFX_PHY_CAP_25000FDX);
130 if (speeds & ETH_LINK_SPEED_40G)
131 phy_caps |= (1 << EFX_PHY_CAP_40000FDX);
132 if (speeds & ETH_LINK_SPEED_50G)
133 phy_caps |= (1 << EFX_PHY_CAP_50000FDX);
134 if (speeds & ETH_LINK_SPEED_100G)
135 phy_caps |= (1 << EFX_PHY_CAP_100000FDX);
141 * Check requested device level configuration.
142 * Receive and transmit configuration is checked in corresponding
/* Returns 0 if dev_conf is acceptable, errno-style code otherwise. */
146 sfc_check_conf(struct sfc_adapter *sa)
148 const struct rte_eth_conf *conf = &sa->eth_dev->data->dev_conf;
/* Advertised PHY caps = requested speeds masked by adapter capabilities */
151 sa->port.phy_adv_cap =
152 sfc_phy_cap_from_link_speeds(conf->link_speeds) &
153 sa->port.phy_adv_cap_mask;
/* AN bit alone is not a usable speed — at least one speed cap must remain */
154 if ((sa->port.phy_adv_cap & ~(1 << EFX_PHY_CAP_AN)) == 0) {
155 sfc_err(sa, "No link speeds from mask %#x are supported",
/* Loopback is only valid when libefx was built with loopback support */
160 #if !EFSYS_OPT_LOOPBACK
161 if (conf->lpbk_mode != 0) {
162 sfc_err(sa, "Loopback not supported");
167 if (conf->dcb_capability_en != 0) {
168 sfc_err(sa, "Priority-based flow control not supported");
172 if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
173 sfc_err(sa, "Flow Director not supported");
/* LSC interrupt requires line or message interrupts from the NIC */
177 if ((conf->intr_conf.lsc != 0) &&
178 (sa->intr.type != EFX_INTR_LINE) &&
179 (sa->intr.type != EFX_INTR_MESSAGE)) {
180 sfc_err(sa, "Link status change interrupt not supported");
/* Rx queue interrupts depend on the selected Rx datapath's features */
184 if (conf->intr_conf.rxq != 0 &&
185 (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_INTR) == 0) {
186 sfc_err(sa, "Receive queue interrupt not supported");
194 * Find out maximum number of receive and transmit queues which could be
197 * NIC is kept initialized on success to allow other modules acquire
198 * defaults and capabilities.
/*
 * Negotiate EVQ/RxQ/TxQ limits with firmware, then carve the allocated
 * pool into: MAE counter RxQ (optional), representor proxy queues
 * (optional) and ethdev Rx/Tx queues.  Returns 0 on success.
 */
201 sfc_estimate_resource_limits(struct sfc_adapter *sa)
203 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
204 struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
205 efx_drv_limits_t limits;
207 uint32_t evq_allocated;
208 uint32_t rxq_allocated;
209 uint32_t txq_allocated;
211 memset(&limits, 0, sizeof(limits));
213 /* Request at least one Rx and Tx queue */
214 limits.edl_min_rxq_count = 1;
215 limits.edl_min_txq_count = 1;
216 /* Management event queue plus event queue for each Tx and Rx queue */
217 limits.edl_min_evq_count =
218 1 + limits.edl_min_rxq_count + limits.edl_min_txq_count;
220 /* Divide by number of functions to guarantee that all functions
221 * will get promised resources
223 /* FIXME Divide by number of functions (not 2) below */
224 limits.edl_max_evq_count = encp->enc_evq_limit / 2;
225 SFC_ASSERT(limits.edl_max_evq_count >= limits.edl_min_rxq_count);
227 /* Split equally between receive and transmit */
228 limits.edl_max_rxq_count =
229 MIN(encp->enc_rxq_limit, (limits.edl_max_evq_count - 1) / 2);
230 SFC_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);
232 limits.edl_max_txq_count =
233 MIN(encp->enc_txq_limit,
234 limits.edl_max_evq_count - 1 - limits.edl_max_rxq_count);
/* FATSOv2 TSO contexts are shared per-PF, so they bound TxQ count */
236 if (sa->tso && encp->enc_fw_assisted_tso_v2_enabled)
237 limits.edl_max_txq_count =
238 MIN(limits.edl_max_txq_count,
239 encp->enc_fw_assisted_tso_v2_n_contexts /
240 encp->enc_hw_pf_count);
242 SFC_ASSERT(limits.edl_max_txq_count >= limits.edl_min_rxq_count);
244 /* Configure the minimum required resources needed for the
245 * driver to operate, and the maximum desired resources that the
246 * driver is capable of using.
248 efx_nic_set_drv_limits(sa->nic, &limits);
250 sfc_log_init(sa, "init nic");
251 rc = efx_nic_init(sa->nic);
255 /* Find resource dimensions assigned by firmware to this function */
256 rc = efx_nic_get_vi_pool(sa->nic, &evq_allocated, &rxq_allocated,
259 goto fail_get_vi_pool;
261 /* It still may allocate more than maximum, ensure limit */
262 evq_allocated = MIN(evq_allocated, limits.edl_max_evq_count);
263 rxq_allocated = MIN(rxq_allocated, limits.edl_max_rxq_count);
264 txq_allocated = MIN(txq_allocated, limits.edl_max_txq_count);
267 * Subtract management EVQ not used for traffic
268 * The resource allocation strategy is as follows:
269 * - one EVQ for management
270 * - one EVQ for each ethdev RXQ
271 * - one EVQ for each ethdev TXQ
272 * - one EVQ and one RXQ for optional MAE counters.
274 if (evq_allocated == 0) {
275 sfc_err(sa, "count of allocated EvQ is 0");
277 goto fail_allocate_evq;
282 * Reserve absolutely required minimum.
283 * Right now we use separate EVQ for Rx and Tx.
285 if (rxq_allocated > 0 && evq_allocated > 0) {
290 if (txq_allocated > 0 && evq_allocated > 0) {
/* MAE counters need one dedicated RxQ + EVQ if the feature is in use */
296 if (sfc_mae_counter_rxq_required(sa) &&
297 rxq_allocated > 0 && evq_allocated > 0) {
300 sas->counters_rxq_allocated = true;
302 sas->counters_rxq_allocated = false;
/* Representor proxy queues: reserve minimum first, then grow to maximum */
305 if (sfc_repr_supported(sa) &&
306 evq_allocated >= SFC_REPR_PROXY_NB_RXQ_MIN +
307 SFC_REPR_PROXY_NB_TXQ_MIN &&
308 rxq_allocated >= SFC_REPR_PROXY_NB_RXQ_MIN &&
309 txq_allocated >= SFC_REPR_PROXY_NB_TXQ_MIN) {
312 txq_allocated -= SFC_REPR_PROXY_NB_TXQ_MIN;
313 rxq_allocated -= SFC_REPR_PROXY_NB_RXQ_MIN;
314 evq_allocated -= SFC_REPR_PROXY_NB_RXQ_MIN +
315 SFC_REPR_PROXY_NB_TXQ_MIN;
317 sas->nb_repr_rxq = SFC_REPR_PROXY_NB_RXQ_MIN;
318 sas->nb_repr_txq = SFC_REPR_PROXY_NB_TXQ_MIN;
320 /* Allocate extra representor RxQs up to the maximum */
321 extra = MIN(evq_allocated, rxq_allocated);
323 SFC_REPR_PROXY_NB_RXQ_MAX - sas->nb_repr_rxq);
324 evq_allocated -= extra;
325 rxq_allocated -= extra;
326 sas->nb_repr_rxq += extra;
328 /* Allocate extra representor TxQs up to the maximum */
329 extra = MIN(evq_allocated, txq_allocated);
331 SFC_REPR_PROXY_NB_TXQ_MAX - sas->nb_repr_txq);
332 evq_allocated -= extra;
333 txq_allocated -= extra;
334 sas->nb_repr_txq += extra;
336 sas->nb_repr_rxq = 0;
337 sas->nb_repr_txq = 0;
340 /* Add remaining allocated queues */
341 sa->rxq_max += MIN(rxq_allocated, evq_allocated / 2);
342 sa->txq_max += MIN(txq_allocated, evq_allocated - sa->rxq_max);
344 /* Keep NIC initialized */
/* Error path: undo efx_nic_init() */
349 efx_nic_fini(sa->nic);
/*
 * Push exact (min == max) queue limits to libefx before NIC (re)init,
 * based on the configured ethdev queue counts plus internally reserved
 * queues (MAE counters, representor proxy).
 */
355 sfc_set_drv_limits(struct sfc_adapter *sa)
357 struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
358 const struct rte_eth_dev_data *data = sa->eth_dev->data;
359 uint32_t rxq_reserved = sfc_nb_reserved_rxq(sas);
360 uint32_t txq_reserved = sfc_nb_txq_reserved(sas);
361 efx_drv_limits_t lim;
363 memset(&lim, 0, sizeof(lim));
366 * Limits are strict since take into account initial estimation.
367 * Resource allocation strategy is described in
368 * sfc_estimate_resource_limits().
/* One management EVQ plus one EVQ per Rx/Tx queue (ethdev + reserved) */
370 lim.edl_min_evq_count = lim.edl_max_evq_count =
371 1 + data->nb_rx_queues + data->nb_tx_queues +
372 rxq_reserved + txq_reserved;
373 lim.edl_min_rxq_count = lim.edl_max_rxq_count =
374 data->nb_rx_queues + rxq_reserved;
375 lim.edl_min_txq_count = lim.edl_max_txq_count =
376 data->nb_tx_queues + txq_reserved;
378 return efx_nic_set_drv_limits(sa->nic, &lim);
/*
 * Select the firmware subvariant: if no Tx checksum offload is requested
 * anywhere (device config or any initialized TxQ), switch to the
 * no-Tx-checksum subvariant; otherwise use the default one.
 */
382 sfc_set_fw_subvariant(struct sfc_adapter *sa)
384 struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
385 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
386 uint64_t tx_offloads = sa->eth_dev->data->dev_conf.txmode.offloads;
387 unsigned int txq_index;
388 efx_nic_fw_subvariant_t req_fw_subvariant;
389 efx_nic_fw_subvariant_t cur_fw_subvariant;
392 if (!encp->enc_fw_subvariant_no_tx_csum_supported) {
393 sfc_info(sa, "no-Tx-checksum subvariant not supported");
/* Aggregate per-queue offloads on top of the device-level ones */
397 for (txq_index = 0; txq_index < sas->txq_count; ++txq_index) {
398 struct sfc_txq_info *txq_info = &sas->txq_info[txq_index];
400 if (txq_info->state & SFC_TXQ_INITIALIZED)
401 tx_offloads |= txq_info->offloads;
404 if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
405 DEV_TX_OFFLOAD_TCP_CKSUM |
406 DEV_TX_OFFLOAD_UDP_CKSUM |
407 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM))
408 req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_DEFAULT;
410 req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM;
412 rc = efx_nic_get_fw_subvariant(sa->nic, &cur_fw_subvariant);
414 sfc_err(sa, "failed to get FW subvariant: %d", rc);
417 sfc_info(sa, "FW subvariant is %u vs required %u",
418 cur_fw_subvariant, req_fw_subvariant);
/* Nothing to do if the firmware already runs the required subvariant */
420 if (cur_fw_subvariant == req_fw_subvariant)
423 rc = efx_nic_set_fw_subvariant(sa->nic, req_fw_subvariant);
425 sfc_err(sa, "failed to set FW subvariant %u: %d",
426 req_fw_subvariant, rc);
429 sfc_info(sa, "FW subvariant set to %u", req_fw_subvariant);
/*
 * Single attempt to bring the adapter up: FW subvariant, driver limits,
 * NIC init, tunnels, interrupts, events, port, Rx, Tx, flows, representor
 * proxy — unwound in reverse order via the fail_* labels on error.
 * Caller (sfc_start) may retry on transient failures.
 */
435 sfc_try_start(struct sfc_adapter *sa)
437 const efx_nic_cfg_t *encp;
440 sfc_log_init(sa, "entry");
442 SFC_ASSERT(sfc_adapter_is_locked(sa));
443 SFC_ASSERT(sa->state == SFC_ETHDEV_STARTING);
445 sfc_log_init(sa, "set FW subvariant");
446 rc = sfc_set_fw_subvariant(sa);
448 goto fail_set_fw_subvariant;
450 sfc_log_init(sa, "set resource limits");
451 rc = sfc_set_drv_limits(sa);
453 goto fail_set_drv_limits;
455 sfc_log_init(sa, "init nic");
456 rc = efx_nic_init(sa->nic);
460 encp = efx_nic_cfg_get(sa->nic);
463 * Refresh (since it may change on NIC reset/restart) a copy of
464 * supported tunnel encapsulations in shared memory to be used
465 * on supported Rx packet type classes get.
467 sa->priv.shared->tunnel_encaps =
468 encp->enc_tunnel_encapsulations_supported;
470 if (encp->enc_tunnel_encapsulations_supported != 0) {
471 sfc_log_init(sa, "apply tunnel config");
472 rc = efx_tunnel_reconfigure(sa->nic);
474 goto fail_tunnel_reconfigure;
477 rc = sfc_intr_start(sa);
479 goto fail_intr_start;
481 rc = sfc_ev_start(sa);
485 rc = sfc_port_start(sa);
487 goto fail_port_start;
489 rc = sfc_rx_start(sa);
493 rc = sfc_tx_start(sa);
497 rc = sfc_flow_start(sa);
499 goto fail_flows_insert;
501 rc = sfc_repr_proxy_start(sa);
503 goto fail_repr_proxy_start;
505 sfc_log_init(sa, "done");
/* Error unwind: reverse order of the start sequence above */
508 fail_repr_proxy_start:
527 fail_tunnel_reconfigure:
528 efx_nic_fini(sa->nic);
532 fail_set_fw_subvariant:
533 sfc_log_init(sa, "failed %d", rc);
/*
 * Start the adapter, retrying sfc_try_start() a few times for transient
 * errors (e.g. MC reboot races).  Must be called with the adapter lock
 * held; idempotent if already started.
 */
538 sfc_start(struct sfc_adapter *sa)
540 unsigned int start_tries = 3;
543 sfc_log_init(sa, "entry");
545 SFC_ASSERT(sfc_adapter_is_locked(sa));
548 case SFC_ETHDEV_CONFIGURED:
550 case SFC_ETHDEV_STARTED:
551 sfc_notice(sa, "already started");
558 sa->state = SFC_ETHDEV_STARTING;
563 * FIXME Try to recreate vSwitch on start retry.
564 * vSwitch is absent after MC reboot like events and
565 * we should recreate it. May be we need proper
566 * indication instead of guessing.
/* Destroy-then-create keeps the vSwitch usable across MC reboots */
569 sfc_sriov_vswitch_destroy(sa);
570 rc = sfc_sriov_vswitch_create(sa);
572 goto fail_sriov_vswitch_create;
574 rc = sfc_try_start(sa);
/* Retry only on error codes known to be transient after MC reboot */
575 } while ((--start_tries > 0) &&
576 (rc == EIO || rc == EAGAIN || rc == ENOENT || rc == EINVAL));
581 sa->state = SFC_ETHDEV_STARTED;
582 sfc_log_init(sa, "done");
586 fail_sriov_vswitch_create:
587 sa->state = SFC_ETHDEV_CONFIGURED;
589 sfc_log_init(sa, "failed %d", rc);
/*
 * Stop the adapter and return it to the CONFIGURED state.
 * Idempotent if already stopped; requires the adapter lock.
 */
594 sfc_stop(struct sfc_adapter *sa)
596 sfc_log_init(sa, "entry");
598 SFC_ASSERT(sfc_adapter_is_locked(sa));
601 case SFC_ETHDEV_STARTED:
603 case SFC_ETHDEV_CONFIGURED:
604 sfc_notice(sa, "already stopped");
607 sfc_err(sa, "stop in unexpected state %u", sa->state);
612 sa->state = SFC_ETHDEV_STOPPING;
/* Tear down in reverse order of sfc_try_start() */
614 sfc_repr_proxy_stop(sa);
621 efx_nic_fini(sa->nic);
623 sa->state = SFC_ETHDEV_CONFIGURED;
624 sfc_log_init(sa, "done");
/*
 * Restart a started adapter (stop + start under the held adapter lock).
 * A no-op error if the adapter is not in the STARTED state.
 */
628 sfc_restart(struct sfc_adapter *sa)
632 SFC_ASSERT(sfc_adapter_is_locked(sa));
634 if (sa->state != SFC_ETHDEV_STARTED)
641 sfc_err(sa, "restart failed");
/*
 * Alarm callback: perform a scheduled restart, if one is pending.
 * Atomically consumes the restart_required flag so concurrent
 * schedulers cannot trigger a second restart for the same request.
 */
647 sfc_restart_if_required(void *arg)
649 struct sfc_adapter *sa = arg;
651 /* If restart is scheduled, clear the flag and do it */
652 if (rte_atomic32_cmpset((volatile uint32_t *)&sa->restart_required,
654 sfc_adapter_lock(sa);
655 if (sa->state == SFC_ETHDEV_STARTED)
656 (void)sfc_restart(sa);
657 sfc_adapter_unlock(sa);
/*
 * Request an asynchronous adapter restart via an EAL alarm.
 * Safe to call from contexts which must not restart synchronously
 * (e.g. event processing).  Deduplicated via restart_required flag.
 */
662 sfc_schedule_restart(struct sfc_adapter *sa)
666 /* Schedule restart alarm if it is not scheduled yet */
667 if (!rte_atomic32_test_and_set(&sa->restart_required))
670 rc = rte_eal_alarm_set(1, sfc_restart_if_required, sa);
/* ENOTSUP from alarms is tolerated: flag stays set, restart is deferred */
672 sfc_warn(sa, "alarms are not supported, restart is pending");
674 sfc_err(sa, "cannot arm restart alarm (rc=%d)", rc);
676 sfc_notice(sa, "restart scheduled");
/*
 * Apply device-level configuration (dev_configure path): validate config,
 * then configure interrupts, port, Rx, Tx and SW xstats; unwound via the
 * fail_* labels on error.  Requires the adapter lock.
 */
680 sfc_configure(struct sfc_adapter *sa)
684 sfc_log_init(sa, "entry");
686 SFC_ASSERT(sfc_adapter_is_locked(sa));
/* Reconfiguration of an already-configured device is allowed */
688 SFC_ASSERT(sa->state == SFC_ETHDEV_INITIALIZED ||
689 sa->state == SFC_ETHDEV_CONFIGURED);
690 sa->state = SFC_ETHDEV_CONFIGURING;
692 rc = sfc_check_conf(sa);
694 goto fail_check_conf;
696 rc = sfc_intr_configure(sa);
698 goto fail_intr_configure;
700 rc = sfc_port_configure(sa);
702 goto fail_port_configure;
704 rc = sfc_rx_configure(sa);
706 goto fail_rx_configure;
708 rc = sfc_tx_configure(sa);
710 goto fail_tx_configure;
712 rc = sfc_sw_xstats_configure(sa);
714 goto fail_sw_xstats_configure;
716 sa->state = SFC_ETHDEV_CONFIGURED;
717 sfc_log_init(sa, "done");
/* Error unwind in reverse order of configuration */
720 fail_sw_xstats_configure:
734 sa->state = SFC_ETHDEV_INITIALIZED;
735 sfc_log_init(sa, "failed %d", rc);
/*
 * Close a configured (stopped) device: release per-configure resources
 * and return to the INITIALIZED state.  Requires the adapter lock.
 */
740 sfc_close(struct sfc_adapter *sa)
742 sfc_log_init(sa, "entry");
744 SFC_ASSERT(sfc_adapter_is_locked(sa));
746 SFC_ASSERT(sa->state == SFC_ETHDEV_CONFIGURED);
747 sa->state = SFC_ETHDEV_CLOSING;
749 sfc_sw_xstats_close(sa);
755 sa->state = SFC_ETHDEV_INITIALIZED;
756 sfc_log_init(sa, "done");
/*
 * Map the PCI memory BAR described by mem_ebrp into the libefx BAR
 * descriptor and remember the function control window offset.
 */
760 sfc_mem_bar_init(struct sfc_adapter *sa, const efx_bar_region_t *mem_ebrp)
762 struct rte_eth_dev *eth_dev = sa->eth_dev;
763 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
764 efsys_bar_t *ebp = &sa->mem_bar;
765 struct rte_mem_resource *res =
766 &pci_dev->mem_resource[mem_ebrp->ebr_index];
768 SFC_BAR_LOCK_INIT(ebp, eth_dev->data->name);
769 ebp->esb_rid = mem_ebrp->ebr_index;
770 ebp->esb_dev = pci_dev;
771 ebp->esb_base = res->addr;
773 sa->fcw_offset = mem_ebrp->ebr_offset;
/* Release the BAR lock and clear the BAR descriptor. */
779 sfc_mem_bar_fini(struct sfc_adapter *sa)
781 efsys_bar_t *ebp = &sa->mem_bar;
783 SFC_BAR_LOCK_DESTROY(ebp);
784 memset(ebp, 0, sizeof(*ebp));
788 * A fixed RSS key which has a property of being symmetric
789 * (symmetrical flows are distributed to the same CPU)
790 * and also known to give a uniform distribution
791 * (a good distribution of traffic between different CPUs)
/* Repeating 0x6d5a pattern — widely used symmetric RSS key */
793 static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = {
794 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
795 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
796 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
797 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
798 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
/*
 * Query RSS capabilities (scale context type, hash support) from the NIC
 * and seed the shared RSS state with the default key.  Temporarily brings
 * up intr/ev/rx modules in libefx just for the queries, then shuts them
 * back down.
 */
802 sfc_rss_attach(struct sfc_adapter *sa)
804 struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
807 rc = efx_intr_init(sa->nic, sa->intr.type, NULL);
811 rc = efx_ev_init(sa->nic);
815 rc = efx_rx_init(sa->nic);
819 rc = efx_rx_scale_default_support_get(sa->nic, &rss->context_type);
821 goto fail_scale_support_get;
823 rc = efx_rx_hash_default_support_get(sa->nic, &rss->hash_support);
825 goto fail_hash_support_get;
827 rc = sfc_rx_hash_init(sa);
829 goto fail_rx_hash_init;
/* Queries done — tear the temporary libefx modules back down */
831 efx_rx_fini(sa->nic);
832 efx_ev_fini(sa->nic);
833 efx_intr_fini(sa->nic);
835 rte_memcpy(rss->key, default_rss_key, sizeof(rss->key));
836 rss->dummy_rss_context = EFX_RSS_CONTEXT_DEFAULT;
/* Error unwind: fini only what was successfully initialized */
841 fail_hash_support_get:
842 fail_scale_support_get:
843 efx_rx_fini(sa->nic);
846 efx_ev_fini(sa->nic);
849 efx_intr_fini(sa->nic);
/* Counterpart of sfc_rss_attach(): release Rx hash state. */
856 sfc_rss_detach(struct sfc_adapter *sa)
858 sfc_rx_hash_fini(sa);
/*
 * One-time attach of all driver modules after probe: NIC reset, SR-IOV,
 * tunnels, TSO capability detection, resource estimation, then the module
 * chain (intr, ev, port, rss, filter, MAE counter RxQ, MAE, switchdev,
 * representor proxy, SW xstats) and finally vSwitch creation.  Errors
 * unwind through the fail_* label ladder in reverse order.
 * Requires the adapter lock; leaves state INITIALIZED on success.
 */
862 sfc_attach(struct sfc_adapter *sa)
864 const efx_nic_cfg_t *encp;
865 efx_nic_t *enp = sa->nic;
868 sfc_log_init(sa, "entry");
870 SFC_ASSERT(sfc_adapter_is_locked(sa));
/* Start a new MCDI epoch so stale MC state is not trusted */
872 efx_mcdi_new_epoch(enp);
874 sfc_log_init(sa, "reset nic");
875 rc = efx_nic_reset(enp);
879 rc = sfc_sriov_attach(sa);
881 goto fail_sriov_attach;
884 * Probed NIC is sufficient for tunnel init.
885 * Initialize tunnel support to be able to use libefx
886 * efx_tunnel_config_udp_{add,remove}() in any state and
887 * efx_tunnel_reconfigure() on start up.
889 rc = efx_tunnel_init(enp);
891 goto fail_tunnel_init;
893 encp = efx_nic_cfg_get(sa->nic);
896 * Make a copy of supported tunnel encapsulations in shared
897 * memory to be used on supported Rx packet type classes get.
899 sa->priv.shared->tunnel_encaps =
900 encp->enc_tunnel_encapsulations_supported;
/* TSO is usable only if the chosen Tx datapath advertises it */
902 if (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & DEV_TX_OFFLOAD_TCP_TSO) {
903 sa->tso = encp->enc_fw_assisted_tso_v2_enabled ||
904 encp->enc_tso_v3_enabled;
906 sfc_info(sa, "TSO support isn't available on this adapter");
910 (sfc_dp_tx_offload_capa(sa->priv.dp_tx) &
911 (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
912 DEV_TX_OFFLOAD_GENEVE_TNL_TSO)) != 0) {
913 sa->tso_encap = encp->enc_fw_assisted_tso_v2_encap_enabled ||
914 encp->enc_tso_v3_enabled;
916 sfc_info(sa, "Encapsulated TSO support isn't available on this adapter");
919 sfc_log_init(sa, "estimate resource limits");
920 rc = sfc_estimate_resource_limits(sa);
922 goto fail_estimate_rsrc_limits;
/* Cache queue size bounds; libefx guarantees powers of two */
924 sa->evq_max_entries = encp->enc_evq_max_nevs;
925 SFC_ASSERT(rte_is_power_of_2(sa->evq_max_entries));
927 sa->evq_min_entries = encp->enc_evq_min_nevs;
928 SFC_ASSERT(rte_is_power_of_2(sa->evq_min_entries));
930 sa->rxq_max_entries = encp->enc_rxq_max_ndescs;
931 SFC_ASSERT(rte_is_power_of_2(sa->rxq_max_entries));
933 sa->rxq_min_entries = encp->enc_rxq_min_ndescs;
934 SFC_ASSERT(rte_is_power_of_2(sa->rxq_min_entries));
936 sa->txq_max_entries = encp->enc_txq_max_ndescs;
937 SFC_ASSERT(rte_is_power_of_2(sa->txq_max_entries));
939 sa->txq_min_entries = encp->enc_txq_min_ndescs;
941 SFC_ASSERT(rte_is_power_of_2(sa->txq_min_entries));
942 rc = sfc_intr_attach(sa);
944 goto fail_intr_attach;
946 rc = sfc_ev_attach(sa);
950 rc = sfc_port_attach(sa);
952 goto fail_port_attach;
954 rc = sfc_rss_attach(sa);
956 goto fail_rss_attach;
958 rc = sfc_filter_attach(sa);
960 goto fail_filter_attach;
962 rc = sfc_mae_counter_rxq_attach(sa);
964 goto fail_mae_counter_rxq_attach;
966 rc = sfc_mae_attach(sa);
968 goto fail_mae_attach;
970 rc = sfc_mae_switchdev_init(sa);
972 goto fail_mae_switchdev_init;
974 rc = sfc_repr_proxy_attach(sa);
976 goto fail_repr_proxy_attach;
978 sfc_log_init(sa, "fini nic");
983 rc = sfc_sw_xstats_init(sa);
985 goto fail_sw_xstats_init;
988 * Create vSwitch to be able to use VFs when PF is not started yet
989 * as DPDK port. VFs should be able to talk to each other even
992 rc = sfc_sriov_vswitch_create(sa);
994 goto fail_sriov_vswitch_create;
996 sa->state = SFC_ETHDEV_INITIALIZED;
998 sfc_log_init(sa, "done");
/* Error unwind: reverse order of the attach sequence above */
1001 fail_sriov_vswitch_create:
1002 sfc_sw_xstats_close(sa);
1004 fail_sw_xstats_init:
1006 sfc_repr_proxy_detach(sa);
1008 fail_repr_proxy_attach:
1009 sfc_mae_switchdev_fini(sa);
1011 fail_mae_switchdev_init:
1015 sfc_mae_counter_rxq_detach(sa);
1017 fail_mae_counter_rxq_attach:
1018 sfc_filter_detach(sa);
1024 sfc_port_detach(sa);
1030 sfc_intr_detach(sa);
1033 efx_nic_fini(sa->nic);
1035 fail_estimate_rsrc_limits:
1037 efx_tunnel_fini(sa->nic);
1038 sfc_sriov_detach(sa);
1043 sfc_log_init(sa, "failed %d", rc);
/*
 * Pre-detach step executed WITHOUT the adapter lock held (asserted),
 * needed because the representor proxy must stop its service outside
 * the lock before the locked sfc_detach() runs.
 */
1048 sfc_pre_detach(struct sfc_adapter *sa)
1050 sfc_log_init(sa, "entry");
1052 SFC_ASSERT(!sfc_adapter_is_locked(sa));
1054 sfc_repr_proxy_pre_detach(sa);
1056 sfc_log_init(sa, "done");
/*
 * Detach all modules in reverse order of sfc_attach().
 * Requires the adapter lock; leaves state UNINITIALIZED.
 */
1060 sfc_detach(struct sfc_adapter *sa)
1062 sfc_log_init(sa, "entry");
1064 SFC_ASSERT(sfc_adapter_is_locked(sa));
1066 sfc_sriov_vswitch_destroy(sa);
1070 sfc_repr_proxy_detach(sa);
1071 sfc_mae_switchdev_fini(sa);
1073 sfc_mae_counter_rxq_detach(sa);
1074 sfc_filter_detach(sa);
1076 sfc_port_detach(sa);
1078 sfc_intr_detach(sa);
1079 efx_tunnel_fini(sa->nic);
1080 sfc_sriov_detach(sa);
1082 sa->state = SFC_ETHDEV_UNINITIALIZED;
/*
 * Kvargs handler for the fw_variant device argument: map the string
 * value (case-insensitive) to the corresponding efx_fw_variant_t,
 * stored via the opaque uint32_t pointer.
 */
1086 sfc_kvarg_fv_variant_handler(__rte_unused const char *key,
1087 const char *value_str, void *opaque)
1089 uint32_t *value = opaque;
1091 if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DONT_CARE) == 0)
1092 *value = EFX_FW_VARIANT_DONT_CARE;
1093 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_FULL_FEATURED) == 0)
1094 *value = EFX_FW_VARIANT_FULL_FEATURED;
1095 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_LOW_LATENCY) == 0)
1096 *value = EFX_FW_VARIANT_LOW_LATENCY;
1097 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_PACKED_STREAM) == 0)
1098 *value = EFX_FW_VARIANT_PACKED_STREAM;
1099 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DPDK) == 0)
1100 *value = EFX_FW_VARIANT_DPDK;
/*
 * Determine the running firmware variant from the RxDPCPU firmware ID
 * reported by the NIC.  Returns ENOTSUP if DPCPU firmware IDs are not
 * available (variant cannot be identified).
 */
1108 sfc_get_fw_variant(struct sfc_adapter *sa, efx_fw_variant_t *efv)
1110 efx_nic_fw_info_t enfi;
1113 rc = efx_nic_get_fw_version(sa->nic, &enfi);
1116 else if (!enfi.enfi_dpcpu_fw_ids_valid)
1120 * Firmware variant can be uniquely identified by the RxDPCPU
1123 switch (enfi.enfi_rx_dpcpu_fw_id) {
1124 case EFX_RXDP_FULL_FEATURED_FW_ID:
1125 *efv = EFX_FW_VARIANT_FULL_FEATURED;
1128 case EFX_RXDP_LOW_LATENCY_FW_ID:
1129 *efv = EFX_FW_VARIANT_LOW_LATENCY;
1132 case EFX_RXDP_PACKED_STREAM_FW_ID:
1133 *efv = EFX_FW_VARIANT_PACKED_STREAM;
1136 case EFX_RXDP_DPDK_FW_ID:
1137 *efv = EFX_FW_VARIANT_DPDK;
1142 * Other firmware variants are not considered, since they are
1143 * not supported in the device parameters
1145 *efv = EFX_FW_VARIANT_DONT_CARE;
/*
 * Map a firmware variant to its device-argument string for logging.
 * NOTE(review): the parameter is efx_fw_variant_t but the case labels
 * are EFX_RXDP_*_FW_ID constants — this relies on the two enums having
 * matching numeric values; confirm against the libefx definitions.
 */
1153 sfc_fw_variant2str(efx_fw_variant_t efv)
1156 case EFX_RXDP_FULL_FEATURED_FW_ID:
1157 return SFC_KVARG_FW_VARIANT_FULL_FEATURED;
1158 case EFX_RXDP_LOW_LATENCY_FW_ID:
1159 return SFC_KVARG_FW_VARIANT_LOW_LATENCY;
1160 case EFX_RXDP_PACKED_STREAM_FW_ID:
1161 return SFC_KVARG_FW_VARIANT_PACKED_STREAM;
1162 case EFX_RXDP_DPDK_FW_ID:
1163 return SFC_KVARG_FW_VARIANT_DPDK;
/*
 * Parse and range-check the rxd_wait_timeout_ns device argument
 * (default SFC_RXD_WAIT_TIMEOUT_NS_DEF) and store it in the adapter.
 */
1170 sfc_kvarg_rxd_wait_timeout_ns(struct sfc_adapter *sa)
1175 value = SFC_RXD_WAIT_TIMEOUT_NS_DEF;
1177 rc = sfc_kvargs_process(sa, SFC_KVARG_RXD_WAIT_TIMEOUT_NS,
1178 sfc_kvarg_long_handler, &value);
/* Reject negative values and values above the HW hold-off maximum */
1183 (unsigned long)value > EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX) {
1184 sfc_err(sa, "wrong '" SFC_KVARG_RXD_WAIT_TIMEOUT_NS "' "
1185 "was set (%ld);", value);
1186 sfc_err(sa, "it must not be less than 0 or greater than %u",
1187 EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX);
1191 sa->rxd_wait_timeout_ns = value;
/*
 * Probe the NIC via libefx, honoring the fw_variant and
 * rxd_wait_timeout_ns device arguments.  Falls back to DONT_CARE when
 * the function lacks the privilege to select a firmware variant, and
 * warns if the requested variant could not be applied.
 */
1196 sfc_nic_probe(struct sfc_adapter *sa)
1198 efx_nic_t *enp = sa->nic;
1199 efx_fw_variant_t preferred_efv;
1200 efx_fw_variant_t efv;
1203 preferred_efv = EFX_FW_VARIANT_DONT_CARE;
1204 rc = sfc_kvargs_process(sa, SFC_KVARG_FW_VARIANT,
1205 sfc_kvarg_fv_variant_handler,
1208 sfc_err(sa, "invalid %s parameter value", SFC_KVARG_FW_VARIANT);
1212 rc = sfc_kvarg_rxd_wait_timeout_ns(sa);
1216 rc = efx_nic_probe(enp, preferred_efv);
1218 /* Unprivileged functions cannot set FW variant */
1219 rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
1224 rc = sfc_get_fw_variant(sa, &efv);
1225 if (rc == ENOTSUP) {
1226 sfc_warn(sa, "FW variant can not be obtained");
1232 /* Check that firmware variant was changed to the requested one */
1233 if (preferred_efv != EFX_FW_VARIANT_DONT_CARE && preferred_efv != efv) {
1234 sfc_warn(sa, "FW variant has not changed to the requested %s",
1235 sfc_fw_variant2str(preferred_efv));
1238 sfc_notice(sa, "running FW variant is %s", sfc_fw_variant2str(efv));
/*
 * Probe sequence for the PCI device: detect the EFX family, map the
 * memory BAR, create the libefx NIC object, bring up MCDI and probe
 * the NIC.  Errors unwind in reverse order via the fail_* labels.
 * Requires the adapter lock.
 */
1244 sfc_probe(struct sfc_adapter *sa)
1246 efx_bar_region_t mem_ebrp;
1247 struct rte_eth_dev *eth_dev = sa->eth_dev;
1248 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
1252 sfc_log_init(sa, "entry");
1254 SFC_ASSERT(sfc_adapter_is_locked(sa));
1256 sa->socket_id = rte_socket_id();
1257 rte_atomic32_init(&sa->restart_required);
1259 sfc_log_init(sa, "get family");
1260 rc = sfc_efx_family(pci_dev, &mem_ebrp, &sa->family);
/*
 * NOTE(review): %lu for ebr_offset may mismatch its type on some
 * targets — confirm efsys_dma_addr_t width and consider PRIx64/%zu.
 */
1265 "family is %u, membar is %u, function control window offset is %lu",
1266 sa->family, mem_ebrp.ebr_index, mem_ebrp.ebr_offset;
1268 sfc_log_init(sa, "init mem bar");
1269 rc = sfc_mem_bar_init(sa, &mem_ebrp);
1271 goto fail_mem_bar_init;
1273 sfc_log_init(sa, "create nic");
1274 rte_spinlock_init(&sa->nic_lock);
1275 rc = efx_nic_create(sa->family, (efsys_identifier_t *)sa,
1276 &sa->mem_bar, mem_ebrp.ebr_offset,
1277 &sa->nic_lock, &enp);
1279 goto fail_nic_create;
1282 rc = sfc_mcdi_init(sa);
1284 goto fail_mcdi_init;
1286 sfc_log_init(sa, "probe nic");
1287 rc = sfc_nic_probe(sa);
1289 goto fail_nic_probe;
1291 sfc_log_init(sa, "done");
/* Error unwind: reverse order of the probe sequence */
1298 sfc_log_init(sa, "destroy nic");
1300 efx_nic_destroy(enp);
1303 sfc_mem_bar_fini(sa);
1307 sfc_log_init(sa, "failed %d", rc);
/*
 * Reverse of sfc_probe(): unprobe the NIC, cancel any pending restart
 * alarm (its callback uses the private data being freed), clear the
 * MAE switch port, destroy the NIC object and unmap the BAR.
 * Requires the adapter lock; leaves state UNINITIALIZED.
 */
1312 sfc_unprobe(struct sfc_adapter *sa)
1314 efx_nic_t *enp = sa->nic;
1316 sfc_log_init(sa, "entry");
1318 SFC_ASSERT(sfc_adapter_is_locked(sa));
1320 sfc_log_init(sa, "unprobe nic");
1321 efx_nic_unprobe(enp);
1326 * Make sure there is no pending alarm to restart since we are
1327 * going to free device private which is passed as the callback
1328 * opaque data. A new alarm cannot be scheduled since MCDI is
1331 rte_eal_alarm_cancel(sfc_restart_if_required, sa);
1333 sfc_mae_clear_switch_port(sa->mae.switch_domain_id,
1334 sa->mae.switch_port_id);
1336 sfc_log_init(sa, "destroy nic");
1338 efx_nic_destroy(enp);
1340 sfc_mem_bar_fini(sa);
1343 sa->state = SFC_ETHDEV_UNINITIALIZED;
/*
 * Register a per-device log type named "<prefix>.<pci_addr>" with the
 * given default level.  Falls back to the generic driver logtype on
 * overflow or allocation failure.
 */
1347 sfc_register_logtype(const struct rte_pci_addr *pci_addr,
1348 const char *lt_prefix_str, uint32_t ll_default)
1350 size_t lt_prefix_str_size = strlen(lt_prefix_str);
1351 size_t lt_str_size_max;
1352 char *lt_str = NULL;
/* Guard against size_t overflow when building the name buffer size */
1355 if (SIZE_MAX - PCI_PRI_STR_SIZE - 1 > lt_prefix_str_size) {
1356 ++lt_prefix_str_size; /* Reserve space for prefix separator */
1357 lt_str_size_max = lt_prefix_str_size + PCI_PRI_STR_SIZE + 1;
1359 return sfc_logtype_driver;
1362 lt_str = rte_zmalloc("logtype_str", lt_str_size_max, 0);
1364 return sfc_logtype_driver;
/* Copy prefix, overwrite its NUL with the '.' separator, append address */
1366 strncpy(lt_str, lt_prefix_str, lt_prefix_str_size);
1367 lt_str[lt_prefix_str_size - 1] = '.';
1368 rte_pci_device_name(pci_addr, lt_str + lt_prefix_str_size,
1369 lt_str_size_max - lt_prefix_str_size);
1370 lt_str[lt_str_size_max - 1] = '\0';
1372 ret = rte_log_register_type_and_pick_level(lt_str, ll_default);
1376 return sfc_logtype_driver;
/*
 * Identifier of a physical HW switch: the board serial number, since all
 * functions of the same board belong to the same HW switch.
 */
1381 struct sfc_hw_switch_id {
1382 char board_sn[RTE_SIZEOF_FIELD(efx_nic_board_info_t, enbi_serial)];
/*
 * Allocate a HW switch identifier filled from the NIC board info.
 * On success *idp owns the allocation; release with
 * sfc_hw_switch_id_fini().
 */
1386 sfc_hw_switch_id_init(struct sfc_adapter *sa,
1387 struct sfc_hw_switch_id **idp)
1389 efx_nic_board_info_t board_info;
1390 struct sfc_hw_switch_id *id;
1396 id = rte_zmalloc("sfc_hw_switch_id", sizeof(*id), 0);
1400 rc = efx_nic_get_board_info(sa->nic, &board_info);
1404 memcpy(id->board_sn, board_info.enbi_serial, sizeof(id->board_sn));
/* Free a HW switch identifier obtained from sfc_hw_switch_id_init(). */
1412 sfc_hw_switch_id_fini(__rte_unused struct sfc_adapter *sa,
1413 struct sfc_hw_switch_id *id)
/*
 * Two adapters are on the same HW switch iff their board serial
 * numbers match.
 */
1419 sfc_hw_switch_ids_equal(const struct sfc_hw_switch_id *left,
1420 const struct sfc_hw_switch_id *right)
1422 return strncmp(left->board_sn, right->board_sn,
1423 sizeof(left->board_sn)) == 0;