1 /* SPDX-License-Identifier: BSD-3-Clause
3 * Copyright(c) 2019-2021 Xilinx, Inc.
4 * Copyright(c) 2016-2019 Solarflare Communications Inc.
6 * This software was jointly developed between OKTET Labs (under contract
7 * for Solarflare) and Solarflare Communications, Inc.
13 #include <rte_errno.h>
14 #include <rte_alarm.h>
19 #include "sfc_debug.h"
23 #include "sfc_mae_counter.h"
25 #include "sfc_kvargs.h"
26 #include "sfc_tweak.h"
27 #include "sfc_sw_stats.h"
31 sfc_repr_supported(const struct sfc_adapter *sa)
37 * The representor proxy should use a service lcore on the PF's socket
38 * (sa->socket_id) to be efficient. But the proxy will fall back
39 * to any socket if it is not possible to get a service core
40 * on the same socket. Check that at least one service core is
41 * available on any socket.
43 if (sfc_get_service_lcore(SOCKET_ID_ANY) == RTE_MAX_LCORE)
50 sfc_repr_available(const struct sfc_adapter_shared *sas)
52 return sas->nb_repr_rxq > 0 && sas->nb_repr_txq > 0;
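/*
 * Allocate DMA-mapped memory for the adapter through an ethdev memzone
 * and fill in the efsys_mem_t handle used by libefx.
 */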
56 sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
57 size_t len, int socket_id, efsys_mem_t *esmp)
59 const struct rte_memzone *mz;
61 sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d",
62 name, id, len, socket_id);
64 mz = rte_eth_dma_zone_reserve(sa->eth_dev, name, id, len,
65 sysconf(_SC_PAGESIZE), socket_id);
67 sfc_err(sa, "cannot reserve DMA zone for %s:%u %#x@%d: %s",
68 name, (unsigned int)id, (unsigned int)len, socket_id,
69 rte_strerror(rte_errno));
73 esmp->esm_addr = mz->iova;
74 if (esmp->esm_addr == RTE_BAD_IOVA) {
75 (void)rte_memzone_free(mz);
80 esmp->esm_base = mz->addr;
83 "DMA name=%s id=%u len=%lu socket_id=%d => virt=%p iova=%lx",
84 name, id, len, socket_id, esmp->esm_base,
85 (unsigned long)esmp->esm_addr);
91 sfc_dma_free(const struct sfc_adapter *sa, efsys_mem_t *esmp)
95 sfc_log_init(sa, "name=%s", esmp->esm_mz->name);
97 rc = rte_memzone_free(esmp->esm_mz);
99 sfc_err(sa, "rte_memzone_free(() failed: %d", rc);
101 memset(esmp, 0, sizeof(*esmp));
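/*
 * Translate an ethdev link speed mask into libefx PHY capabilities.
 * For example, ETH_LINK_SPEED_10G | ETH_LINK_SPEED_25G maps to
 * (1 << EFX_PHY_CAP_10000FDX) | (1 << EFX_PHY_CAP_25000FDX), plus
 * EFX_PHY_CAP_AN unless ETH_LINK_SPEED_FIXED is set in the mask.
 */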
105 sfc_phy_cap_from_link_speeds(uint32_t speeds)
107 uint32_t phy_caps = 0;
109 if (~speeds & ETH_LINK_SPEED_FIXED) {
110 phy_caps |= (1 << EFX_PHY_CAP_AN);
112 * If no speeds are specified in the mask, any supported
115 if (speeds == ETH_LINK_SPEED_AUTONEG)
117 (1 << EFX_PHY_CAP_1000FDX) |
118 (1 << EFX_PHY_CAP_10000FDX) |
119 (1 << EFX_PHY_CAP_25000FDX) |
120 (1 << EFX_PHY_CAP_40000FDX) |
121 (1 << EFX_PHY_CAP_50000FDX) |
122 (1 << EFX_PHY_CAP_100000FDX);
124 if (speeds & ETH_LINK_SPEED_1G)
125 phy_caps |= (1 << EFX_PHY_CAP_1000FDX);
126 if (speeds & ETH_LINK_SPEED_10G)
127 phy_caps |= (1 << EFX_PHY_CAP_10000FDX);
128 if (speeds & ETH_LINK_SPEED_25G)
129 phy_caps |= (1 << EFX_PHY_CAP_25000FDX);
130 if (speeds & ETH_LINK_SPEED_40G)
131 phy_caps |= (1 << EFX_PHY_CAP_40000FDX);
132 if (speeds & ETH_LINK_SPEED_50G)
133 phy_caps |= (1 << EFX_PHY_CAP_50000FDX);
134 if (speeds & ETH_LINK_SPEED_100G)
135 phy_caps |= (1 << EFX_PHY_CAP_100000FDX);
141 * Check the requested device-level configuration.
142 * Receive and transmit configuration is checked in the corresponding
146 sfc_check_conf(struct sfc_adapter *sa)
148 const struct rte_eth_conf *conf = &sa->eth_dev->data->dev_conf;
151 sa->port.phy_adv_cap =
152 sfc_phy_cap_from_link_speeds(conf->link_speeds) &
153 sa->port.phy_adv_cap_mask;
154 if ((sa->port.phy_adv_cap & ~(1 << EFX_PHY_CAP_AN)) == 0) {
155 sfc_err(sa, "No link speeds from mask %#x are supported",
160 #if !EFSYS_OPT_LOOPBACK
161 if (conf->lpbk_mode != 0) {
162 sfc_err(sa, "Loopback not supported");
167 if (conf->dcb_capability_en != 0) {
168 sfc_err(sa, "Priority-based flow control not supported");
172 if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
173 sfc_err(sa, "Flow Director not supported");
177 if ((conf->intr_conf.lsc != 0) &&
178 (sa->intr.type != EFX_INTR_LINE) &&
179 (sa->intr.type != EFX_INTR_MESSAGE)) {
180 sfc_err(sa, "Link status change interrupt not supported");
184 if (conf->intr_conf.rxq != 0 &&
185 (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_INTR) == 0) {
186 sfc_err(sa, "Receive queue interrupt not supported");
194 * Find out the maximum number of receive and transmit queues which could be
197 * The NIC is kept initialized on success to allow other modules to
198 * acquire defaults and capabilities.
201 sfc_estimate_resource_limits(struct sfc_adapter *sa)
203 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
204 struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
205 efx_drv_limits_t limits;
207 uint32_t evq_allocated;
208 uint32_t rxq_allocated;
209 uint32_t txq_allocated;
211 memset(&limits, 0, sizeof(limits));
213 /* Request at least one Rx and Tx queue */
214 limits.edl_min_rxq_count = 1;
215 limits.edl_min_txq_count = 1;
216 /* Management event queue plus event queue for each Tx and Rx queue */
217 limits.edl_min_evq_count =
218 1 + limits.edl_min_rxq_count + limits.edl_min_txq_count;
220 /* Divide by the number of functions to guarantee that all functions
221 * will get the promised resources
223 /* FIXME Divide by number of functions (not 2) below */
224 limits.edl_max_evq_count = encp->enc_evq_limit / 2;
225 SFC_ASSERT(limits.edl_max_evq_count >= limits.edl_min_rxq_count);
227 /* Split equally between receive and transmit */
228 limits.edl_max_rxq_count =
229 MIN(encp->enc_rxq_limit, (limits.edl_max_evq_count - 1) / 2);
230 SFC_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);
232 limits.edl_max_txq_count =
233 MIN(encp->enc_txq_limit,
234 limits.edl_max_evq_count - 1 - limits.edl_max_rxq_count);
236 if (sa->tso && encp->enc_fw_assisted_tso_v2_enabled)
237 limits.edl_max_txq_count =
238 MIN(limits.edl_max_txq_count,
239 encp->enc_fw_assisted_tso_v2_n_contexts /
240 encp->enc_hw_pf_count);
242 SFC_ASSERT(limits.edl_max_txq_count >= limits.edl_min_rxq_count);
244 /* Configure the minimum resources required for the
245 * driver to operate, and the maximum resources that the
246 * driver is capable of using.
248 efx_nic_set_drv_limits(sa->nic, &limits);
250 sfc_log_init(sa, "init nic");
251 rc = efx_nic_init(sa->nic);
255 /* Find resource dimensions assigned by firmware to this function */
256 rc = efx_nic_get_vi_pool(sa->nic, &evq_allocated, &rxq_allocated,
259 goto fail_get_vi_pool;
261 /* Firmware may still allocate more than the maximum; enforce the limits */
262 evq_allocated = MIN(evq_allocated, limits.edl_max_evq_count);
263 rxq_allocated = MIN(rxq_allocated, limits.edl_max_rxq_count);
264 txq_allocated = MIN(txq_allocated, limits.edl_max_txq_count);
267 * Subtract the management EVQ, which is not used for traffic.
268 * The resource allocation strategy is as follows:
269 * - one EVQ for management
270 * - one EVQ for each ethdev RXQ
271 * - one EVQ for each ethdev TXQ
272 * - one EVQ and one RXQ for optional MAE counters.
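 * For example (illustrative figures only): if firmware grants 10 EVQs,
 * 8 RxQs and 4 TxQs, one EVQ is kept for management, one EVQ plus one
 * RxQ may be reserved for MAE counters, representor proxy queues are
 * reserved next, and the remainder is split between ethdev Rx and Tx
 * queues below.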
274 if (evq_allocated == 0) {
275 sfc_err(sa, "count of allocated EvQ is 0");
277 goto fail_allocate_evq;
282 * Reserve the absolutely required minimum.
283 * Right now a separate EVQ is used for each Rx and Tx queue.
285 if (rxq_allocated > 0 && evq_allocated > 0) {
290 if (txq_allocated > 0 && evq_allocated > 0) {
296 if (sfc_mae_counter_rxq_required(sa) &&
297 rxq_allocated > 0 && evq_allocated > 0) {
300 sas->counters_rxq_allocated = true;
302 sas->counters_rxq_allocated = false;
305 if (sfc_repr_supported(sa) &&
306 evq_allocated >= SFC_REPR_PROXY_NB_RXQ_MIN +
307 SFC_REPR_PROXY_NB_TXQ_MIN &&
308 rxq_allocated >= SFC_REPR_PROXY_NB_RXQ_MIN &&
309 txq_allocated >= SFC_REPR_PROXY_NB_TXQ_MIN) {
312 txq_allocated -= SFC_REPR_PROXY_NB_TXQ_MIN;
313 rxq_allocated -= SFC_REPR_PROXY_NB_RXQ_MIN;
314 evq_allocated -= SFC_REPR_PROXY_NB_RXQ_MIN +
315 SFC_REPR_PROXY_NB_TXQ_MIN;
317 sas->nb_repr_rxq = SFC_REPR_PROXY_NB_RXQ_MIN;
318 sas->nb_repr_txq = SFC_REPR_PROXY_NB_TXQ_MIN;
320 /* Allocate extra representor RxQs up to the maximum */
321 extra = MIN(evq_allocated, rxq_allocated);
323 SFC_REPR_PROXY_NB_RXQ_MAX - sas->nb_repr_rxq);
324 evq_allocated -= extra;
325 rxq_allocated -= extra;
326 sas->nb_repr_rxq += extra;
328 /* Allocate extra representor TxQs up to the maximum */
329 extra = MIN(evq_allocated, txq_allocated);
331 SFC_REPR_PROXY_NB_TXQ_MAX - sas->nb_repr_txq);
332 evq_allocated -= extra;
333 txq_allocated -= extra;
334 sas->nb_repr_txq += extra;
336 sas->nb_repr_rxq = 0;
337 sas->nb_repr_txq = 0;
340 /* Add remaining allocated queues */
341 sa->rxq_max += MIN(rxq_allocated, evq_allocated / 2);
342 sa->txq_max += MIN(txq_allocated, evq_allocated - sa->rxq_max);
344 /* Keep NIC initialized */
349 efx_nic_fini(sa->nic);
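/*
 * Program exact driver limits into libefx: the EVQ/RxQ/TxQ counts are
 * derived from the configured ethdev queues plus the queues reserved
 * for internal use (see sfc_estimate_resource_limits()).
 */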
355 sfc_set_drv_limits(struct sfc_adapter *sa)
357 struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
358 const struct rte_eth_dev_data *data = sa->eth_dev->data;
359 uint32_t rxq_reserved = sfc_nb_reserved_rxq(sas);
360 uint32_t txq_reserved = sfc_nb_txq_reserved(sas);
361 efx_drv_limits_t lim;
363 memset(&lim, 0, sizeof(lim));
366 * Limits are strict since they take the initial estimation into account.
367 * The resource allocation strategy is described in
368 * sfc_estimate_resource_limits().
370 lim.edl_min_evq_count = lim.edl_max_evq_count =
371 1 + data->nb_rx_queues + data->nb_tx_queues +
372 rxq_reserved + txq_reserved;
373 lim.edl_min_rxq_count = lim.edl_max_rxq_count =
374 data->nb_rx_queues + rxq_reserved;
375 lim.edl_min_txq_count = lim.edl_max_txq_count =
376 data->nb_tx_queues + txq_reserved;
378 return efx_nic_set_drv_limits(sa->nic, &lim);
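/*
 * Choose the firmware subvariant: if neither the device-level Tx mode
 * nor any initialized TxQ requires checksum offloads, request the
 * no-Tx-checksum subvariant; otherwise request the default one.
 */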
382 sfc_set_fw_subvariant(struct sfc_adapter *sa)
384 struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
385 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
386 uint64_t tx_offloads = sa->eth_dev->data->dev_conf.txmode.offloads;
387 unsigned int txq_index;
388 efx_nic_fw_subvariant_t req_fw_subvariant;
389 efx_nic_fw_subvariant_t cur_fw_subvariant;
392 if (!encp->enc_fw_subvariant_no_tx_csum_supported) {
393 sfc_info(sa, "no-Tx-checksum subvariant not supported");
397 for (txq_index = 0; txq_index < sas->txq_count; ++txq_index) {
398 struct sfc_txq_info *txq_info = &sas->txq_info[txq_index];
400 if (txq_info->state & SFC_TXQ_INITIALIZED)
401 tx_offloads |= txq_info->offloads;
404 if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
405 DEV_TX_OFFLOAD_TCP_CKSUM |
406 DEV_TX_OFFLOAD_UDP_CKSUM |
407 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM))
408 req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_DEFAULT;
410 req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM;
412 rc = efx_nic_get_fw_subvariant(sa->nic, &cur_fw_subvariant);
414 sfc_err(sa, "failed to get FW subvariant: %d", rc);
417 sfc_info(sa, "FW subvariant is %u vs required %u",
418 cur_fw_subvariant, req_fw_subvariant);
420 if (cur_fw_subvariant == req_fw_subvariant)
423 rc = efx_nic_set_fw_subvariant(sa->nic, req_fw_subvariant);
425 sfc_err(sa, "failed to set FW subvariant %u: %d",
426 req_fw_subvariant, rc);
429 sfc_info(sa, "FW subvariant set to %u", req_fw_subvariant);
435 sfc_try_start(struct sfc_adapter *sa)
437 const efx_nic_cfg_t *encp;
440 sfc_log_init(sa, "entry");
442 SFC_ASSERT(sfc_adapter_is_locked(sa));
443 SFC_ASSERT(sa->state == SFC_ADAPTER_STARTING);
445 sfc_log_init(sa, "set FW subvariant");
446 rc = sfc_set_fw_subvariant(sa);
448 goto fail_set_fw_subvariant;
450 sfc_log_init(sa, "set resource limits");
451 rc = sfc_set_drv_limits(sa);
453 goto fail_set_drv_limits;
455 sfc_log_init(sa, "init nic");
456 rc = efx_nic_init(sa->nic);
460 encp = efx_nic_cfg_get(sa->nic);
463 * Refresh (since it may change on NIC reset/restart) the copy of
464 * supported tunnel encapsulations in shared memory; it is used
465 * when the supported Rx packet type classes are queried.
467 sa->priv.shared->tunnel_encaps =
468 encp->enc_tunnel_encapsulations_supported;
470 if (encp->enc_tunnel_encapsulations_supported != 0) {
471 sfc_log_init(sa, "apply tunnel config");
472 rc = efx_tunnel_reconfigure(sa->nic);
474 goto fail_tunnel_reconfigure;
477 rc = sfc_intr_start(sa);
479 goto fail_intr_start;
481 rc = sfc_ev_start(sa);
485 rc = sfc_port_start(sa);
487 goto fail_port_start;
489 rc = sfc_rx_start(sa);
493 rc = sfc_tx_start(sa);
497 rc = sfc_flow_start(sa);
499 goto fail_flows_insert;
501 rc = sfc_repr_proxy_start(sa);
503 goto fail_repr_proxy_start;
505 sfc_log_init(sa, "done");
508 fail_repr_proxy_start:
527 fail_tunnel_reconfigure:
528 efx_nic_fini(sa->nic);
532 fail_set_fw_subvariant:
533 sfc_log_init(sa, "failed %d", rc);
538 sfc_start(struct sfc_adapter *sa)
540 unsigned int start_tries = 3;
543 sfc_log_init(sa, "entry");
545 SFC_ASSERT(sfc_adapter_is_locked(sa));
548 case SFC_ADAPTER_CONFIGURED:
550 case SFC_ADAPTER_STARTED:
551 sfc_notice(sa, "already started");
558 sa->state = SFC_ADAPTER_STARTING;
563 * FIXME Try to recreate the vSwitch on start retry.
564 * The vSwitch is absent after MC-reboot-like events and
565 * we should recreate it. Maybe we need a proper
566 * indication instead of guessing.
569 sfc_sriov_vswitch_destroy(sa);
570 rc = sfc_sriov_vswitch_create(sa);
572 goto fail_sriov_vswitch_create;
574 rc = sfc_try_start(sa);
575 } while ((--start_tries > 0) &&
576 (rc == EIO || rc == EAGAIN || rc == ENOENT || rc == EINVAL));
581 sa->state = SFC_ADAPTER_STARTED;
582 sfc_log_init(sa, "done");
586 fail_sriov_vswitch_create:
587 sa->state = SFC_ADAPTER_CONFIGURED;
589 sfc_log_init(sa, "failed %d", rc);
594 sfc_stop(struct sfc_adapter *sa)
596 sfc_log_init(sa, "entry");
598 SFC_ASSERT(sfc_adapter_is_locked(sa));
601 case SFC_ADAPTER_STARTED:
603 case SFC_ADAPTER_CONFIGURED:
604 sfc_notice(sa, "already stopped");
607 sfc_err(sa, "stop in unexpected state %u", sa->state);
612 sa->state = SFC_ADAPTER_STOPPING;
614 sfc_repr_proxy_stop(sa);
621 efx_nic_fini(sa->nic);
623 sa->state = SFC_ADAPTER_CONFIGURED;
624 sfc_log_init(sa, "done");
628 sfc_restart(struct sfc_adapter *sa)
632 SFC_ASSERT(sfc_adapter_is_locked(sa));
634 if (sa->state != SFC_ADAPTER_STARTED)
641 sfc_err(sa, "restart failed");
647 sfc_restart_if_required(void *arg)
649 struct sfc_adapter *sa = arg;
651 /* If restart is scheduled, clear the flag and do it */
652 if (rte_atomic32_cmpset((volatile uint32_t *)&sa->restart_required,
654 sfc_adapter_lock(sa);
655 if (sa->state == SFC_ADAPTER_STARTED)
656 (void)sfc_restart(sa);
657 sfc_adapter_unlock(sa);
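/*
 * Request an adapter restart from contexts where it cannot be done
 * directly: set the restart_required flag and arm a one-shot alarm
 * which performs the restart from the alarm callback.
 *
 * Typical usage (illustrative only):
 *     if (fatal_event_received)
 *             sfc_schedule_restart(sa);
 */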
662 sfc_schedule_restart(struct sfc_adapter *sa)
666 /* Schedule restart alarm if it is not scheduled yet */
667 if (!rte_atomic32_test_and_set(&sa->restart_required))
670 rc = rte_eal_alarm_set(1, sfc_restart_if_required, sa);
672 sfc_warn(sa, "alarms are not supported, restart is pending");
674 sfc_err(sa, "cannot arm restart alarm (rc=%d)", rc);
676 sfc_notice(sa, "restart scheduled");
680 sfc_configure(struct sfc_adapter *sa)
684 sfc_log_init(sa, "entry");
686 SFC_ASSERT(sfc_adapter_is_locked(sa));
688 SFC_ASSERT(sa->state == SFC_ADAPTER_INITIALIZED ||
689 sa->state == SFC_ADAPTER_CONFIGURED);
690 sa->state = SFC_ADAPTER_CONFIGURING;
692 rc = sfc_check_conf(sa);
694 goto fail_check_conf;
696 rc = sfc_intr_configure(sa);
698 goto fail_intr_configure;
700 rc = sfc_port_configure(sa);
702 goto fail_port_configure;
704 rc = sfc_rx_configure(sa);
706 goto fail_rx_configure;
708 rc = sfc_tx_configure(sa);
710 goto fail_tx_configure;
712 rc = sfc_sw_xstats_configure(sa);
714 goto fail_sw_xstats_configure;
716 sa->state = SFC_ADAPTER_CONFIGURED;
717 sfc_log_init(sa, "done");
720 fail_sw_xstats_configure:
734 sa->state = SFC_ADAPTER_INITIALIZED;
735 sfc_log_init(sa, "failed %d", rc);
740 sfc_close(struct sfc_adapter *sa)
742 sfc_log_init(sa, "entry");
744 SFC_ASSERT(sfc_adapter_is_locked(sa));
746 SFC_ASSERT(sa->state == SFC_ADAPTER_CONFIGURED);
747 sa->state = SFC_ADAPTER_CLOSING;
749 sfc_sw_xstats_close(sa);
755 sa->state = SFC_ADAPTER_INITIALIZED;
756 sfc_log_init(sa, "done");
760 sfc_mem_bar_init(struct sfc_adapter *sa, const efx_bar_region_t *mem_ebrp)
762 struct rte_eth_dev *eth_dev = sa->eth_dev;
763 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
764 efsys_bar_t *ebp = &sa->mem_bar;
765 struct rte_mem_resource *res =
766 &pci_dev->mem_resource[mem_ebrp->ebr_index];
768 SFC_BAR_LOCK_INIT(ebp, eth_dev->data->name);
769 ebp->esb_rid = mem_ebrp->ebr_index;
770 ebp->esb_dev = pci_dev;
771 ebp->esb_base = res->addr;
773 sa->fcw_offset = mem_ebrp->ebr_offset;
779 sfc_mem_bar_fini(struct sfc_adapter *sa)
781 efsys_bar_t *ebp = &sa->mem_bar;
783 SFC_BAR_LOCK_DESTROY(ebp);
784 memset(ebp, 0, sizeof(*ebp));
788 * A fixed RSS key which has the property of being symmetric
789 * (symmetrical flows are distributed to the same CPU)
790 * and is also known to give a uniform distribution
791 * (a good distribution of traffic between different CPUs)
793 static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = {
794 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
795 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
796 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
797 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
798 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
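/*
 * Query RSS capabilities (scale context type and hash support) from the
 * NIC and set up the default hash configuration and symmetric RSS key.
 * Interrupts, events and Rx are brought up in libefx only temporarily
 * to perform the queries.
 */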
802 sfc_rss_attach(struct sfc_adapter *sa)
804 struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
807 rc = efx_intr_init(sa->nic, sa->intr.type, NULL);
811 rc = efx_ev_init(sa->nic);
815 rc = efx_rx_init(sa->nic);
819 rc = efx_rx_scale_default_support_get(sa->nic, &rss->context_type);
821 goto fail_scale_support_get;
823 rc = efx_rx_hash_default_support_get(sa->nic, &rss->hash_support);
825 goto fail_hash_support_get;
827 rc = sfc_rx_hash_init(sa);
829 goto fail_rx_hash_init;
831 efx_rx_fini(sa->nic);
832 efx_ev_fini(sa->nic);
833 efx_intr_fini(sa->nic);
835 rte_memcpy(rss->key, default_rss_key, sizeof(rss->key));
836 rss->dummy_rss_context = EFX_RSS_CONTEXT_DEFAULT;
841 fail_hash_support_get:
842 fail_scale_support_get:
843 efx_rx_fini(sa->nic);
846 efx_ev_fini(sa->nic);
849 efx_intr_fini(sa->nic);
856 sfc_rss_detach(struct sfc_adapter *sa)
858 sfc_rx_hash_fini(sa);
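/*
 * Attach the adapter: reset the NIC, attach SR-IOV and tunnel support,
 * estimate resource limits, then attach interrupts, events, port, RSS,
 * filters, MAE and the representor proxy, and finally create the
 * SR-IOV vSwitch.
 */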
862 sfc_attach(struct sfc_adapter *sa)
864 const efx_nic_cfg_t *encp;
865 efx_nic_t *enp = sa->nic;
868 sfc_log_init(sa, "entry");
870 SFC_ASSERT(sfc_adapter_is_locked(sa));
872 efx_mcdi_new_epoch(enp);
874 sfc_log_init(sa, "reset nic");
875 rc = efx_nic_reset(enp);
879 rc = sfc_sriov_attach(sa);
881 goto fail_sriov_attach;
884 * A probed NIC is sufficient for tunnel init.
885 * Initialize tunnel support to be able to use libefx
886 * efx_tunnel_config_udp_{add,remove}() in any state and
887 * efx_tunnel_reconfigure() on startup.
889 rc = efx_tunnel_init(enp);
891 goto fail_tunnel_init;
893 encp = efx_nic_cfg_get(sa->nic);
896 * Make a copy of the supported tunnel encapsulations in shared
897 * memory; it is used when the supported Rx packet type classes are queried.
899 sa->priv.shared->tunnel_encaps =
900 encp->enc_tunnel_encapsulations_supported;
902 if (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & DEV_TX_OFFLOAD_TCP_TSO) {
903 sa->tso = encp->enc_fw_assisted_tso_v2_enabled ||
904 encp->enc_tso_v3_enabled;
906 sfc_info(sa, "TSO support isn't available on this adapter");
910 (sfc_dp_tx_offload_capa(sa->priv.dp_tx) &
911 (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
912 DEV_TX_OFFLOAD_GENEVE_TNL_TSO)) != 0) {
913 sa->tso_encap = encp->enc_fw_assisted_tso_v2_encap_enabled ||
914 encp->enc_tso_v3_enabled;
916 sfc_info(sa, "Encapsulated TSO support isn't available on this adapter");
919 sfc_log_init(sa, "estimate resource limits");
920 rc = sfc_estimate_resource_limits(sa);
922 goto fail_estimate_rsrc_limits;
924 sa->evq_max_entries = encp->enc_evq_max_nevs;
925 SFC_ASSERT(rte_is_power_of_2(sa->evq_max_entries));
927 sa->evq_min_entries = encp->enc_evq_min_nevs;
928 SFC_ASSERT(rte_is_power_of_2(sa->evq_min_entries));
930 sa->rxq_max_entries = encp->enc_rxq_max_ndescs;
931 SFC_ASSERT(rte_is_power_of_2(sa->rxq_max_entries));
933 sa->rxq_min_entries = encp->enc_rxq_min_ndescs;
934 SFC_ASSERT(rte_is_power_of_2(sa->rxq_min_entries));
936 sa->txq_max_entries = encp->enc_txq_max_ndescs;
937 SFC_ASSERT(rte_is_power_of_2(sa->txq_max_entries));
939 sa->txq_min_entries = encp->enc_txq_min_ndescs;
940 SFC_ASSERT(rte_is_power_of_2(sa->txq_min_entries));
942 rc = sfc_intr_attach(sa);
944 goto fail_intr_attach;
946 rc = sfc_ev_attach(sa);
950 rc = sfc_port_attach(sa);
952 goto fail_port_attach;
954 rc = sfc_rss_attach(sa);
956 goto fail_rss_attach;
958 rc = sfc_filter_attach(sa);
960 goto fail_filter_attach;
962 rc = sfc_mae_counter_rxq_attach(sa);
964 goto fail_mae_counter_rxq_attach;
966 rc = sfc_mae_attach(sa);
968 goto fail_mae_attach;
970 rc = sfc_mae_switchdev_init(sa);
972 goto fail_mae_switchdev_init;
974 rc = sfc_repr_proxy_attach(sa);
976 goto fail_repr_proxy_attach;
978 sfc_log_init(sa, "fini nic");
983 rc = sfc_sw_xstats_init(sa);
985 goto fail_sw_xstats_init;
988 * Create the vSwitch to be able to use VFs when the PF is not yet
989 * started as a DPDK port. VFs should be able to talk to each other even
992 rc = sfc_sriov_vswitch_create(sa);
994 goto fail_sriov_vswitch_create;
996 sa->state = SFC_ADAPTER_INITIALIZED;
998 sfc_log_init(sa, "done");
1001 fail_sriov_vswitch_create:
1002 sfc_sw_xstats_close(sa);
1004 fail_sw_xstats_init:
1006 sfc_repr_proxy_detach(sa);
1008 fail_repr_proxy_attach:
1009 sfc_mae_switchdev_fini(sa);
1011 fail_mae_switchdev_init:
1015 sfc_mae_counter_rxq_detach(sa);
1017 fail_mae_counter_rxq_attach:
1018 sfc_filter_detach(sa);
1024 sfc_port_detach(sa);
1030 sfc_intr_detach(sa);
1033 efx_nic_fini(sa->nic);
1035 fail_estimate_rsrc_limits:
1037 efx_tunnel_fini(sa->nic);
1038 sfc_sriov_detach(sa);
1043 sfc_log_init(sa, "failed %d", rc);
1048 sfc_detach(struct sfc_adapter *sa)
1050 sfc_log_init(sa, "entry");
1052 SFC_ASSERT(sfc_adapter_is_locked(sa));
1054 sfc_sriov_vswitch_destroy(sa);
1058 sfc_repr_proxy_detach(sa);
1059 sfc_mae_switchdev_fini(sa);
1061 sfc_mae_counter_rxq_detach(sa);
1062 sfc_filter_detach(sa);
1064 sfc_port_detach(sa);
1066 sfc_intr_detach(sa);
1067 efx_tunnel_fini(sa->nic);
1068 sfc_sriov_detach(sa);
1070 sa->state = SFC_ADAPTER_UNINITIALIZED;
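/*
 * kvargs handler: parse a fw_variant device argument string into an
 * efx_fw_variant_t value.
 */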
1074 sfc_kvarg_fv_variant_handler(__rte_unused const char *key,
1075 const char *value_str, void *opaque)
1077 uint32_t *value = opaque;
1079 if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DONT_CARE) == 0)
1080 *value = EFX_FW_VARIANT_DONT_CARE;
1081 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_FULL_FEATURED) == 0)
1082 *value = EFX_FW_VARIANT_FULL_FEATURED;
1083 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_LOW_LATENCY) == 0)
1084 *value = EFX_FW_VARIANT_LOW_LATENCY;
1085 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_PACKED_STREAM) == 0)
1086 *value = EFX_FW_VARIANT_PACKED_STREAM;
1087 else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DPDK) == 0)
1088 *value = EFX_FW_VARIANT_DPDK;
1096 sfc_get_fw_variant(struct sfc_adapter *sa, efx_fw_variant_t *efv)
1098 efx_nic_fw_info_t enfi;
1101 rc = efx_nic_get_fw_version(sa->nic, &enfi);
1104 else if (!enfi.enfi_dpcpu_fw_ids_valid)
1108 * Firmware variant can be uniquely identified by the RxDPCPU
1111 switch (enfi.enfi_rx_dpcpu_fw_id) {
1112 case EFX_RXDP_FULL_FEATURED_FW_ID:
1113 *efv = EFX_FW_VARIANT_FULL_FEATURED;
1116 case EFX_RXDP_LOW_LATENCY_FW_ID:
1117 *efv = EFX_FW_VARIANT_LOW_LATENCY;
1120 case EFX_RXDP_PACKED_STREAM_FW_ID:
1121 *efv = EFX_FW_VARIANT_PACKED_STREAM;
1124 case EFX_RXDP_DPDK_FW_ID:
1125 *efv = EFX_FW_VARIANT_DPDK;
1130 * Other firmware variants are not considered, since they are
1131 * not supported in the device parameters
1133 *efv = EFX_FW_VARIANT_DONT_CARE;
1141 sfc_fw_variant2str(efx_fw_variant_t efv)
1144 case EFX_RXDP_FULL_FEATURED_FW_ID:
1145 return SFC_KVARG_FW_VARIANT_FULL_FEATURED;
1146 case EFX_RXDP_LOW_LATENCY_FW_ID:
1147 return SFC_KVARG_FW_VARIANT_LOW_LATENCY;
1148 case EFX_RXDP_PACKED_STREAM_FW_ID:
1149 return SFC_KVARG_FW_VARIANT_PACKED_STREAM;
1150 case EFX_RXDP_DPDK_FW_ID:
1151 return SFC_KVARG_FW_VARIANT_DPDK;
1158 sfc_kvarg_rxd_wait_timeout_ns(struct sfc_adapter *sa)
1163 value = SFC_RXD_WAIT_TIMEOUT_NS_DEF;
1165 rc = sfc_kvargs_process(sa, SFC_KVARG_RXD_WAIT_TIMEOUT_NS,
1166 sfc_kvarg_long_handler, &value);
1171 (unsigned long)value > EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX) {
1172 sfc_err(sa, "wrong '" SFC_KVARG_RXD_WAIT_TIMEOUT_NS "' "
1173 "was set (%ld);", value);
1174 sfc_err(sa, "it must not be less than 0 or greater than %u",
1175 EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX);
1179 sa->rxd_wait_timeout_ns = value;
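/*
 * Probe the NIC with the firmware variant requested via the fw_variant
 * device argument; unprivileged functions fall back to
 * EFX_FW_VARIANT_DONT_CARE, and a warning is logged if the running
 * variant differs from the request.
 */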
1184 sfc_nic_probe(struct sfc_adapter *sa)
1186 efx_nic_t *enp = sa->nic;
1187 efx_fw_variant_t preferred_efv;
1188 efx_fw_variant_t efv;
1191 preferred_efv = EFX_FW_VARIANT_DONT_CARE;
1192 rc = sfc_kvargs_process(sa, SFC_KVARG_FW_VARIANT,
1193 sfc_kvarg_fv_variant_handler,
1196 sfc_err(sa, "invalid %s parameter value", SFC_KVARG_FW_VARIANT);
1200 rc = sfc_kvarg_rxd_wait_timeout_ns(sa);
1204 rc = efx_nic_probe(enp, preferred_efv);
1206 /* Unprivileged functions cannot set FW variant */
1207 rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
1212 rc = sfc_get_fw_variant(sa, &efv);
1213 if (rc == ENOTSUP) {
1214 sfc_warn(sa, "FW variant can not be obtained");
1220 /* Check that firmware variant was changed to the requested one */
1221 if (preferred_efv != EFX_FW_VARIANT_DONT_CARE && preferred_efv != efv) {
1222 sfc_warn(sa, "FW variant has not changed to the requested %s",
1223 sfc_fw_variant2str(preferred_efv));
1226 sfc_notice(sa, "running FW variant is %s", sfc_fw_variant2str(efv));
1232 sfc_probe(struct sfc_adapter *sa)
1234 efx_bar_region_t mem_ebrp;
1235 struct rte_eth_dev *eth_dev = sa->eth_dev;
1236 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
1240 sfc_log_init(sa, "entry");
1242 SFC_ASSERT(sfc_adapter_is_locked(sa));
1244 sa->socket_id = rte_socket_id();
1245 rte_atomic32_init(&sa->restart_required);
1247 sfc_log_init(sa, "get family");
1248 rc = sfc_efx_family(pci_dev, &mem_ebrp, &sa->family);
1253 "family is %u, membar is %u, function control window offset is %lu",
1254 sa->family, mem_ebrp.ebr_index, mem_ebrp.ebr_offset);
1256 sfc_log_init(sa, "init mem bar");
1257 rc = sfc_mem_bar_init(sa, &mem_ebrp);
1259 goto fail_mem_bar_init;
1261 sfc_log_init(sa, "create nic");
1262 rte_spinlock_init(&sa->nic_lock);
1263 rc = efx_nic_create(sa->family, (efsys_identifier_t *)sa,
1264 &sa->mem_bar, mem_ebrp.ebr_offset,
1265 &sa->nic_lock, &enp);
1267 goto fail_nic_create;
1270 rc = sfc_mcdi_init(sa);
1272 goto fail_mcdi_init;
1274 sfc_log_init(sa, "probe nic");
1275 rc = sfc_nic_probe(sa);
1277 goto fail_nic_probe;
1279 sfc_log_init(sa, "done");
1286 sfc_log_init(sa, "destroy nic");
1288 efx_nic_destroy(enp);
1291 sfc_mem_bar_fini(sa);
1295 sfc_log_init(sa, "failed %d", rc);
1300 sfc_unprobe(struct sfc_adapter *sa)
1302 efx_nic_t *enp = sa->nic;
1304 sfc_log_init(sa, "entry");
1306 SFC_ASSERT(sfc_adapter_is_locked(sa));
1308 sfc_log_init(sa, "unprobe nic");
1309 efx_nic_unprobe(enp);
1314 * Make sure there is no pending restart alarm since we are
1315 * going to free the device private data which is passed as the
1316 * callback opaque data. A new alarm cannot be scheduled since MCDI is
1319 rte_eal_alarm_cancel(sfc_restart_if_required, sa);
1321 sfc_log_init(sa, "destroy nic");
1323 efx_nic_destroy(enp);
1325 sfc_mem_bar_fini(sa);
1328 sa->state = SFC_ADAPTER_UNINITIALIZED;
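/*
 * Register a dynamic log type named "<lt_prefix_str>.<PCI address>"
 * with the given default level; fall back to the generic driver
 * logtype if the name cannot be built or registered.
 */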
1332 sfc_register_logtype(const struct rte_pci_addr *pci_addr,
1333 const char *lt_prefix_str, uint32_t ll_default)
1335 size_t lt_prefix_str_size = strlen(lt_prefix_str);
1336 size_t lt_str_size_max;
1337 char *lt_str = NULL;
1340 if (SIZE_MAX - PCI_PRI_STR_SIZE - 1 > lt_prefix_str_size) {
1341 ++lt_prefix_str_size; /* Reserve space for prefix separator */
1342 lt_str_size_max = lt_prefix_str_size + PCI_PRI_STR_SIZE + 1;
1344 return sfc_logtype_driver;
1347 lt_str = rte_zmalloc("logtype_str", lt_str_size_max, 0);
1349 return sfc_logtype_driver;
1351 strncpy(lt_str, lt_prefix_str, lt_prefix_str_size);
1352 lt_str[lt_prefix_str_size - 1] = '.';
1353 rte_pci_device_name(pci_addr, lt_str + lt_prefix_str_size,
1354 lt_str_size_max - lt_prefix_str_size);
1355 lt_str[lt_str_size_max - 1] = '\0';
1357 ret = rte_log_register_type_and_pick_level(lt_str, ll_default);
1361 return sfc_logtype_driver;
1366 struct sfc_hw_switch_id {
1367 char board_sn[RTE_SIZEOF_FIELD(efx_nic_board_info_t, enbi_serial)];
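/*
 * Initialize the HW switch ID from the NIC board serial number so that
 * ports on the same board compare equal in sfc_hw_switch_ids_equal().
 */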
1371 sfc_hw_switch_id_init(struct sfc_adapter *sa,
1372 struct sfc_hw_switch_id **idp)
1374 efx_nic_board_info_t board_info;
1375 struct sfc_hw_switch_id *id;
1381 id = rte_zmalloc("sfc_hw_switch_id", sizeof(*id), 0);
1385 rc = efx_nic_get_board_info(sa->nic, &board_info);
1389 memcpy(id->board_sn, board_info.enbi_serial, sizeof(id->board_sn));
1397 sfc_hw_switch_id_fini(__rte_unused struct sfc_adapter *sa,
1398 struct sfc_hw_switch_id *id)
1404 sfc_hw_switch_ids_equal(const struct sfc_hw_switch_id *left,
1405 const struct sfc_hw_switch_id *right)
1407 return strncmp(left->board_sn, right->board_sn,
1408 sizeof(left->board_sn)) == 0;