/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2019-2021 Xilinx, Inc.
 * Copyright(c) 2016-2019 Solarflare Communications Inc.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

/* sysconf() */
#include <unistd.h>

#include <rte_errno.h>
#include <rte_alarm.h>

#include "efx.h"

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_rx.h"
#include "sfc_mae_counter.h"
#include "sfc_tx.h"
#include "sfc_kvargs.h"
#include "sfc_tweak.h"
#include "sfc_sw_stats.h"

bool
sfc_repr_supported(const struct sfc_adapter *sa)
{
	if (!sa->switchdev)
		return false;

	/*
	 * The representor proxy should use a service lcore on the PF's
	 * socket (sa->socket_id) to be efficient, but it will fall back
	 * to any socket if a service core on the same socket is not
	 * available. Check that a service core is available on at least
	 * one socket.
	 */
	if (sfc_get_service_lcore(SOCKET_ID_ANY) == RTE_MAX_LCORE)
		return false;

	return true;
}

bool
sfc_repr_available(const struct sfc_adapter_shared *sas)
{
	return sas->nb_repr_rxq > 0 && sas->nb_repr_txq > 0;
}

int
sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
	      size_t len, int socket_id, efsys_mem_t *esmp)
{
	const struct rte_memzone *mz;

	sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d",
		     name, id, len, socket_id);

	mz = rte_eth_dma_zone_reserve(sa->eth_dev, name, id, len,
				      sysconf(_SC_PAGESIZE), socket_id);
	if (mz == NULL) {
		sfc_err(sa, "cannot reserve DMA zone for %s:%u %#x@%d: %s",
			name, (unsigned int)id, (unsigned int)len, socket_id,
			rte_strerror(rte_errno));
		return ENOMEM;
	}

	esmp->esm_addr = mz->iova;
	if (esmp->esm_addr == RTE_BAD_IOVA) {
		(void)rte_memzone_free(mz);
		return EFAULT;
	}

	esmp->esm_mz = mz;
	esmp->esm_base = mz->addr;

	sfc_info(sa,
		 "DMA name=%s id=%u len=%zu socket_id=%d => virt=%p iova=%lx",
		 name, id, len, socket_id, esmp->esm_base,
		 (unsigned long)esmp->esm_addr);

	return 0;
}

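/*
 * Illustrative usage sketch (queue name and sizing are hypothetical):
 * datapath modules allocate per-queue DMA memory as
 *	rc = sfc_dma_alloc(sa, "rxq", sw_index, ring_size,
 *			   socket_id, &rxq->mem);
 * and release it with sfc_dma_free() on queue teardown.
 */
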
void
sfc_dma_free(const struct sfc_adapter *sa, efsys_mem_t *esmp)
{
	int rc;

	sfc_log_init(sa, "name=%s", esmp->esm_mz->name);

	rc = rte_memzone_free(esmp->esm_mz);
	if (rc != 0)
		sfc_err(sa, "rte_memzone_free() failed: %d", rc);

	memset(esmp, 0, sizeof(*esmp));
}

static uint32_t
sfc_phy_cap_from_link_speeds(uint32_t speeds)
{
	uint32_t phy_caps = 0;

	if (~speeds & ETH_LINK_SPEED_FIXED) {
		phy_caps |= (1 << EFX_PHY_CAP_AN);
		/*
		 * If no speeds are specified in the mask, any supported
		 * speed may be advertised, so advertise them all.
		 */
		if (speeds == ETH_LINK_SPEED_AUTONEG)
			phy_caps |=
				(1 << EFX_PHY_CAP_1000FDX) |
				(1 << EFX_PHY_CAP_10000FDX) |
				(1 << EFX_PHY_CAP_25000FDX) |
				(1 << EFX_PHY_CAP_40000FDX) |
				(1 << EFX_PHY_CAP_50000FDX) |
				(1 << EFX_PHY_CAP_100000FDX);
	}
	if (speeds & ETH_LINK_SPEED_1G)
		phy_caps |= (1 << EFX_PHY_CAP_1000FDX);
	if (speeds & ETH_LINK_SPEED_10G)
		phy_caps |= (1 << EFX_PHY_CAP_10000FDX);
	if (speeds & ETH_LINK_SPEED_25G)
		phy_caps |= (1 << EFX_PHY_CAP_25000FDX);
	if (speeds & ETH_LINK_SPEED_40G)
		phy_caps |= (1 << EFX_PHY_CAP_40000FDX);
	if (speeds & ETH_LINK_SPEED_50G)
		phy_caps |= (1 << EFX_PHY_CAP_50000FDX);
	if (speeds & ETH_LINK_SPEED_100G)
		phy_caps |= (1 << EFX_PHY_CAP_100000FDX);

	return phy_caps;
}

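/*
 * Worked example (illustrative): speeds = ETH_LINK_SPEED_10G |
 * ETH_LINK_SPEED_25G with ETH_LINK_SPEED_FIXED clear yields
 * (1 << EFX_PHY_CAP_AN) | (1 << EFX_PHY_CAP_10000FDX) |
 * (1 << EFX_PHY_CAP_25000FDX): autonegotiation is enabled but
 * restricted to the two requested speeds.
 */
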
/*
 * Check requested device level configuration.
 * Receive and transmit configuration is checked in the corresponding
 * receive and transmit modules.
 */
static int
sfc_check_conf(struct sfc_adapter *sa)
{
	const struct rte_eth_conf *conf = &sa->eth_dev->data->dev_conf;
	int rc = 0;

	sa->port.phy_adv_cap =
		sfc_phy_cap_from_link_speeds(conf->link_speeds) &
		sa->port.phy_adv_cap_mask;
	if ((sa->port.phy_adv_cap & ~(1 << EFX_PHY_CAP_AN)) == 0) {
		sfc_err(sa, "No link speeds from mask %#x are supported",
			conf->link_speeds);
		rc = EINVAL;
	}

#if !EFSYS_OPT_LOOPBACK
	if (conf->lpbk_mode != 0) {
		sfc_err(sa, "Loopback not supported");
		rc = EINVAL;
	}
#endif

	if (conf->dcb_capability_en != 0) {
		sfc_err(sa, "Priority-based flow control not supported");
		rc = EINVAL;
	}

	if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
		sfc_err(sa, "Flow Director not supported");
		rc = EINVAL;
	}

	if ((conf->intr_conf.lsc != 0) &&
	    (sa->intr.type != EFX_INTR_LINE) &&
	    (sa->intr.type != EFX_INTR_MESSAGE)) {
		sfc_err(sa, "Link status change interrupt not supported");
		rc = EINVAL;
	}

	if (conf->intr_conf.rxq != 0 &&
	    (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_INTR) == 0) {
		sfc_err(sa, "Receive queue interrupt not supported");
		rc = EINVAL;
	}

	return rc;
}

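/*
 * Note on the pattern above: rc accumulates EINVAL instead of returning
 * on the first failure, so a single configure attempt reports every
 * unsupported request at once rather than one per retry.
 */
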
/*
 * Find out maximum number of receive and transmit queues which could be
 * advertised.
 *
 * NIC is kept initialized on success to allow other modules acquire
 * defaults and capabilities.
 */
static int
sfc_estimate_resource_limits(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
	efx_drv_limits_t limits;
	int rc;
	uint32_t evq_allocated;
	uint32_t rxq_allocated;
	uint32_t txq_allocated;

	memset(&limits, 0, sizeof(limits));

	/* Request at least one Rx and Tx queue */
	limits.edl_min_rxq_count = 1;
	limits.edl_min_txq_count = 1;
	/* Management event queue plus event queue for each Tx and Rx queue */
	limits.edl_min_evq_count =
		1 + limits.edl_min_rxq_count + limits.edl_min_txq_count;

	/* Divide by number of functions to guarantee that all functions
	 * will get promised resources
	 */
	/* FIXME Divide by number of functions (not 2) below */
	limits.edl_max_evq_count = encp->enc_evq_limit / 2;
	SFC_ASSERT(limits.edl_max_evq_count >= limits.edl_min_rxq_count);

	/* Split equally between receive and transmit */
	limits.edl_max_rxq_count =
		MIN(encp->enc_rxq_limit, (limits.edl_max_evq_count - 1) / 2);
	SFC_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);

	limits.edl_max_txq_count =
		MIN(encp->enc_txq_limit,
		    limits.edl_max_evq_count - 1 - limits.edl_max_rxq_count);

	if (sa->tso && encp->enc_fw_assisted_tso_v2_enabled)
		limits.edl_max_txq_count =
			MIN(limits.edl_max_txq_count,
			    encp->enc_fw_assisted_tso_v2_n_contexts /
			    encp->enc_hw_pf_count);
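
	/*
	 * The clamp above reflects that FW-assisted TSOv2 contexts are a
	 * per-adapter pool shared by all PFs (an assumption worth noting):
	 * dividing by enc_hw_pf_count keeps this function within its fair
	 * share of contexts.
	 */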
	SFC_ASSERT(limits.edl_max_txq_count >= limits.edl_min_rxq_count);

	/* Configure the minimum required resources needed for the
	 * driver to operate, and the maximum desired resources that the
	 * driver is capable of using.
	 */
	efx_nic_set_drv_limits(sa->nic, &limits);

	sfc_log_init(sa, "init nic");
	rc = efx_nic_init(sa->nic);
	if (rc != 0)
		goto fail_nic_init;

	/* Find resource dimensions assigned by firmware to this function */
	rc = efx_nic_get_vi_pool(sa->nic, &evq_allocated, &rxq_allocated,
				 &txq_allocated);
	if (rc != 0)
		goto fail_get_vi_pool;

	/* It still may allocate more than maximum, ensure limit */
	evq_allocated = MIN(evq_allocated, limits.edl_max_evq_count);
	rxq_allocated = MIN(rxq_allocated, limits.edl_max_rxq_count);
	txq_allocated = MIN(txq_allocated, limits.edl_max_txq_count);

	/*
	 * Subtract management EVQ not used for traffic
	 * The resource allocation strategy is as follows:
	 * - one EVQ for management
	 * - one EVQ for each ethdev RXQ
	 * - one EVQ for each ethdev TXQ
	 * - one EVQ and one RXQ for optional MAE counters.
	 */
	if (evq_allocated == 0) {
		sfc_err(sa, "count of allocated EvQ is 0");
		rc = ENOMEM;
		goto fail_allocate_evq;
	}
	evq_allocated--;

	/*
	 * Reserve absolutely required minimum.
	 * Right now we use separate EVQ for Rx and Tx.
	 */
	if (rxq_allocated > 0 && evq_allocated > 0) {
		sa->rxq_max = 1;
		rxq_allocated--;
		evq_allocated--;
	}
	if (txq_allocated > 0 && evq_allocated > 0) {
		sa->txq_max = 1;
		txq_allocated--;
		evq_allocated--;
	}

	if (sfc_mae_counter_rxq_required(sa) &&
	    rxq_allocated > 0 && evq_allocated > 0) {
		rxq_allocated--;
		evq_allocated--;
		sas->counters_rxq_allocated = true;
	} else {
		sas->counters_rxq_allocated = false;
	}

	if (sfc_repr_supported(sa) &&
	    evq_allocated >= SFC_REPR_PROXY_NB_RXQ_MIN +
	    SFC_REPR_PROXY_NB_TXQ_MIN &&
	    rxq_allocated >= SFC_REPR_PROXY_NB_RXQ_MIN &&
	    txq_allocated >= SFC_REPR_PROXY_NB_TXQ_MIN) {
		unsigned int extra;

		txq_allocated -= SFC_REPR_PROXY_NB_TXQ_MIN;
		rxq_allocated -= SFC_REPR_PROXY_NB_RXQ_MIN;
		evq_allocated -= SFC_REPR_PROXY_NB_RXQ_MIN +
			SFC_REPR_PROXY_NB_TXQ_MIN;

		sas->nb_repr_rxq = SFC_REPR_PROXY_NB_RXQ_MIN;
		sas->nb_repr_txq = SFC_REPR_PROXY_NB_TXQ_MIN;

		/* Allocate extra representor RxQs up to the maximum */
		extra = MIN(evq_allocated, rxq_allocated);
		extra = MIN(extra,
			    SFC_REPR_PROXY_NB_RXQ_MAX - sas->nb_repr_rxq);
		evq_allocated -= extra;
		rxq_allocated -= extra;
		sas->nb_repr_rxq += extra;

		/* Allocate extra representor TxQs up to the maximum */
		extra = MIN(evq_allocated, txq_allocated);
		extra = MIN(extra,
			    SFC_REPR_PROXY_NB_TXQ_MAX - sas->nb_repr_txq);
		evq_allocated -= extra;
		txq_allocated -= extra;
		sas->nb_repr_txq += extra;
	} else {
		sas->nb_repr_rxq = 0;
		sas->nb_repr_txq = 0;
	}

	/* Add remaining allocated queues */
	sa->rxq_max += MIN(rxq_allocated, evq_allocated / 2);
	sa->txq_max += MIN(txq_allocated, evq_allocated - sa->rxq_max);
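
	/*
	 * Note on the split above: every traffic queue needs a dedicated
	 * EVQ, so the leftover queues are bounded by the leftover EVQs,
	 * with about half offered to Rx and the remainder to Tx.
	 */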
	/* Keep NIC initialized */
	return 0;

fail_allocate_evq:
fail_get_vi_pool:
	efx_nic_fini(sa->nic);

fail_nic_init:
	return rc;
}

static int
sfc_set_drv_limits(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
	const struct rte_eth_dev_data *data = sa->eth_dev->data;
	uint32_t rxq_reserved = sfc_nb_reserved_rxq(sas);
	uint32_t txq_reserved = sfc_nb_txq_reserved(sas);
	efx_drv_limits_t lim;

	memset(&lim, 0, sizeof(lim));

	/*
	 * Limits are strict since they take the initial estimation into
	 * account. The resource allocation strategy is described in
	 * sfc_estimate_resource_limits().
	 */
	lim.edl_min_evq_count = lim.edl_max_evq_count =
		1 + data->nb_rx_queues + data->nb_tx_queues +
		rxq_reserved + txq_reserved;
	lim.edl_min_rxq_count = lim.edl_max_rxq_count =
		data->nb_rx_queues + rxq_reserved;
	lim.edl_min_txq_count = lim.edl_max_txq_count =
		data->nb_tx_queues + txq_reserved;

	return efx_nic_set_drv_limits(sa->nic, &lim);
}

static int
sfc_set_fw_subvariant(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint64_t tx_offloads = sa->eth_dev->data->dev_conf.txmode.offloads;
	unsigned int txq_index;
	efx_nic_fw_subvariant_t req_fw_subvariant;
	efx_nic_fw_subvariant_t cur_fw_subvariant;
	int rc;

	if (!encp->enc_fw_subvariant_no_tx_csum_supported) {
		sfc_info(sa, "no-Tx-checksum subvariant not supported");
		return 0;
	}

	for (txq_index = 0; txq_index < sas->txq_count; ++txq_index) {
		struct sfc_txq_info *txq_info = &sas->txq_info[txq_index];

		if (txq_info->state & SFC_TXQ_INITIALIZED)
			tx_offloads |= txq_info->offloads;
	}

	if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
			   DEV_TX_OFFLOAD_TCP_CKSUM |
			   DEV_TX_OFFLOAD_UDP_CKSUM |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM))
		req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_DEFAULT;
	else
		req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM;

	rc = efx_nic_get_fw_subvariant(sa->nic, &cur_fw_subvariant);
	if (rc != 0) {
		sfc_err(sa, "failed to get FW subvariant: %d", rc);
		return rc;
	}

	sfc_info(sa, "FW subvariant is %u vs required %u",
		 cur_fw_subvariant, req_fw_subvariant);

	if (cur_fw_subvariant == req_fw_subvariant)
		return 0;

	rc = efx_nic_set_fw_subvariant(sa->nic, req_fw_subvariant);
	if (rc != 0) {
		sfc_err(sa, "failed to set FW subvariant %u: %d",
			req_fw_subvariant, rc);
		return rc;
	}

	sfc_info(sa, "FW subvariant set to %u", req_fw_subvariant);

	return 0;
}

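/*
 * Background (assumption, not verified here): the no-Tx-checksum FW
 * subvariant trades Tx checksum offload for a higher packet rate, and
 * the firmware accepts a subvariant change only while the function has
 * no datapath resources allocated, hence the call from sfc_try_start()
 * before efx_nic_init().
 */
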
static int
sfc_try_start(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));
	SFC_ASSERT(sa->state == SFC_ETHDEV_STARTING);

	sfc_log_init(sa, "set FW subvariant");
	rc = sfc_set_fw_subvariant(sa);
	if (rc != 0)
		goto fail_set_fw_subvariant;

	sfc_log_init(sa, "set resource limits");
	rc = sfc_set_drv_limits(sa);
	if (rc != 0)
		goto fail_set_drv_limits;

	sfc_log_init(sa, "init nic");
	rc = efx_nic_init(sa->nic);
	if (rc != 0)
		goto fail_nic_init;

	encp = efx_nic_cfg_get(sa->nic);

	/*
	 * Refresh (since it may change on NIC reset/restart) the copy of
	 * supported tunnel encapsulations in shared memory. It is used
	 * when supported Rx packet type classes are queried.
	 */
	sa->priv.shared->tunnel_encaps =
		encp->enc_tunnel_encapsulations_supported;

	if (encp->enc_tunnel_encapsulations_supported != 0) {
		sfc_log_init(sa, "apply tunnel config");
		rc = efx_tunnel_reconfigure(sa->nic);
		if (rc != 0)
			goto fail_tunnel_reconfigure;
	}

	rc = sfc_intr_start(sa);
	if (rc != 0)
		goto fail_intr_start;

	rc = sfc_ev_start(sa);
	if (rc != 0)
		goto fail_ev_start;

	rc = sfc_port_start(sa);
	if (rc != 0)
		goto fail_port_start;

	rc = sfc_rx_start(sa);
	if (rc != 0)
		goto fail_rx_start;

	rc = sfc_tx_start(sa);
	if (rc != 0)
		goto fail_tx_start;

	rc = sfc_flow_start(sa);
	if (rc != 0)
		goto fail_flows_insert;

	rc = sfc_repr_proxy_start(sa);
	if (rc != 0)
		goto fail_repr_proxy_start;

	sfc_log_init(sa, "done");
	return 0;

fail_repr_proxy_start:
	sfc_flow_stop(sa);

fail_flows_insert:
	sfc_tx_stop(sa);

fail_tx_start:
	sfc_rx_stop(sa);

fail_rx_start:
	sfc_port_stop(sa);

fail_port_start:
	sfc_ev_stop(sa);

fail_ev_start:
	sfc_intr_stop(sa);

fail_intr_start:
fail_tunnel_reconfigure:
	efx_nic_fini(sa->nic);

fail_nic_init:
fail_set_drv_limits:
fail_set_fw_subvariant:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

int
sfc_start(struct sfc_adapter *sa)
{
	unsigned int start_tries = 3;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	switch (sa->state) {
	case SFC_ETHDEV_CONFIGURED:
		break;
	case SFC_ETHDEV_STARTED:
		sfc_notice(sa, "already started");
		return 0;
	default:
		rc = EINVAL;
		goto fail_bad_state;
	}

	sa->state = SFC_ETHDEV_STARTING;

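	/*
	 * Retry rationale: a management controller reboot can make the
	 * first attempt fail with a transient errno (see the while
	 * condition below), so up to start_tries attempts are made
	 * before giving up.
	 */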
	rc = 0;
	do {
		/*
		 * FIXME Try to recreate vSwitch on start retry.
		 * vSwitch is absent after MC reboot like events and
		 * we should recreate it. Maybe we need a proper
		 * indication instead of guessing.
		 */
		if (rc != 0) {
			sfc_sriov_vswitch_destroy(sa);
			rc = sfc_sriov_vswitch_create(sa);
			if (rc != 0)
				goto fail_sriov_vswitch_create;
		}
		rc = sfc_try_start(sa);
	} while ((--start_tries > 0) &&
		 (rc == EIO || rc == EAGAIN || rc == ENOENT || rc == EINVAL));

	if (rc != 0)
		goto fail_try_start;

	sa->state = SFC_ETHDEV_STARTED;
	sfc_log_init(sa, "done");
	return 0;

fail_try_start:
fail_sriov_vswitch_create:
	sa->state = SFC_ETHDEV_CONFIGURED;

fail_bad_state:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_stop(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	switch (sa->state) {
	case SFC_ETHDEV_STARTED:
		break;
	case SFC_ETHDEV_CONFIGURED:
		sfc_notice(sa, "already stopped");
		return;
	default:
		sfc_err(sa, "stop in unexpected state %u", sa->state);
		SFC_ASSERT(B_FALSE);
		return;
	}

	sa->state = SFC_ETHDEV_STOPPING;

	sfc_repr_proxy_stop(sa);
	sfc_flow_stop(sa);
	sfc_tx_stop(sa);
	sfc_rx_stop(sa);
	sfc_port_stop(sa);
	sfc_ev_stop(sa);
	sfc_intr_stop(sa);
	efx_nic_fini(sa->nic);

	sa->state = SFC_ETHDEV_CONFIGURED;
	sfc_log_init(sa, "done");
}

static int
sfc_restart(struct sfc_adapter *sa)
{
	int rc;

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	if (sa->state != SFC_ETHDEV_STARTED)
		return EINVAL;

	sfc_stop(sa);

	rc = sfc_start(sa);
	if (rc != 0)
		sfc_err(sa, "restart failed");

	return rc;
}

static void
sfc_restart_if_required(void *arg)
{
	struct sfc_adapter *sa = arg;

	/* If restart is scheduled, clear the flag and do it */
	if (rte_atomic32_cmpset((volatile uint32_t *)&sa->restart_required,
				1, 0)) {
		sfc_adapter_lock(sa);
		if (sa->state == SFC_ETHDEV_STARTED)
			(void)sfc_restart(sa);
		sfc_adapter_unlock(sa);
	}
}

void
sfc_schedule_restart(struct sfc_adapter *sa)
{
	int rc;

	/* Schedule restart alarm if it is not scheduled yet */
	if (!rte_atomic32_test_and_set(&sa->restart_required))
		return;

	rc = rte_eal_alarm_set(1, sfc_restart_if_required, sa);
	if (rc == -ENOTSUP)
		sfc_warn(sa, "alarms are not supported, restart is pending");
	else if (rc != 0)
		sfc_err(sa, "cannot arm restart alarm (rc=%d)", rc);
	else
		sfc_notice(sa, "restart scheduled");
}

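/*
 * Note: the test-and-set on sa->restart_required pairs with the
 * compare-and-set in sfc_restart_if_required(), so at most one restart
 * alarm is pending at any time and a request arriving while one is
 * pending is coalesced into it.
 */
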
int
sfc_configure(struct sfc_adapter *sa)
{
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	SFC_ASSERT(sa->state == SFC_ETHDEV_INITIALIZED ||
		   sa->state == SFC_ETHDEV_CONFIGURED);
	sa->state = SFC_ETHDEV_CONFIGURING;

	rc = sfc_check_conf(sa);
	if (rc != 0)
		goto fail_check_conf;

	rc = sfc_intr_configure(sa);
	if (rc != 0)
		goto fail_intr_configure;

	rc = sfc_port_configure(sa);
	if (rc != 0)
		goto fail_port_configure;

	rc = sfc_rx_configure(sa);
	if (rc != 0)
		goto fail_rx_configure;

	rc = sfc_tx_configure(sa);
	if (rc != 0)
		goto fail_tx_configure;

	rc = sfc_sw_xstats_configure(sa);
	if (rc != 0)
		goto fail_sw_xstats_configure;

	sa->state = SFC_ETHDEV_CONFIGURED;
	sfc_log_init(sa, "done");
	return 0;

fail_sw_xstats_configure:
	sfc_tx_close(sa);

fail_tx_configure:
	sfc_rx_close(sa);

fail_rx_configure:
	sfc_port_close(sa);

fail_port_configure:
	sfc_intr_close(sa);

fail_intr_configure:
fail_check_conf:
	sa->state = SFC_ETHDEV_INITIALIZED;
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_close(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	SFC_ASSERT(sa->state == SFC_ETHDEV_CONFIGURED);
	sa->state = SFC_ETHDEV_CLOSING;

	sfc_sw_xstats_close(sa);
	sfc_tx_close(sa);
	sfc_rx_close(sa);
	sfc_port_close(sa);
	sfc_intr_close(sa);

	sa->state = SFC_ETHDEV_INITIALIZED;
	sfc_log_init(sa, "done");
}

static int
sfc_mem_bar_init(struct sfc_adapter *sa, const efx_bar_region_t *mem_ebrp)
{
	struct rte_eth_dev *eth_dev = sa->eth_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
	efsys_bar_t *ebp = &sa->mem_bar;
	struct rte_mem_resource *res =
		&pci_dev->mem_resource[mem_ebrp->ebr_index];

	SFC_BAR_LOCK_INIT(ebp, eth_dev->data->name);
	ebp->esb_rid = mem_ebrp->ebr_index;
	ebp->esb_dev = pci_dev;
	ebp->esb_base = res->addr;

	sa->fcw_offset = mem_ebrp->ebr_offset;

	return 0;
}

static void
sfc_mem_bar_fini(struct sfc_adapter *sa)
{
	efsys_bar_t *ebp = &sa->mem_bar;

	SFC_BAR_LOCK_DESTROY(ebp);
	memset(ebp, 0, sizeof(*ebp));
}

/*
 * A fixed RSS key which has the property of being symmetric
 * (symmetrical flows are distributed to the same CPU)
 * and is also known to give a uniform distribution
 * (a good distribution of traffic between different CPUs).
 */
static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = {
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
};

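/*
 * Background (assumption based on the key's provenance): repeating the
 * 16-bit pattern 0x6d5a makes the Toeplitz hash produce the same value
 * when the source/destination addresses and ports are swapped, which is
 * what yields the symmetry described above.
 */
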
static int
sfc_rss_attach(struct sfc_adapter *sa)
{
	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
	int rc;

	rc = efx_intr_init(sa->nic, sa->intr.type, NULL);
	if (rc != 0)
		goto fail_intr_init;

	rc = efx_ev_init(sa->nic);
	if (rc != 0)
		goto fail_ev_init;

	rc = efx_rx_init(sa->nic);
	if (rc != 0)
		goto fail_rx_init;

	rc = efx_rx_scale_default_support_get(sa->nic, &rss->context_type);
	if (rc != 0)
		goto fail_scale_support_get;

	rc = efx_rx_hash_default_support_get(sa->nic, &rss->hash_support);
	if (rc != 0)
		goto fail_hash_support_get;

	rc = sfc_rx_hash_init(sa);
	if (rc != 0)
		goto fail_rx_hash_init;

	efx_rx_fini(sa->nic);
	efx_ev_fini(sa->nic);
	efx_intr_fini(sa->nic);

	rte_memcpy(rss->key, default_rss_key, sizeof(rss->key));
	rss->dummy_rss_context = EFX_RSS_CONTEXT_DEFAULT;

	return 0;

fail_rx_hash_init:
fail_hash_support_get:
fail_scale_support_get:
	efx_rx_fini(sa->nic);

fail_rx_init:
	efx_ev_fini(sa->nic);

fail_ev_init:
	efx_intr_fini(sa->nic);

fail_intr_init:
	return rc;
}

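/*
 * Design note: sfc_rss_attach() above brings up the libefx intr/ev/rx
 * modules only long enough to query the default RSS scale and hash
 * support and to initialize the Rx hash configuration, then shuts them
 * down again, since the adapter is not started at attach time.
 */
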
static void
sfc_rss_detach(struct sfc_adapter *sa)
{
	sfc_rx_hash_fini(sa);
}

int
sfc_attach(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp;
	efx_nic_t *enp = sa->nic;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	efx_mcdi_new_epoch(enp);

	sfc_log_init(sa, "reset nic");
	rc = efx_nic_reset(enp);
	if (rc != 0)
		goto fail_nic_reset;

	rc = sfc_sriov_attach(sa);
	if (rc != 0)
		goto fail_sriov_attach;

	/*
	 * Probed NIC is sufficient for tunnel init.
	 * Initialize tunnel support to be able to use libefx
	 * efx_tunnel_config_udp_{add,remove}() in any state and
	 * efx_tunnel_reconfigure() on start up.
	 */
	rc = efx_tunnel_init(enp);
	if (rc != 0)
		goto fail_tunnel_init;

	encp = efx_nic_cfg_get(sa->nic);

	/*
	 * Make a copy of supported tunnel encapsulations in shared
	 * memory. It is used when supported Rx packet type classes
	 * are queried.
	 */
	sa->priv.shared->tunnel_encaps =
		encp->enc_tunnel_encapsulations_supported;

	if (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & DEV_TX_OFFLOAD_TCP_TSO) {
		sa->tso = encp->enc_fw_assisted_tso_v2_enabled ||
			  encp->enc_tso_v3_enabled;
		if (!sa->tso)
			sfc_info(sa, "TSO support isn't available on this adapter");
	}

	if (sa->tso &&
	    (sfc_dp_tx_offload_capa(sa->priv.dp_tx) &
	     (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
	      DEV_TX_OFFLOAD_GENEVE_TNL_TSO)) != 0) {
		sa->tso_encap = encp->enc_fw_assisted_tso_v2_encap_enabled ||
				encp->enc_tso_v3_enabled;
		if (!sa->tso_encap)
			sfc_info(sa, "Encapsulated TSO support isn't available on this adapter");
	}

	sfc_log_init(sa, "estimate resource limits");
	rc = sfc_estimate_resource_limits(sa);
	if (rc != 0)
		goto fail_estimate_rsrc_limits;

	sa->evq_max_entries = encp->enc_evq_max_nevs;
	SFC_ASSERT(rte_is_power_of_2(sa->evq_max_entries));

	sa->evq_min_entries = encp->enc_evq_min_nevs;
	SFC_ASSERT(rte_is_power_of_2(sa->evq_min_entries));

	sa->rxq_max_entries = encp->enc_rxq_max_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->rxq_max_entries));

	sa->rxq_min_entries = encp->enc_rxq_min_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->rxq_min_entries));

	sa->txq_max_entries = encp->enc_txq_max_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->txq_max_entries));

	sa->txq_min_entries = encp->enc_txq_min_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->txq_min_entries));

	rc = sfc_intr_attach(sa);
	if (rc != 0)
		goto fail_intr_attach;

	rc = sfc_ev_attach(sa);
	if (rc != 0)
		goto fail_ev_attach;

	rc = sfc_port_attach(sa);
	if (rc != 0)
		goto fail_port_attach;

	rc = sfc_rss_attach(sa);
	if (rc != 0)
		goto fail_rss_attach;

	rc = sfc_filter_attach(sa);
	if (rc != 0)
		goto fail_filter_attach;

	rc = sfc_mae_counter_rxq_attach(sa);
	if (rc != 0)
		goto fail_mae_counter_rxq_attach;

	rc = sfc_mae_attach(sa);
	if (rc != 0)
		goto fail_mae_attach;

	rc = sfc_mae_switchdev_init(sa);
	if (rc != 0)
		goto fail_mae_switchdev_init;

	rc = sfc_repr_proxy_attach(sa);
	if (rc != 0)
		goto fail_repr_proxy_attach;

	sfc_log_init(sa, "fini nic");
	efx_nic_fini(enp);

	sfc_flow_init(sa);

	rc = sfc_sw_xstats_init(sa);
	if (rc != 0)
		goto fail_sw_xstats_init;

	/*
	 * Create vSwitch to be able to use VFs when PF is not started yet
	 * as DPDK port. VFs should be able to talk to each other even
	 * if PF is down.
	 */
	rc = sfc_sriov_vswitch_create(sa);
	if (rc != 0)
		goto fail_sriov_vswitch_create;

	sa->state = SFC_ETHDEV_INITIALIZED;

	sfc_log_init(sa, "done");
	return 0;

fail_sriov_vswitch_create:
	sfc_sw_xstats_close(sa);

fail_sw_xstats_init:
	sfc_flow_fini(sa);
	sfc_repr_proxy_detach(sa);

fail_repr_proxy_attach:
	sfc_mae_switchdev_fini(sa);

fail_mae_switchdev_init:
	sfc_mae_detach(sa);

fail_mae_attach:
	sfc_mae_counter_rxq_detach(sa);

fail_mae_counter_rxq_attach:
	sfc_filter_detach(sa);

fail_filter_attach:
	sfc_rss_detach(sa);

fail_rss_attach:
	sfc_port_detach(sa);

fail_port_attach:
	sfc_ev_detach(sa);

fail_ev_attach:
	sfc_intr_detach(sa);

fail_intr_attach:
	efx_nic_fini(sa->nic);

fail_estimate_rsrc_limits:
fail_tunnel_init:
	efx_tunnel_fini(sa->nic);
	sfc_sriov_detach(sa);

fail_sriov_attach:
fail_nic_reset:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_pre_detach(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(!sfc_adapter_is_locked(sa));

	sfc_repr_proxy_pre_detach(sa);

	sfc_log_init(sa, "done");
}

void
sfc_detach(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sfc_sriov_vswitch_destroy(sa);

	sfc_flow_fini(sa);

	sfc_repr_proxy_detach(sa);
	sfc_mae_switchdev_fini(sa);
	sfc_mae_detach(sa);
	sfc_mae_counter_rxq_detach(sa);
	sfc_filter_detach(sa);
	sfc_rss_detach(sa);
	sfc_port_detach(sa);
	sfc_ev_detach(sa);
	sfc_intr_detach(sa);
	efx_tunnel_fini(sa->nic);
	sfc_sriov_detach(sa);

	sa->state = SFC_ETHDEV_UNINITIALIZED;
}

static int
sfc_kvarg_fv_variant_handler(__rte_unused const char *key,
			     const char *value_str, void *opaque)
{
	uint32_t *value = opaque;

	if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DONT_CARE) == 0)
		*value = EFX_FW_VARIANT_DONT_CARE;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_FULL_FEATURED) == 0)
		*value = EFX_FW_VARIANT_FULL_FEATURED;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_LOW_LATENCY) == 0)
		*value = EFX_FW_VARIANT_LOW_LATENCY;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_PACKED_STREAM) == 0)
		*value = EFX_FW_VARIANT_PACKED_STREAM;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DPDK) == 0)
		*value = EFX_FW_VARIANT_DPDK;
	else
		return -EINVAL;

	return 0;
}

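/*
 * Usage sketch (illustrative; the PCI address is hypothetical and the
 * accepted value strings are the SFC_KVARG_FW_VARIANT_* literals defined
 * in sfc_kvargs.h):
 *	dpdk-testpmd -a 0000:01:00.0,fw_variant=dpdk -- -i
 * selects the DPDK-optimized firmware variant at probe time.
 */
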
static int
sfc_get_fw_variant(struct sfc_adapter *sa, efx_fw_variant_t *efv)
{
	efx_nic_fw_info_t enfi;
	int rc;

	rc = efx_nic_get_fw_version(sa->nic, &enfi);
	if (rc != 0)
		return rc;
	else if (!enfi.enfi_dpcpu_fw_ids_valid)
		return ENOTSUP;

	/*
	 * Firmware variant can be uniquely identified by the RxDPCPU
	 * firmware id
	 */
	switch (enfi.enfi_rx_dpcpu_fw_id) {
	case EFX_RXDP_FULL_FEATURED_FW_ID:
		*efv = EFX_FW_VARIANT_FULL_FEATURED;
		break;

	case EFX_RXDP_LOW_LATENCY_FW_ID:
		*efv = EFX_FW_VARIANT_LOW_LATENCY;
		break;

	case EFX_RXDP_PACKED_STREAM_FW_ID:
		*efv = EFX_FW_VARIANT_PACKED_STREAM;
		break;

	case EFX_RXDP_DPDK_FW_ID:
		*efv = EFX_FW_VARIANT_DPDK;
		break;

	default:
		/*
		 * Other firmware variants are not considered, since they
		 * are not supported in the device parameters.
		 */
		*efv = EFX_FW_VARIANT_DONT_CARE;
		break;
	}

	return 0;
}

static const char *
sfc_fw_variant2str(efx_fw_variant_t efv)
{
	switch (efv) {
	case EFX_RXDP_FULL_FEATURED_FW_ID:
		return SFC_KVARG_FW_VARIANT_FULL_FEATURED;
	case EFX_RXDP_LOW_LATENCY_FW_ID:
		return SFC_KVARG_FW_VARIANT_LOW_LATENCY;
	case EFX_RXDP_PACKED_STREAM_FW_ID:
		return SFC_KVARG_FW_VARIANT_PACKED_STREAM;
	case EFX_RXDP_DPDK_FW_ID:
		return SFC_KVARG_FW_VARIANT_DPDK;
	default:
		return "unknown";
	}
}

static int
sfc_kvarg_rxd_wait_timeout_ns(struct sfc_adapter *sa)
{
	int rc;
	long value;

	value = SFC_RXD_WAIT_TIMEOUT_NS_DEF;

	rc = sfc_kvargs_process(sa, SFC_KVARG_RXD_WAIT_TIMEOUT_NS,
				sfc_kvarg_long_handler, &value);
	if (rc != 0)
		return rc;

	if (value < 0 ||
	    (unsigned long)value > EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX) {
		sfc_err(sa, "invalid '" SFC_KVARG_RXD_WAIT_TIMEOUT_NS "' "
			    "value (%ld): must be in the range [0, %u]",
			value, EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX);
		return EINVAL;
	}

	sa->rxd_wait_timeout_ns = value;
	return 0;
}

static int
sfc_nic_probe(struct sfc_adapter *sa)
{
	efx_nic_t *enp = sa->nic;
	efx_fw_variant_t preferred_efv;
	efx_fw_variant_t efv;
	int rc;

	preferred_efv = EFX_FW_VARIANT_DONT_CARE;
	rc = sfc_kvargs_process(sa, SFC_KVARG_FW_VARIANT,
				sfc_kvarg_fv_variant_handler,
				&preferred_efv);
	if (rc != 0) {
		sfc_err(sa, "invalid %s parameter value", SFC_KVARG_FW_VARIANT);
		return rc;
	}

	rc = sfc_kvarg_rxd_wait_timeout_ns(sa);
	if (rc != 0)
		return rc;

	rc = efx_nic_probe(enp, preferred_efv);
	if (rc == EACCES) {
		/* Unprivileged functions cannot set FW variant */
		rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
	}
	if (rc != 0)
		return rc;

	rc = sfc_get_fw_variant(sa, &efv);
	if (rc == ENOTSUP) {
		sfc_warn(sa, "FW variant cannot be obtained");
		return 0;
	}
	if (rc != 0)
		return rc;

	/* Check that firmware variant was changed to the requested one */
	if (preferred_efv != EFX_FW_VARIANT_DONT_CARE && preferred_efv != efv) {
		sfc_warn(sa, "FW variant has not changed to the requested %s",
			 sfc_fw_variant2str(preferred_efv));
	}

	sfc_notice(sa, "running FW variant is %s", sfc_fw_variant2str(efv));

	return 0;
}

int
sfc_probe(struct sfc_adapter *sa)
{
	efx_bar_region_t mem_ebrp;
	struct rte_eth_dev *eth_dev = sa->eth_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
	efx_nic_t *enp;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sa->socket_id = rte_socket_id();
	rte_atomic32_init(&sa->restart_required);

	sfc_log_init(sa, "get family");
	rc = sfc_efx_family(pci_dev, &mem_ebrp, &sa->family);
	if (rc != 0)
		goto fail_family;
	sfc_log_init(sa,
		     "family is %u, membar is %u, function control window offset is %lu",
		     sa->family, mem_ebrp.ebr_index, mem_ebrp.ebr_offset);

	sfc_log_init(sa, "init mem bar");
	rc = sfc_mem_bar_init(sa, &mem_ebrp);
	if (rc != 0)
		goto fail_mem_bar_init;

	sfc_log_init(sa, "create nic");
	rte_spinlock_init(&sa->nic_lock);
	rc = efx_nic_create(sa->family, (efsys_identifier_t *)sa,
			    &sa->mem_bar, mem_ebrp.ebr_offset,
			    &sa->nic_lock, &enp);
	if (rc != 0)
		goto fail_nic_create;
	sa->nic = enp;

	rc = sfc_mcdi_init(sa);
	if (rc != 0)
		goto fail_mcdi_init;

	sfc_log_init(sa, "probe nic");
	rc = sfc_nic_probe(sa);
	if (rc != 0)
		goto fail_nic_probe;

	sfc_log_init(sa, "done");
	return 0;

fail_nic_probe:
	sfc_mcdi_fini(sa);

fail_mcdi_init:
	sfc_log_init(sa, "destroy nic");
	sa->nic = NULL;
	efx_nic_destroy(enp);

fail_nic_create:
	sfc_mem_bar_fini(sa);

fail_mem_bar_init:
fail_family:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_unprobe(struct sfc_adapter *sa)
{
	efx_nic_t *enp = sa->nic;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sfc_log_init(sa, "unprobe nic");
	efx_nic_unprobe(enp);

	sfc_mcdi_fini(sa);

	/*
	 * Make sure there is no pending alarm to restart since we are
	 * going to free the device private data which is passed as the
	 * callback opaque data. A new alarm cannot be scheduled since
	 * MCDI is already torn down.
	 */
	rte_eal_alarm_cancel(sfc_restart_if_required, sa);

	sfc_log_init(sa, "destroy nic");
	sa->nic = NULL;
	efx_nic_destroy(enp);

	sfc_mem_bar_fini(sa);

	sa->state = SFC_ETHDEV_UNINITIALIZED;
}

uint32_t
sfc_register_logtype(const struct rte_pci_addr *pci_addr,
		     const char *lt_prefix_str, uint32_t ll_default)
{
	size_t lt_prefix_str_size = strlen(lt_prefix_str);
	size_t lt_str_size_max;
	char *lt_str = NULL;
	int ret;

	if (SIZE_MAX - PCI_PRI_STR_SIZE - 1 > lt_prefix_str_size) {
		++lt_prefix_str_size; /* Reserve space for prefix separator */
		lt_str_size_max = lt_prefix_str_size + PCI_PRI_STR_SIZE + 1;
	} else {
		return sfc_logtype_driver;
	}

	lt_str = rte_zmalloc("logtype_str", lt_str_size_max, 0);
	if (lt_str == NULL)
		return sfc_logtype_driver;

	strncpy(lt_str, lt_prefix_str, lt_prefix_str_size);
	lt_str[lt_prefix_str_size - 1] = '.';
	rte_pci_device_name(pci_addr, lt_str + lt_prefix_str_size,
			    lt_str_size_max - lt_prefix_str_size);
	lt_str[lt_str_size_max - 1] = '\0';

	ret = rte_log_register_type_and_pick_level(lt_str, ll_default);
	rte_free(lt_str);

	if (ret < 0)
		return sfc_logtype_driver;

	return ret;
}

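/*
 * Illustrative result (PCI address hypothetical): with lt_prefix_str
 * "pmd.net.sfc" and device 0000:01:00.0, the registered logtype name is
 * "pmd.net.sfc.0000:01:00.0", so a per-device log level can be selected
 * with --log-level=pmd.net.sfc.0000:01:00.0:debug.
 */
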
struct sfc_hw_switch_id {
	char	board_sn[RTE_SIZEOF_FIELD(efx_nic_board_info_t, enbi_serial)];
};

int
sfc_hw_switch_id_init(struct sfc_adapter *sa,
		      struct sfc_hw_switch_id **idp)
{
	efx_nic_board_info_t board_info;
	struct sfc_hw_switch_id *id;
	int rc;

	if (idp == NULL)
		return EINVAL;

	id = rte_zmalloc("sfc_hw_switch_id", sizeof(*id), 0);
	if (id == NULL)
		return ENOMEM;

	rc = efx_nic_get_board_info(sa->nic, &board_info);
	if (rc != 0) {
		/* Do not leak the ID allocation on board info failure */
		rte_free(id);
		return rc;
	}

	memcpy(id->board_sn, board_info.enbi_serial, sizeof(id->board_sn));

	*idp = id;

	return 0;
}

void
sfc_hw_switch_id_fini(__rte_unused struct sfc_adapter *sa,
		      struct sfc_hw_switch_id *id)
{
	rte_free(id);
}

bool
sfc_hw_switch_ids_equal(const struct sfc_hw_switch_id *left,
			const struct sfc_hw_switch_id *right)
{
	return strncmp(left->board_sn, right->board_sn,
		       sizeof(left->board_sn)) == 0;
}
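
/*
 * Design note: the board serial number serves as the HW switch
 * identifier on the assumption that all PCI functions of one physical
 * adapter share the same board, so they compare equal here and end up
 * in the same switch domain.
 */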