4 * Copyright (c) 2016-2017 Solarflare Communications Inc.
7 * This software was jointly developed between OKTET Labs (under contract
8 * for Solarflare) and Solarflare Communications, Inc.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are met:
13 * 1. Redistributions of source code must retain the above copyright notice,
14 * this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <rte_errno.h>
36 #include <rte_alarm.h>
48 sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
49 size_t len, int socket_id, efsys_mem_t *esmp)
51 const struct rte_memzone *mz;
53 sfc_log_init(sa, "name=%s id=%u len=%lu socket_id=%d",
54 name, id, len, socket_id);
56 mz = rte_eth_dma_zone_reserve(sa->eth_dev, name, id, len,
57 sysconf(_SC_PAGESIZE), socket_id);
59 sfc_err(sa, "cannot reserve DMA zone for %s:%u %#x@%d: %s",
60 name, (unsigned int)id, (unsigned int)len, socket_id,
61 rte_strerror(rte_errno));
65 esmp->esm_addr = mz->iova;
66 if (esmp->esm_addr == RTE_BAD_IOVA) {
67 (void)rte_memzone_free(mz);
72 esmp->esm_base = mz->addr;
78 sfc_dma_free(const struct sfc_adapter *sa, efsys_mem_t *esmp)
82 sfc_log_init(sa, "name=%s", esmp->esm_mz->name);
84 rc = rte_memzone_free(esmp->esm_mz);
86 sfc_err(sa, "rte_memzone_free(() failed: %d", rc);
88 memset(esmp, 0, sizeof(*esmp));
/*
 * Map an rte_eth link_speeds bit-mask onto the libefx PHY capability
 * mask used to advertise link speeds.
 * NOTE(review): interior lines of this function are elided in this
 * view (e.g. the final return of phy_caps) -- confirm exact control
 * flow against the full source.
 */
92 sfc_phy_cap_from_link_speeds(uint32_t speeds)
94 	uint32_t phy_caps = 0;
/* Autonegotiation is advertised whenever the FIXED flag is clear */
96 	if (~speeds & ETH_LINK_SPEED_FIXED) {
97 		phy_caps |= (1 << EFX_PHY_CAP_AN);
99 	 * If no speeds are specified in the mask, any supported
/* An empty (AUTONEG) mask advertises all of 1G/10G/40G full duplex */
102 		if (speeds == ETH_LINK_SPEED_AUTONEG)
104 				(1 << EFX_PHY_CAP_1000FDX) |
105 				(1 << EFX_PHY_CAP_10000FDX) |
106 				(1 << EFX_PHY_CAP_40000FDX);
/* Explicit speed requests map 1:1 to full-duplex PHY capabilities */
108 	if (speeds & ETH_LINK_SPEED_1G)
109 		phy_caps |= (1 << EFX_PHY_CAP_1000FDX);
110 	if (speeds & ETH_LINK_SPEED_10G)
111 		phy_caps |= (1 << EFX_PHY_CAP_10000FDX);
112 	if (speeds & ETH_LINK_SPEED_40G)
113 		phy_caps |= (1 << EFX_PHY_CAP_40000FDX);
119  * Check requested device level configuration.
120  * Receive and transmit configuration is checked in corresponding
/*
 * Validates dev_conf at device level and rejects features the PMD does
 * not support (loopback, PFC/DCB, Flow Director, Rx queue interrupts;
 * LSC interrupt only with line/message interrupts). Error paths are
 * elided in this view -- presumably each sfc_err() is followed by an
 * rc assignment; TODO confirm against the full source.
 */
124 sfc_check_conf(struct sfc_adapter *sa)
126 	const struct rte_eth_conf *conf = &sa->eth_dev->data->dev_conf;
/* Requested speeds must intersect the PHY's advertised capability mask
 * (ignoring the AN-only bit, which alone means no usable speed) */
129 	sa->port.phy_adv_cap =
130 		sfc_phy_cap_from_link_speeds(conf->link_speeds) &
131 		sa->port.phy_adv_cap_mask;
132 	if ((sa->port.phy_adv_cap & ~(1 << EFX_PHY_CAP_AN)) == 0) {
133 		sfc_err(sa, "No link speeds from mask %#x are supported",
138 	if (conf->lpbk_mode != 0) {
139 		sfc_err(sa, "Loopback not supported");
143 	if (conf->dcb_capability_en != 0) {
144 		sfc_err(sa, "Priority-based flow control not supported");
148 	if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
149 		sfc_err(sa, "Flow Director not supported");
/* LSC interrupt requires a real interrupt mode (line or message) */
153 	if ((conf->intr_conf.lsc != 0) &&
154 	    (sa->intr.type != EFX_INTR_LINE) &&
155 	    (sa->intr.type != EFX_INTR_MESSAGE)) {
156 		sfc_err(sa, "Link status change interrupt not supported");
160 	if (conf->intr_conf.rxq != 0) {
161 		sfc_err(sa, "Receive queue interrupt not supported");
169  * Find out maximum number of receive and transmit queues which could be
172  * NIC is kept initialized on success to allow other modules acquire
173  * defaults and capabilities.
/*
 * Sets sa->rxq_max / sa->txq_max from firmware-allocated VI pool
 * dimensions, bounded by the driver limits computed below.
 */
176 sfc_estimate_resource_limits(struct sfc_adapter *sa)
178 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
179 	efx_drv_limits_t limits;
181 	uint32_t evq_allocated;
182 	uint32_t rxq_allocated;
183 	uint32_t txq_allocated;
185 	memset(&limits, 0, sizeof(limits));
187 	/* Request at least one Rx and Tx queue */
188 	limits.edl_min_rxq_count = 1;
189 	limits.edl_min_txq_count = 1;
190 	/* Management event queue plus event queue for each Tx and Rx queue */
191 	limits.edl_min_evq_count =
192 		1 + limits.edl_min_rxq_count + limits.edl_min_txq_count;
194 	/* Divide by number of functions to guarantee that all functions
195 	 * will get promised resources
197 	/* FIXME Divide by number of functions (not 2) below */
198 	limits.edl_max_evq_count = encp->enc_evq_limit / 2;
199 	SFC_ASSERT(limits.edl_max_evq_count >= limits.edl_min_rxq_count);
201 	/* Split equally between receive and transmit */
/* One EVQ is reserved for management, hence the "- 1" below */
202 	limits.edl_max_rxq_count =
203 		MIN(encp->enc_rxq_limit, (limits.edl_max_evq_count - 1) / 2);
204 	SFC_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);
206 	limits.edl_max_txq_count =
207 		MIN(encp->enc_txq_limit,
208 		    limits.edl_max_evq_count - 1 - limits.edl_max_rxq_count);
/* NOTE(review): the guard for this TSO-context clamp is elided here --
 * presumably it applies only when FW-assisted TSOv2 is used; confirm */
211 	limits.edl_max_txq_count =
212 		MIN(limits.edl_max_txq_count,
213 		    encp->enc_fw_assisted_tso_v2_n_contexts /
214 		    encp->enc_hw_pf_count);
216 	SFC_ASSERT(limits.edl_max_txq_count >= limits.edl_min_rxq_count);
218 	/* Configure the minimum required resources needed for the
219 	 * driver to operate, and the maximum desired resources that the
220 	 * driver is capable of using.
222 	efx_nic_set_drv_limits(sa->nic, &limits);
224 	sfc_log_init(sa, "init nic");
225 	rc = efx_nic_init(sa->nic);
229 	/* Find resource dimensions assigned by firmware to this function */
230 	rc = efx_nic_get_vi_pool(sa->nic, &evq_allocated, &rxq_allocated,
233 		goto fail_get_vi_pool;
235 	/* It still may allocate more than maximum, ensure limit */
236 	evq_allocated = MIN(evq_allocated, limits.edl_max_evq_count);
237 	rxq_allocated = MIN(rxq_allocated, limits.edl_max_rxq_count);
238 	txq_allocated = MIN(txq_allocated, limits.edl_max_txq_count);
240 	/* Subtract management EVQ not used for traffic */
241 	SFC_ASSERT(evq_allocated > 0);
244 	/* Right now we use separate EVQ for Rx and Tx */
245 	sa->rxq_max = MIN(rxq_allocated, evq_allocated / 2);
246 	sa->txq_max = MIN(txq_allocated, evq_allocated - sa->rxq_max);
248 	/* Keep NIC initialized */
/* Error path: NIC is shut down again so probe state stays consistent */
253 	efx_nic_fini(sa->nic);
/*
 * Pin driver resource limits to the exact queue counts configured by
 * the application (min == max), plus one management event queue, and
 * push them to libefx before NIC (re)initialization.
 */
258 sfc_set_drv_limits(struct sfc_adapter *sa)
260 	const struct rte_eth_dev_data *data = sa->eth_dev->data;
261 	efx_drv_limits_t lim;
263 	memset(&lim, 0, sizeof(lim));
265 	/* Limits are strict since take into account initial estimation */
/* +1 accounts for the management EVQ in addition to one EVQ per queue */
266 	lim.edl_min_evq_count = lim.edl_max_evq_count =
267 		1 + data->nb_rx_queues + data->nb_tx_queues;
268 	lim.edl_min_rxq_count = lim.edl_max_rxq_count = data->nb_rx_queues;
269 	lim.edl_min_txq_count = lim.edl_max_txq_count = data->nb_tx_queues;
271 	return efx_nic_set_drv_limits(sa->nic, &lim);
/*
 * Single attempt to bring the adapter up: set limits, init the NIC,
 * apply tunnel config if supported, then start interrupts, events,
 * port, Rx, Tx and insert flow rules -- unwinding in reverse order on
 * failure. Called under the adapter lock in STARTING state.
 * NOTE(review): most fail_* unwind labels are elided in this view.
 */
275 sfc_try_start(struct sfc_adapter *sa)
277 	const efx_nic_cfg_t *encp;
280 	sfc_log_init(sa, "entry");
282 	SFC_ASSERT(sfc_adapter_is_locked(sa));
283 	SFC_ASSERT(sa->state == SFC_ADAPTER_STARTING);
285 	sfc_log_init(sa, "set resource limits");
286 	rc = sfc_set_drv_limits(sa);
288 		goto fail_set_drv_limits;
290 	sfc_log_init(sa, "init nic");
291 	rc = efx_nic_init(sa->nic);
/* Tunnel (encap) configuration is only applied on capable firmware */
295 	encp = efx_nic_cfg_get(sa->nic);
296 	if (encp->enc_tunnel_encapsulations_supported != 0) {
297 		sfc_log_init(sa, "apply tunnel config");
298 		rc = efx_tunnel_reconfigure(sa->nic);
300 			goto fail_tunnel_reconfigure;
303 	rc = sfc_intr_start(sa);
305 		goto fail_intr_start;
307 	rc = sfc_ev_start(sa);
311 	rc = sfc_port_start(sa);
313 		goto fail_port_start;
315 	rc = sfc_rx_start(sa);
319 	rc = sfc_tx_start(sa);
323 	rc = sfc_flow_start(sa);
325 		goto fail_flows_insert;
327 	sfc_log_init(sa, "done");
346 fail_tunnel_reconfigure:
347 	efx_nic_fini(sa->nic);
351 	sfc_log_init(sa, "failed %d", rc);
/*
 * Start the adapter from CONFIGURED state, retrying sfc_try_start() up
 * to three times on transient errors (EIO/EAGAIN/ENOENT/EINVAL, e.g.
 * after an MC reboot). Idempotent if already STARTED. Reverts state to
 * CONFIGURED on failure. Called under the adapter lock.
 */
356 sfc_start(struct sfc_adapter *sa)
358 	unsigned int start_tries = 3;
361 	sfc_log_init(sa, "entry");
363 	SFC_ASSERT(sfc_adapter_is_locked(sa));
366 	case SFC_ADAPTER_CONFIGURED:
368 	case SFC_ADAPTER_STARTED:
369 		sfc_info(sa, "already started");
376 	sa->state = SFC_ADAPTER_STARTING;
/* Retry only on error codes that may be transient */
379 		rc = sfc_try_start(sa);
380 	} while ((--start_tries > 0) &&
381 		 (rc == EIO || rc == EAGAIN || rc == ENOENT || rc == EINVAL));
386 	sa->state = SFC_ADAPTER_STARTED;
387 	sfc_log_init(sa, "done");
391 	sa->state = SFC_ADAPTER_CONFIGURED;
393 	sfc_log_init(sa, "failed %d", rc);
/*
 * Stop the adapter: STARTED -> STOPPING -> CONFIGURED. Idempotent if
 * already stopped; logs an error in any other state. Module stop calls
 * (flows/tx/rx/port/ev/intr) are elided in this view. Called under the
 * adapter lock.
 */
398 sfc_stop(struct sfc_adapter *sa)
400 	sfc_log_init(sa, "entry");
402 	SFC_ASSERT(sfc_adapter_is_locked(sa));
405 	case SFC_ADAPTER_STARTED:
407 	case SFC_ADAPTER_CONFIGURED:
408 		sfc_info(sa, "already stopped");
411 		sfc_err(sa, "stop in unexpected state %u", sa->state);
416 	sa->state = SFC_ADAPTER_STOPPING;
424 	efx_nic_fini(sa->nic);
426 	sa->state = SFC_ADAPTER_CONFIGURED;
427 	sfc_log_init(sa, "done");
/*
 * Restart a STARTED adapter (stop + start, bodies elided in this
 * view); no-op with an error in any other state. Called under the
 * adapter lock.
 */
431 sfc_restart(struct sfc_adapter *sa)
435 	SFC_ASSERT(sfc_adapter_is_locked(sa));
437 	if (sa->state != SFC_ADAPTER_STARTED)
444 		sfc_err(sa, "restart failed");
/*
 * Alarm callback: perform a deferred restart if one was scheduled.
 * Atomically clears restart_required so concurrent schedulers cannot
 * trigger a second restart for the same request, then restarts under
 * the adapter lock only if the adapter is still STARTED.
 */
450 sfc_restart_if_required(void *arg)
452 	struct sfc_adapter *sa = arg;
454 	/* If restart is scheduled, clear the flag and do it */
455 	if (rte_atomic32_cmpset((volatile uint32_t *)&sa->restart_required,
457 		sfc_adapter_lock(sa);
458 		if (sa->state == SFC_ADAPTER_STARTED)
459 			(void)sfc_restart(sa);
460 		sfc_adapter_unlock(sa);
/*
 * Schedule an asynchronous adapter restart via an EAL alarm. The
 * restart_required test-and-set makes scheduling idempotent while a
 * restart is pending. Safe to call from contexts that cannot restart
 * synchronously (e.g. event processing).
 */
465 sfc_schedule_restart(struct sfc_adapter *sa)
469 	/* Schedule restart alarm if it is not scheduled yet */
470 	if (!rte_atomic32_test_and_set(&sa->restart_required))
/* 1us delay: fire the callback as soon as the alarm thread can run */
473 	rc = rte_eal_alarm_set(1, sfc_restart_if_required, sa);
475 		sfc_warn(sa, "alarms are not supported, restart is pending");
477 		sfc_err(sa, "cannot arm restart alarm (rc=%d)", rc);
479 		sfc_info(sa, "restart scheduled");
/*
 * Apply device configuration: INITIALIZED/CONFIGURED -> CONFIGURING ->
 * CONFIGURED. Validates the config, then configures interrupts, port,
 * Rx and Tx in order; on failure the fail_* unwind path (elided here)
 * reverts to INITIALIZED. Called under the adapter lock.
 */
483 sfc_configure(struct sfc_adapter *sa)
487 	sfc_log_init(sa, "entry");
489 	SFC_ASSERT(sfc_adapter_is_locked(sa));
/* Reconfiguration of an already-configured adapter is allowed */
491 	SFC_ASSERT(sa->state == SFC_ADAPTER_INITIALIZED ||
492 		   sa->state == SFC_ADAPTER_CONFIGURED);
493 	sa->state = SFC_ADAPTER_CONFIGURING;
495 	rc = sfc_check_conf(sa);
497 		goto fail_check_conf;
499 	rc = sfc_intr_configure(sa);
501 		goto fail_intr_configure;
503 	rc = sfc_port_configure(sa);
505 		goto fail_port_configure;
507 	rc = sfc_rx_configure(sa);
509 		goto fail_rx_configure;
511 	rc = sfc_tx_configure(sa);
513 		goto fail_tx_configure;
515 	sa->state = SFC_ADAPTER_CONFIGURED;
516 	sfc_log_init(sa, "done");
530 	sa->state = SFC_ADAPTER_INITIALIZED;
531 	sfc_log_init(sa, "failed %d", rc);
/*
 * Tear down device configuration: CONFIGURED -> CLOSING ->
 * INITIALIZED. Per-module close calls (tx/rx/port/intr) are elided in
 * this view. Called under the adapter lock.
 */
536 sfc_close(struct sfc_adapter *sa)
538 	sfc_log_init(sa, "entry");
540 	SFC_ASSERT(sfc_adapter_is_locked(sa));
542 	SFC_ASSERT(sa->state == SFC_ADAPTER_CONFIGURED);
543 	sa->state = SFC_ADAPTER_CLOSING;
550 	sa->state = SFC_ADAPTER_INITIALIZED;
551 	sfc_log_init(sa, "done");
/*
 * Locate the first memory BAR of the underlying PCI device and record
 * it in sa->mem_bar for libefx register access. The not-found error
 * path is elided in this view.
 */
555 sfc_mem_bar_init(struct sfc_adapter *sa)
557 	struct rte_eth_dev *eth_dev = sa->eth_dev;
558 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
559 	efsys_bar_t *ebp = &sa->mem_bar;
561 	struct rte_mem_resource *res;
563 	for (i = 0; i < RTE_DIM(pci_dev->mem_resource); i++) {
564 		res = &pci_dev->mem_resource[i];
/* A memory BAR has both a non-zero length and a physical address */
565 		if ((res->len != 0) && (res->phys_addr != 0)) {
566 			/* Found first memory BAR */
567 			SFC_BAR_LOCK_INIT(ebp, eth_dev->data->name);
569 			ebp->esb_dev = pci_dev;
570 			ebp->esb_base = res->addr;
/*
 * Release the memory BAR descriptor: destroy its lock and clear the
 * structure so stale pointers cannot be reused.
 */
579 sfc_mem_bar_fini(struct sfc_adapter *sa)
581 	efsys_bar_t *ebp = &sa->mem_bar;
583 	SFC_BAR_LOCK_DESTROY(ebp);
584 	memset(ebp, 0, sizeof(*ebp));
587 #if EFSYS_OPT_RX_SCALE
589  * A fixed RSS key which has a property of being symmetric
590  * (symmetrical flows are distributed to the same CPU)
591  * and also known to give a uniform distribution
592  * (a good distribution of traffic between different CPUs)
/* The repeating 0x6d,0x5a byte pattern is the well-known symmetric
 * Toeplitz RSS key */
594 static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = {
595 	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
596 	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
597 	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
598 	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
599 	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
603 #if EFSYS_OPT_RX_SCALE
/*
 * Query RSS scale/hash support defaults from the NIC. Temporarily
 * brings up intr, ev and rx libefx modules (torn down again before
 * return on both success and failure paths), then records the default
 * hash types and key in the adapter. The #else stub below is used when
 * RX scaling support is compiled out.
 */
605 sfc_set_rss_defaults(struct sfc_adapter *sa)
609 	rc = efx_intr_init(sa->nic, sa->intr.type, NULL);
613 	rc = efx_ev_init(sa->nic);
617 	rc = efx_rx_init(sa->nic);
621 	rc = efx_rx_scale_default_support_get(sa->nic, &sa->rss_support);
623 		goto fail_scale_support_get;
625 	rc = efx_rx_hash_default_support_get(sa->nic, &sa->hash_support);
627 		goto fail_hash_support_get;
/* Success path: modules were only needed for the queries above */
629 	efx_rx_fini(sa->nic);
630 	efx_ev_fini(sa->nic);
631 	efx_intr_fini(sa->nic);
633 	sa->rss_hash_types = sfc_rte_to_efx_hash_type(SFC_RSS_OFFLOADS);
635 	rte_memcpy(sa->rss_key, default_rss_key, sizeof(sa->rss_key));
639 fail_hash_support_get:
640 fail_scale_support_get:
/* Unwind in reverse order of initialization */
642 	efx_ev_fini(sa->nic);
645 	efx_intr_fini(sa->nic);
652 sfc_set_rss_defaults(__rte_unused struct sfc_adapter *sa)
/*
 * Attach to a probed NIC: reset it, initialize tunnel support, detect
 * TSO capability, estimate queue limits, then attach interrupt, event,
 * port, RSS-defaults and filter modules. Leaves the adapter in
 * INITIALIZED state with the NIC finalized (estimate keeps it up only
 * until "fini nic" below). Called under the adapter lock; unwind
 * labels are partially elided in this view.
 */
659 sfc_attach(struct sfc_adapter *sa)
661 	const efx_nic_cfg_t *encp;
662 	efx_nic_t *enp = sa->nic;
665 	sfc_log_init(sa, "entry");
667 	SFC_ASSERT(sfc_adapter_is_locked(sa));
/* Start a new MCDI epoch so stale MC state is discarded */
669 	efx_mcdi_new_epoch(enp);
671 	sfc_log_init(sa, "reset nic");
672 	rc = efx_nic_reset(enp);
677 	 * Probed NIC is sufficient for tunnel init.
678 	 * Initialize tunnel support to be able to use libefx
679 	 * efx_tunnel_config_udp_{add,remove}() in any state and
680 	 * efx_tunnel_reconfigure() on start up.
682 	rc = efx_tunnel_init(enp);
684 		goto fail_tunnel_init;
686 	encp = efx_nic_cfg_get(sa->nic);
/* TSO is usable only if the chosen Tx datapath supports it AND the
 * firmware provides FW-assisted TSOv2 */
688 	if (sa->dp_tx->features & SFC_DP_TX_FEAT_TSO) {
689 		sa->tso = encp->enc_fw_assisted_tso_v2_enabled;
692 				"TSO support isn't available on this adapter");
695 	sfc_log_init(sa, "estimate resource limits");
696 	rc = sfc_estimate_resource_limits(sa);
698 		goto fail_estimate_rsrc_limits;
700 	sa->txq_max_entries = encp->enc_txq_max_ndescs;
701 	SFC_ASSERT(rte_is_power_of_2(sa->txq_max_entries));
703 	rc = sfc_intr_attach(sa);
705 		goto fail_intr_attach;
707 	rc = sfc_ev_attach(sa);
711 	rc = sfc_port_attach(sa);
713 		goto fail_port_attach;
715 	rc = sfc_set_rss_defaults(sa);
717 		goto fail_set_rss_defaults;
719 	rc = sfc_filter_attach(sa);
721 		goto fail_filter_attach;
723 	sfc_log_init(sa, "fini nic");
728 	sa->state = SFC_ADAPTER_INITIALIZED;
730 	sfc_log_init(sa, "done");
734 fail_set_rss_defaults:
/* Unwind: shut the NIC down and release tunnel state */
744 	efx_nic_fini(sa->nic);
746 fail_estimate_rsrc_limits:
748 	efx_tunnel_fini(sa->nic);
752 	sfc_log_init(sa, "failed %d", rc);
/*
 * Reverse of sfc_attach(): detach modules (partially elided in this
 * view), release tunnel state and return to UNINITIALIZED. Called
 * under the adapter lock.
 */
757 sfc_detach(struct sfc_adapter *sa)
759 	sfc_log_init(sa, "entry");
761 	SFC_ASSERT(sfc_adapter_is_locked(sa));
765 	sfc_filter_detach(sa);
769 	efx_tunnel_fini(sa->nic);
771 	sa->state = SFC_ADAPTER_UNINITIALIZED;
/*
 * Probe the PCI device: map the memory BAR, detect the EFX family from
 * PCI IDs, create the libefx NIC object, bring up MCDI transport and
 * probe the NIC. Unwinds in reverse order on failure (labels partially
 * elided in this view). Called under the adapter lock.
 */
775 sfc_probe(struct sfc_adapter *sa)
777 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(sa->eth_dev);
781 	sfc_log_init(sa, "entry");
783 	SFC_ASSERT(sfc_adapter_is_locked(sa));
785 	sa->socket_id = rte_socket_id();
786 	rte_atomic32_init(&sa->restart_required);
788 	sfc_log_init(sa, "init mem bar");
789 	rc = sfc_mem_bar_init(sa);
791 		goto fail_mem_bar_init;
793 	sfc_log_init(sa, "get family");
794 	rc = efx_family(pci_dev->id.vendor_id, pci_dev->id.device_id,
798 	sfc_log_init(sa, "family is %u", sa->family);
800 	sfc_log_init(sa, "create nic");
/* nic_lock serializes libefx register access for this NIC */
801 	rte_spinlock_init(&sa->nic_lock);
802 	rc = efx_nic_create(sa->family, (efsys_identifier_t *)sa,
803 			    &sa->mem_bar, &sa->nic_lock, &enp);
805 		goto fail_nic_create;
808 	rc = sfc_mcdi_init(sa);
812 	sfc_log_init(sa, "probe nic");
813 	rc = efx_nic_probe(enp);
817 	sfc_log_init(sa, "done");
824 	sfc_log_init(sa, "destroy nic");
826 	efx_nic_destroy(enp);
830 	sfc_mem_bar_fini(sa);
833 	sfc_log_init(sa, "failed %d", rc);
/*
 * Reverse of sfc_probe(): unprobe the NIC, cancel any pending restart
 * alarm (whose callback receives this adapter as opaque data), destroy
 * the libefx NIC object and unmap the memory BAR. Called under the
 * adapter lock.
 */
838 sfc_unprobe(struct sfc_adapter *sa)
840 	efx_nic_t *enp = sa->nic;
842 	sfc_log_init(sa, "entry");
844 	SFC_ASSERT(sfc_adapter_is_locked(sa));
846 	sfc_log_init(sa, "unprobe nic");
847 	efx_nic_unprobe(enp);
852 	 * Make sure there is no pending alarm to restart since we are
853 	 * going to free device private which is passed as the callback
854 	 * opaque data. A new alarm cannot be scheduled since MCDI is
857 	rte_eal_alarm_cancel(sfc_restart_if_required, sa);
859 	sfc_log_init(sa, "destroy nic");
861 	efx_nic_destroy(enp);
863 	sfc_mem_bar_fini(sa);
866 	sa->state = SFC_ADAPTER_UNINITIALIZED;