net/sfc: add port representors infrastructure
drivers/net/sfc/sfc_ethdev.c
/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2019-2021 Xilinx, Inc.
 * Copyright(c) 2016-2019 Solarflare Communications Inc.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

#include <rte_dev.h>
#include <ethdev_driver.h>
#include <ethdev_pci.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_ether.h>

#include "efx.h"

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_kvargs.h"
#include "sfc_ev.h"
#include "sfc_rx.h"
#include "sfc_tx.h"
#include "sfc_flow.h"
#include "sfc_dp.h"
#include "sfc_dp_rx.h"
#include "sfc_repr.h"
#include "sfc_sw_stats.h"

#define SFC_XSTAT_ID_INVALID_VAL  UINT64_MAX
#define SFC_XSTAT_ID_INVALID_NAME '\0'

uint32_t sfc_logtype_driver;

static struct sfc_dp_list sfc_dp_head =
        TAILQ_HEAD_INITIALIZER(sfc_dp_head);


static void sfc_eth_dev_clear_ops(struct rte_eth_dev *dev);


static int
sfc_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        efx_nic_fw_info_t enfi;
        int ret;
        int rc;

        rc = efx_nic_get_fw_version(sa->nic, &enfi);
        if (rc != 0)
                return -rc;

        ret = snprintf(fw_version, fw_size,
                       "%" PRIu16 ".%" PRIu16 ".%" PRIu16 ".%" PRIu16,
                       enfi.enfi_mc_fw_version[0], enfi.enfi_mc_fw_version[1],
                       enfi.enfi_mc_fw_version[2], enfi.enfi_mc_fw_version[3]);
        if (ret < 0)
                return ret;

        if (enfi.enfi_dpcpu_fw_ids_valid) {
                size_t dpcpu_fw_ids_offset = MIN(fw_size - 1, (size_t)ret);
                int ret_extra;

                ret_extra = snprintf(fw_version + dpcpu_fw_ids_offset,
                                     fw_size - dpcpu_fw_ids_offset,
                                     " rx%" PRIx16 " tx%" PRIx16,
                                     enfi.enfi_rx_dpcpu_fw_id,
                                     enfi.enfi_tx_dpcpu_fw_id);
                if (ret_extra < 0)
                        return ret_extra;

                ret += ret_extra;
        }

        if (fw_size < (size_t)(++ret))
                return ret;
        else
                return 0;
}
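
/*
 * Illustrative note (not part of the original sources): with hypothetical
 * firmware values the callback above produces a string such as
 * "7.8.3.1012 rx2 tx2" (DPCPU firmware IDs are printed in hex). If
 * fw_size is too small, the required buffer size including the
 * terminating NUL is returned instead of 0, so the caller can retry,
 * matching the rte_eth_dev_fw_version_get() contract.
 */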

static int
sfc_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
        const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
        struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        struct sfc_rss *rss = &sas->rss;
        struct sfc_mae *mae = &sa->mae;
        uint64_t txq_offloads_def = 0;

        sfc_log_init(sa, "entry");

        dev_info->min_mtu = RTE_ETHER_MIN_MTU;
        dev_info->max_mtu = EFX_MAC_SDU_MAX;

        dev_info->max_rx_pktlen = EFX_MAC_PDU_MAX;

        dev_info->max_vfs = sa->sriov.num_vfs;

        /* Autonegotiation may be disabled */
        dev_info->speed_capa = ETH_LINK_SPEED_FIXED;
        if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_1000FDX))
                dev_info->speed_capa |= ETH_LINK_SPEED_1G;
        if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_10000FDX))
                dev_info->speed_capa |= ETH_LINK_SPEED_10G;
        if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_25000FDX))
                dev_info->speed_capa |= ETH_LINK_SPEED_25G;
        if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_40000FDX))
                dev_info->speed_capa |= ETH_LINK_SPEED_40G;
        if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_50000FDX))
                dev_info->speed_capa |= ETH_LINK_SPEED_50G;
        if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_100000FDX))
                dev_info->speed_capa |= ETH_LINK_SPEED_100G;

        dev_info->max_rx_queues = sa->rxq_max;
        dev_info->max_tx_queues = sa->txq_max;

        /* By default packets are dropped if no descriptors are available */
        dev_info->default_rxconf.rx_drop_en = 1;

        dev_info->rx_queue_offload_capa = sfc_rx_get_queue_offload_caps(sa);

        /*
         * rx_offload_capa includes both device and queue offloads since
         * the latter may be requested on a per-device basis, which makes
         * sense when some offloads need to be enabled on all queues.
         */
        dev_info->rx_offload_capa = sfc_rx_get_dev_offload_caps(sa) |
                                    dev_info->rx_queue_offload_capa;

        dev_info->tx_queue_offload_capa = sfc_tx_get_queue_offload_caps(sa);

        /*
         * tx_offload_capa includes both device and queue offloads since
         * the latter may be requested on a per-device basis, which makes
         * sense when some offloads need to be enabled on all queues.
         */
        dev_info->tx_offload_capa = sfc_tx_get_dev_offload_caps(sa) |
                                    dev_info->tx_queue_offload_capa;

        if (dev_info->tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
                txq_offloads_def |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;

        dev_info->default_txconf.offloads |= txq_offloads_def;

        if (rss->context_type != EFX_RX_SCALE_UNAVAILABLE) {
                uint64_t rte_hf = 0;
                unsigned int i;

                for (i = 0; i < rss->hf_map_nb_entries; ++i)
                        rte_hf |= rss->hf_map[i].rte;

                dev_info->reta_size = EFX_RSS_TBL_SIZE;
                dev_info->hash_key_size = EFX_RSS_KEY_SIZE;
                dev_info->flow_type_rss_offloads = rte_hf;
        }

        /* Initialize to hardware limits */
        dev_info->rx_desc_lim.nb_max = sa->rxq_max_entries;
        dev_info->rx_desc_lim.nb_min = sa->rxq_min_entries;
        /*
         * The RXQ hardware requires that the descriptor count is a power
         * of 2, but rx_desc_lim cannot properly describe that constraint.
         */
        dev_info->rx_desc_lim.nb_align = sa->rxq_min_entries;

        /* Initialize to hardware limits */
        dev_info->tx_desc_lim.nb_max = sa->txq_max_entries;
        dev_info->tx_desc_lim.nb_min = sa->txq_min_entries;
        /*
         * The TXQ hardware requires that the descriptor count is a power
         * of 2, but tx_desc_lim cannot properly describe that constraint.
         */
        dev_info->tx_desc_lim.nb_align = sa->txq_min_entries;

        if (sap->dp_rx->get_dev_info != NULL)
                sap->dp_rx->get_dev_info(dev_info);
        if (sap->dp_tx->get_dev_info != NULL)
                sap->dp_tx->get_dev_info(dev_info);

        dev_info->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
                             RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP;

        if (mae->status == SFC_MAE_STATUS_SUPPORTED) {
                dev_info->switch_info.name = dev->device->driver->name;
                dev_info->switch_info.domain_id = mae->switch_domain_id;
                dev_info->switch_info.port_id = mae->switch_port_id;
        }

        return 0;
}
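
/*
 * Illustrative usage (hypothetical application code, not part of the
 * driver): the capabilities filled in above are consumed through the
 * generic ethdev API, e.g.
 *
 *     struct rte_eth_dev_info dev_info;
 *
 *     if (rte_eth_dev_info_get(port_id, &dev_info) == 0 &&
 *         (dev_info.speed_capa & ETH_LINK_SPEED_100G) != 0)
 *             printf("port %u supports 100G\n", port_id);
 */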

static const uint32_t *
sfc_dev_supported_ptypes_get(struct rte_eth_dev *dev)
{
        const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);

        return sap->dp_rx->supported_ptypes_get(sap->shared->tunnel_encaps);
}

static int
sfc_dev_configure(struct rte_eth_dev *dev)
{
        struct rte_eth_dev_data *dev_data = dev->data;
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        int rc;

        sfc_log_init(sa, "entry n_rxq=%u n_txq=%u",
                     dev_data->nb_rx_queues, dev_data->nb_tx_queues);

        sfc_adapter_lock(sa);
        switch (sa->state) {
        case SFC_ETHDEV_CONFIGURED:
                /* FALLTHROUGH */
        case SFC_ETHDEV_INITIALIZED:
                rc = sfc_configure(sa);
                break;
        default:
                sfc_err(sa, "unexpected adapter state %u to configure",
                        sa->state);
                rc = EINVAL;
                break;
        }
        sfc_adapter_unlock(sa);

        sfc_log_init(sa, "done %d", rc);
        SFC_ASSERT(rc >= 0);
        return -rc;
}

static int
sfc_dev_start(struct rte_eth_dev *dev)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        int rc;

        sfc_log_init(sa, "entry");

        sfc_adapter_lock(sa);
        rc = sfc_start(sa);
        sfc_adapter_unlock(sa);

        sfc_log_init(sa, "done %d", rc);
        SFC_ASSERT(rc >= 0);
        return -rc;
}

static int
sfc_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        struct rte_eth_link current_link;
        int ret;

        sfc_log_init(sa, "entry");

        if (sa->state != SFC_ETHDEV_STARTED) {
                sfc_port_link_mode_to_info(EFX_LINK_UNKNOWN, &current_link);
        } else if (wait_to_complete) {
                efx_link_mode_t link_mode;

                if (efx_port_poll(sa->nic, &link_mode) != 0)
                        link_mode = EFX_LINK_UNKNOWN;
                sfc_port_link_mode_to_info(link_mode, &current_link);

        } else {
                sfc_ev_mgmt_qpoll(sa);
                rte_eth_linkstatus_get(dev, &current_link);
        }

        ret = rte_eth_linkstatus_set(dev, &current_link);
        if (ret == 0)
                sfc_notice(sa, "Link status is %s",
                           current_link.link_status ? "UP" : "DOWN");

        return ret;
}
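
/*
 * Descriptive note on the callback above: three cases are handled.
 * If the port is not started, the link is reported as unknown; if the
 * caller is willing to wait (wait_to_complete != 0), the PHY is polled
 * synchronously; otherwise the management event queue is polled and the
 * link status cached by previously processed events is returned.
 */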

static int
sfc_dev_stop(struct rte_eth_dev *dev)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);

        sfc_log_init(sa, "entry");

        sfc_adapter_lock(sa);
        sfc_stop(sa);
        sfc_adapter_unlock(sa);

        sfc_log_init(sa, "done");

        return 0;
}

static int
sfc_dev_set_link_up(struct rte_eth_dev *dev)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        int rc;

        sfc_log_init(sa, "entry");

        sfc_adapter_lock(sa);
        rc = sfc_start(sa);
        sfc_adapter_unlock(sa);

        SFC_ASSERT(rc >= 0);
        return -rc;
}

static int
sfc_dev_set_link_down(struct rte_eth_dev *dev)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);

        sfc_log_init(sa, "entry");

        sfc_adapter_lock(sa);
        sfc_stop(sa);
        sfc_adapter_unlock(sa);

        return 0;
}

static void
sfc_eth_dev_secondary_clear_ops(struct rte_eth_dev *dev)
{
        free(dev->process_private);
        rte_eth_dev_release_port(dev);
}

static int
sfc_dev_close(struct rte_eth_dev *dev)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);

        sfc_log_init(sa, "entry");

        if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
                sfc_eth_dev_secondary_clear_ops(dev);
                return 0;
        }

        sfc_adapter_lock(sa);
        switch (sa->state) {
        case SFC_ETHDEV_STARTED:
                sfc_stop(sa);
                SFC_ASSERT(sa->state == SFC_ETHDEV_CONFIGURED);
                /* FALLTHROUGH */
        case SFC_ETHDEV_CONFIGURED:
                sfc_close(sa);
                SFC_ASSERT(sa->state == SFC_ETHDEV_INITIALIZED);
                /* FALLTHROUGH */
        case SFC_ETHDEV_INITIALIZED:
                break;
        default:
                sfc_err(sa, "unexpected adapter state %u on close", sa->state);
                break;
        }

        /*
         * Cleanup all resources.
         * Rollback primary process sfc_eth_dev_init() below.
         */

        sfc_eth_dev_clear_ops(dev);

        sfc_detach(sa);
        sfc_unprobe(sa);

        sfc_kvargs_cleanup(sa);

        sfc_adapter_unlock(sa);
        sfc_adapter_lock_fini(sa);

        sfc_log_init(sa, "done");

        /* Required for logging, so cleanup last */
        sa->eth_dev = NULL;

        free(sa);

        return 0;
}

static int
sfc_dev_filter_set(struct rte_eth_dev *dev, enum sfc_dev_filter_mode mode,
                   boolean_t enabled)
{
        struct sfc_port *port;
        boolean_t *toggle;
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        boolean_t allmulti = (mode == SFC_DEV_FILTER_MODE_ALLMULTI);
        const char *desc = (allmulti) ? "all-multi" : "promiscuous";
        int rc = 0;

        sfc_adapter_lock(sa);

        port = &sa->port;
        toggle = (allmulti) ? (&port->allmulti) : (&port->promisc);

        if (*toggle != enabled) {
                *toggle = enabled;

                if (sfc_sa2shared(sa)->isolated) {
                        sfc_warn(sa, "isolated mode is active on the port");
                        sfc_warn(sa, "the change will be applied on the next "
                                     "start, provided that isolated mode is "
                                     "disabled prior to the next start");
                } else if ((sa->state == SFC_ETHDEV_STARTED) &&
                           ((rc = sfc_set_rx_mode(sa)) != 0)) {
                        *toggle = !(enabled);
                        sfc_warn(sa, "Failed to %s %s mode, rc = %d",
                                 ((enabled) ? "enable" : "disable"), desc, rc);

                        /*
                         * For promiscuous and all-multicast filters a
                         * permission failure should be reported as an
                         * unsupported filter.
                         */
                        if (rc == EPERM)
                                rc = ENOTSUP;
                }
        }

        sfc_adapter_unlock(sa);
        return rc;
}

static int
sfc_dev_promisc_enable(struct rte_eth_dev *dev)
{
        int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_PROMISC, B_TRUE);

        SFC_ASSERT(rc >= 0);
        return -rc;
}

static int
sfc_dev_promisc_disable(struct rte_eth_dev *dev)
{
        int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_PROMISC, B_FALSE);

        SFC_ASSERT(rc >= 0);
        return -rc;
}

static int
sfc_dev_allmulti_enable(struct rte_eth_dev *dev)
{
        int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_ALLMULTI, B_TRUE);

        SFC_ASSERT(rc >= 0);
        return -rc;
}

static int
sfc_dev_allmulti_disable(struct rte_eth_dev *dev)
{
        int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_ALLMULTI, B_FALSE);

        SFC_ASSERT(rc >= 0);
        return -rc;
}
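
/*
 * Illustrative usage (hypothetical application code): the four thin
 * wrappers above back the generic ethdev calls, e.g.
 *
 *     int ret = rte_eth_promiscuous_enable(port_id);
 *
 * and follow the ethdev convention of returning a negative errno,
 * hence the negation of the positive rc used internally.
 */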

static int
sfc_rx_queue_setup(struct rte_eth_dev *dev, uint16_t ethdev_qid,
                   uint16_t nb_rx_desc, unsigned int socket_id,
                   const struct rte_eth_rxconf *rx_conf,
                   struct rte_mempool *mb_pool)
{
        struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
        struct sfc_rxq_info *rxq_info;
        sfc_sw_index_t sw_index;
        int rc;

        sfc_log_init(sa, "RxQ=%u nb_rx_desc=%u socket_id=%u",
                     ethdev_qid, nb_rx_desc, socket_id);

        sfc_adapter_lock(sa);

        sw_index = sfc_rxq_sw_index_by_ethdev_rx_qid(sas, sfc_ethdev_qid);
        rc = sfc_rx_qinit(sa, sw_index, nb_rx_desc, socket_id,
                          rx_conf, mb_pool);
        if (rc != 0)
                goto fail_rx_qinit;

        rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
        dev->data->rx_queues[ethdev_qid] = rxq_info->dp;

        sfc_adapter_unlock(sa);

        return 0;

fail_rx_qinit:
        sfc_adapter_unlock(sa);
        SFC_ASSERT(rc > 0);
        return -rc;
}

static void
sfc_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
        struct sfc_dp_rxq *dp_rxq = dev->data->rx_queues[qid];
        struct sfc_rxq *rxq;
        struct sfc_adapter *sa;
        sfc_sw_index_t sw_index;

        if (dp_rxq == NULL)
                return;

        rxq = sfc_rxq_by_dp_rxq(dp_rxq);
        sa = rxq->evq->sa;
        sfc_adapter_lock(sa);

        sw_index = dp_rxq->dpq.queue_id;

        sfc_log_init(sa, "RxQ=%u", sw_index);

        sfc_rx_qfini(sa, sw_index);

        sfc_adapter_unlock(sa);
}

static int
sfc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t ethdev_qid,
                   uint16_t nb_tx_desc, unsigned int socket_id,
                   const struct rte_eth_txconf *tx_conf)
{
        struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        struct sfc_txq_info *txq_info;
        sfc_sw_index_t sw_index;
        int rc;

        sfc_log_init(sa, "TxQ = %u, nb_tx_desc = %u, socket_id = %u",
                     ethdev_qid, nb_tx_desc, socket_id);

        sfc_adapter_lock(sa);

        sw_index = sfc_txq_sw_index_by_ethdev_tx_qid(sas, ethdev_qid);
        rc = sfc_tx_qinit(sa, sw_index, nb_tx_desc, socket_id, tx_conf);
        if (rc != 0)
                goto fail_tx_qinit;

        txq_info = sfc_txq_info_by_ethdev_qid(sas, ethdev_qid);
        dev->data->tx_queues[ethdev_qid] = txq_info->dp;

        sfc_adapter_unlock(sa);
        return 0;

fail_tx_qinit:
        sfc_adapter_unlock(sa);
        SFC_ASSERT(rc > 0);
        return -rc;
}

static void
sfc_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
        struct sfc_dp_txq *dp_txq = dev->data->tx_queues[qid];
        struct sfc_txq *txq;
        sfc_sw_index_t sw_index;
        struct sfc_adapter *sa;

        if (dp_txq == NULL)
                return;

        txq = sfc_txq_by_dp_txq(dp_txq);
        sw_index = dp_txq->dpq.queue_id;

        SFC_ASSERT(txq->evq != NULL);
        sa = txq->evq->sa;

        sfc_log_init(sa, "TxQ = %u", sw_index);

        sfc_adapter_lock(sa);

        sfc_tx_qfini(sa, sw_index);

        sfc_adapter_unlock(sa);
}

static void
sfc_stats_get_dp_rx(struct sfc_adapter *sa, uint64_t *pkts, uint64_t *bytes)
{
        struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
        uint64_t pkts_sum = 0;
        uint64_t bytes_sum = 0;
        unsigned int i;

        for (i = 0; i < sas->ethdev_rxq_count; ++i) {
                struct sfc_rxq_info *rxq_info;

                rxq_info = sfc_rxq_info_by_ethdev_qid(sas, i);
                if (rxq_info->state & SFC_RXQ_INITIALIZED) {
                        union sfc_pkts_bytes qstats;

                        sfc_pkts_bytes_get(&rxq_info->dp->dpq.stats, &qstats);
                        pkts_sum += qstats.pkts -
                                        sa->sw_stats.reset_rx_pkts[i];
                        bytes_sum += qstats.bytes -
                                        sa->sw_stats.reset_rx_bytes[i];
                }
        }

        *pkts = pkts_sum;
        *bytes = bytes_sum;
}

static void
sfc_stats_get_dp_tx(struct sfc_adapter *sa, uint64_t *pkts, uint64_t *bytes)
{
        struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
        uint64_t pkts_sum = 0;
        uint64_t bytes_sum = 0;
        unsigned int i;

        for (i = 0; i < sas->ethdev_txq_count; ++i) {
                struct sfc_txq_info *txq_info;

                txq_info = sfc_txq_info_by_ethdev_qid(sas, i);
                if (txq_info->state & SFC_TXQ_INITIALIZED) {
                        union sfc_pkts_bytes qstats;

                        sfc_pkts_bytes_get(&txq_info->dp->dpq.stats, &qstats);
                        pkts_sum += qstats.pkts -
                                        sa->sw_stats.reset_tx_pkts[i];
                        bytes_sum += qstats.bytes -
                                        sa->sw_stats.reset_tx_bytes[i];
                }
        }

        *pkts = pkts_sum;
        *bytes = bytes_sum;
}

/*
 * Some statistics are computed as A - B, where A and B each increase
 * monotonically with some hardware counter(s), and the counters are read
 * asynchronously.
 *
 * If packet X is counted in A but not yet counted in B, the computed
 * value is greater than the real one.
 *
 * If packet X is not counted in A at the moment of reading the counter,
 * but is counted in B at the moment of reading the counter, the computed
 * value is less than the real one.
 *
 * However, a counter which goes backward is a worse evil than a slightly
 * wrong value, so let's try to guarantee that it never happens, except
 * perhaps when the MAC stats are zeroed as a result of a NIC reset.
 */
static void
sfc_update_diff_stat(uint64_t *stat, uint64_t newval)
{
        if ((int64_t)(newval - *stat) > 0 || newval == 0)
                *stat = newval;
}
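
/*
 * Worked example (hypothetical readings, not from the original sources):
 * if the stored value is 105 and a racy read returns 100, then
 * (int64_t)(100 - 105) < 0, so the stored value stays at 105 and the
 * reported counter never appears to go backward; a subsequent read of
 * 110 is accepted. A read of 0 (MAC stats zeroed by a NIC reset) is
 * accepted unconditionally.
 */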

static int
sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
        bool have_dp_rx_stats = sap->dp_rx->features & SFC_DP_RX_FEAT_STATS;
        bool have_dp_tx_stats = sap->dp_tx->features & SFC_DP_TX_FEAT_STATS;
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        struct sfc_port *port = &sa->port;
        uint64_t *mac_stats;
        int ret;

        sfc_adapter_lock(sa);

        if (have_dp_rx_stats)
                sfc_stats_get_dp_rx(sa, &stats->ipackets, &stats->ibytes);
        if (have_dp_tx_stats)
                sfc_stats_get_dp_tx(sa, &stats->opackets, &stats->obytes);

        ret = sfc_port_update_mac_stats(sa, B_FALSE);
        if (ret != 0)
                goto unlock;

        mac_stats = port->mac_stats_buf;

        if (EFX_MAC_STAT_SUPPORTED(port->mac_stats_mask,
                                   EFX_MAC_VADAPTER_RX_UNICAST_PACKETS)) {
                if (!have_dp_rx_stats) {
                        stats->ipackets =
                                mac_stats[EFX_MAC_VADAPTER_RX_UNICAST_PACKETS] +
                                mac_stats[EFX_MAC_VADAPTER_RX_MULTICAST_PACKETS] +
                                mac_stats[EFX_MAC_VADAPTER_RX_BROADCAST_PACKETS];
                        stats->ibytes =
                                mac_stats[EFX_MAC_VADAPTER_RX_UNICAST_BYTES] +
                                mac_stats[EFX_MAC_VADAPTER_RX_MULTICAST_BYTES] +
                                mac_stats[EFX_MAC_VADAPTER_RX_BROADCAST_BYTES];

                        /* CRC is included in these stats, but shouldn't be */
                        stats->ibytes -= stats->ipackets * RTE_ETHER_CRC_LEN;
                }
                if (!have_dp_tx_stats) {
                        stats->opackets =
                                mac_stats[EFX_MAC_VADAPTER_TX_UNICAST_PACKETS] +
                                mac_stats[EFX_MAC_VADAPTER_TX_MULTICAST_PACKETS] +
                                mac_stats[EFX_MAC_VADAPTER_TX_BROADCAST_PACKETS];
                        stats->obytes =
                                mac_stats[EFX_MAC_VADAPTER_TX_UNICAST_BYTES] +
                                mac_stats[EFX_MAC_VADAPTER_TX_MULTICAST_BYTES] +
                                mac_stats[EFX_MAC_VADAPTER_TX_BROADCAST_BYTES];

                        /* CRC is included in these stats, but shouldn't be */
                        stats->obytes -= stats->opackets * RTE_ETHER_CRC_LEN;
                }
                stats->imissed = mac_stats[EFX_MAC_VADAPTER_RX_BAD_PACKETS];
                stats->oerrors = mac_stats[EFX_MAC_VADAPTER_TX_BAD_PACKETS];
        } else {
                if (!have_dp_tx_stats) {
                        stats->opackets = mac_stats[EFX_MAC_TX_PKTS];
                        stats->obytes = mac_stats[EFX_MAC_TX_OCTETS] -
                                mac_stats[EFX_MAC_TX_PKTS] * RTE_ETHER_CRC_LEN;
                }

                /*
                 * Take into account stats which are always supported on
                 * EF10. If some stat is not supported by the current
                 * firmware variant or HW revision, it is guaranteed
                 * to be zero in mac_stats.
                 */
                stats->imissed =
                        mac_stats[EFX_MAC_RX_NODESC_DROP_CNT] +
                        mac_stats[EFX_MAC_PM_TRUNC_BB_OVERFLOW] +
                        mac_stats[EFX_MAC_PM_DISCARD_BB_OVERFLOW] +
                        mac_stats[EFX_MAC_PM_TRUNC_VFIFO_FULL] +
                        mac_stats[EFX_MAC_PM_DISCARD_VFIFO_FULL] +
                        mac_stats[EFX_MAC_PM_TRUNC_QBB] +
                        mac_stats[EFX_MAC_PM_DISCARD_QBB] +
                        mac_stats[EFX_MAC_PM_DISCARD_MAPPING] +
                        mac_stats[EFX_MAC_RXDP_Q_DISABLED_PKTS] +
                        mac_stats[EFX_MAC_RXDP_DI_DROPPED_PKTS];
                stats->ierrors =
                        mac_stats[EFX_MAC_RX_FCS_ERRORS] +
                        mac_stats[EFX_MAC_RX_ALIGN_ERRORS] +
                        mac_stats[EFX_MAC_RX_JABBER_PKTS];
                /* no oerrors counters supported on EF10 */

                if (!have_dp_rx_stats) {
                        /* Exclude missed, errors and pauses from Rx packets */
                        sfc_update_diff_stat(&port->ipackets,
                                mac_stats[EFX_MAC_RX_PKTS] -
                                mac_stats[EFX_MAC_RX_PAUSE_PKTS] -
                                stats->imissed - stats->ierrors);
                        stats->ipackets = port->ipackets;
                        stats->ibytes = mac_stats[EFX_MAC_RX_OCTETS] -
                                mac_stats[EFX_MAC_RX_PKTS] * RTE_ETHER_CRC_LEN;
                }
        }

unlock:
        sfc_adapter_unlock(sa);
        SFC_ASSERT(ret >= 0);
        return -ret;
}

static int
sfc_stats_reset(struct rte_eth_dev *dev)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        struct sfc_port *port = &sa->port;
        int rc;

        sfc_adapter_lock(sa);

        if (sa->state != SFC_ETHDEV_STARTED) {
                /*
                 * The operation cannot be done if port is not started; it
                 * will be scheduled to be done during the next port start
                 */
                port->mac_stats_reset_pending = B_TRUE;
                sfc_adapter_unlock(sa);
                return 0;
        }

        rc = sfc_port_reset_mac_stats(sa);
        if (rc != 0)
                sfc_err(sa, "failed to reset statistics (rc = %d)", rc);

        sfc_sw_xstats_reset(sa);

        sfc_adapter_unlock(sa);

        SFC_ASSERT(rc >= 0);
        return -rc;
}

static unsigned int
sfc_xstats_get_nb_supported(struct sfc_adapter *sa)
{
        struct sfc_port *port = &sa->port;
        unsigned int nb_supported;

        sfc_adapter_lock(sa);
        nb_supported = port->mac_stats_nb_supported +
                       sfc_sw_xstats_get_nb_supported(sa);
        sfc_adapter_unlock(sa);

        return nb_supported;
}

static int
sfc_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
               unsigned int xstats_count)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        unsigned int nb_written = 0;
        unsigned int nb_supported = 0;
        int rc;

        if (unlikely(xstats == NULL))
                return sfc_xstats_get_nb_supported(sa);

        rc = sfc_port_get_mac_stats(sa, xstats, xstats_count, &nb_written);
        if (rc < 0)
                return rc;

        nb_supported = rc;
        sfc_sw_xstats_get_vals(sa, xstats, xstats_count, &nb_written,
                               &nb_supported);

        return nb_supported;
}

static int
sfc_xstats_get_names(struct rte_eth_dev *dev,
                     struct rte_eth_xstat_name *xstats_names,
                     unsigned int xstats_count)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        struct sfc_port *port = &sa->port;
        unsigned int i;
        unsigned int nstats = 0;
        unsigned int nb_written = 0;
        int ret;

        if (unlikely(xstats_names == NULL))
                return sfc_xstats_get_nb_supported(sa);

        for (i = 0; i < EFX_MAC_NSTATS; ++i) {
                if (EFX_MAC_STAT_SUPPORTED(port->mac_stats_mask, i)) {
                        if (nstats < xstats_count) {
                                strlcpy(xstats_names[nstats].name,
                                        efx_mac_stat_name(sa->nic, i),
                                        sizeof(xstats_names[0].name));
                                nb_written++;
                        }
                        nstats++;
                }
        }

        ret = sfc_sw_xstats_get_names(sa, xstats_names, xstats_count,
                                      &nb_written, &nstats);
        if (ret != 0) {
                SFC_ASSERT(ret < 0);
                return ret;
        }

        return nstats;
}

static int
sfc_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids,
                     uint64_t *values, unsigned int n)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        struct sfc_port *port = &sa->port;
        unsigned int nb_supported;
        unsigned int i;
        int rc;

        if (unlikely(ids == NULL || values == NULL))
                return -EINVAL;

        /*
         * The values array may be filled in non-sequential order. First fill
         * it with a constant that marks invalid IDs.
         */
        for (i = 0; i < n; i++)
                values[i] = SFC_XSTAT_ID_INVALID_VAL;

        rc = sfc_port_get_mac_stats_by_id(sa, ids, values, n);
        if (rc != 0)
                return rc;

        nb_supported = port->mac_stats_nb_supported;
        sfc_sw_xstats_get_vals_by_id(sa, ids, values, n, &nb_supported);

        /* Return number of written stats before invalid ID is encountered. */
        for (i = 0; i < n; i++) {
                if (values[i] == SFC_XSTAT_ID_INVALID_VAL)
                        return i;
        }

        return n;
}
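
/*
 * Illustrative example (hypothetical IDs): with n = 3 and
 * ids = { 0, 1, <invalid> }, values[2] still holds the sentinel
 * SFC_XSTAT_ID_INVALID_VAL after the lookups above, so the function
 * returns 2, the number of values written before the first invalid ID.
 */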

static int
sfc_xstats_get_names_by_id(struct rte_eth_dev *dev,
                           const uint64_t *ids,
                           struct rte_eth_xstat_name *xstats_names,
                           unsigned int size)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        struct sfc_port *port = &sa->port;
        unsigned int nb_supported;
        unsigned int i;
        int ret;

        if (unlikely(xstats_names == NULL && ids != NULL) ||
            unlikely(xstats_names != NULL && ids == NULL))
                return -EINVAL;

        if (unlikely(xstats_names == NULL && ids == NULL))
                return sfc_xstats_get_nb_supported(sa);

        /*
         * The names array may be filled in non-sequential order. First fill
         * it with a string that marks invalid IDs.
         */
        for (i = 0; i < size; i++)
                xstats_names[i].name[0] = SFC_XSTAT_ID_INVALID_NAME;

        sfc_adapter_lock(sa);

        SFC_ASSERT(port->mac_stats_nb_supported <=
                   RTE_DIM(port->mac_stats_by_id));

        for (i = 0; i < size; i++) {
                if (ids[i] < port->mac_stats_nb_supported) {
                        strlcpy(xstats_names[i].name,
                                efx_mac_stat_name(sa->nic,
                                                 port->mac_stats_by_id[ids[i]]),
                                sizeof(xstats_names[0].name));
                }
        }

        nb_supported = port->mac_stats_nb_supported;

        sfc_adapter_unlock(sa);

        ret = sfc_sw_xstats_get_names_by_id(sa, ids, xstats_names, size,
                                            &nb_supported);
        if (ret != 0) {
                SFC_ASSERT(ret < 0);
                return ret;
        }

        /* Return number of written names before invalid ID is encountered. */
        for (i = 0; i < size; i++) {
                if (xstats_names[i].name[0] == SFC_XSTAT_ID_INVALID_NAME)
                        return i;
        }

        return size;
}

static int
sfc_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        unsigned int wanted_fc, link_fc;

        memset(fc_conf, 0, sizeof(*fc_conf));

        sfc_adapter_lock(sa);

        if (sa->state == SFC_ETHDEV_STARTED)
                efx_mac_fcntl_get(sa->nic, &wanted_fc, &link_fc);
        else
                link_fc = sa->port.flow_ctrl;

        switch (link_fc) {
        case 0:
                fc_conf->mode = RTE_FC_NONE;
                break;
        case EFX_FCNTL_RESPOND:
                fc_conf->mode = RTE_FC_RX_PAUSE;
                break;
        case EFX_FCNTL_GENERATE:
                fc_conf->mode = RTE_FC_TX_PAUSE;
                break;
        case (EFX_FCNTL_RESPOND | EFX_FCNTL_GENERATE):
                fc_conf->mode = RTE_FC_FULL;
                break;
        default:
                sfc_err(sa, "%s: unexpected flow control value %#x",
                        __func__, link_fc);
        }

        fc_conf->autoneg = sa->port.flow_ctrl_autoneg;

        sfc_adapter_unlock(sa);

        return 0;
}

static int
sfc_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        struct sfc_port *port = &sa->port;
        unsigned int fcntl;
        int rc;

        if (fc_conf->high_water != 0 || fc_conf->low_water != 0 ||
            fc_conf->pause_time != 0 || fc_conf->send_xon != 0 ||
            fc_conf->mac_ctrl_frame_fwd != 0) {
                sfc_err(sa, "unsupported flow control settings specified");
                rc = EINVAL;
                goto fail_inval;
        }

        switch (fc_conf->mode) {
        case RTE_FC_NONE:
                fcntl = 0;
                break;
        case RTE_FC_RX_PAUSE:
                fcntl = EFX_FCNTL_RESPOND;
                break;
        case RTE_FC_TX_PAUSE:
                fcntl = EFX_FCNTL_GENERATE;
                break;
        case RTE_FC_FULL:
                fcntl = EFX_FCNTL_RESPOND | EFX_FCNTL_GENERATE;
                break;
        default:
                rc = EINVAL;
                goto fail_inval;
        }

        sfc_adapter_lock(sa);

        if (sa->state == SFC_ETHDEV_STARTED) {
                rc = efx_mac_fcntl_set(sa->nic, fcntl, fc_conf->autoneg);
                if (rc != 0)
                        goto fail_mac_fcntl_set;
        }

        port->flow_ctrl = fcntl;
        port->flow_ctrl_autoneg = fc_conf->autoneg;

        sfc_adapter_unlock(sa);

        return 0;

fail_mac_fcntl_set:
        sfc_adapter_unlock(sa);
fail_inval:
        SFC_ASSERT(rc > 0);
        return -rc;
}
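
/*
 * Illustrative usage (hypothetical application code): requesting full
 * flow control through the generic API maps RTE_FC_FULL to
 * EFX_FCNTL_RESPOND | EFX_FCNTL_GENERATE in the callback above, e.g.
 *
 *     struct rte_eth_fc_conf fc_conf = { 0 };
 *
 *     fc_conf.mode = RTE_FC_FULL;
 *     fc_conf.autoneg = 1;
 *     int ret = rte_eth_dev_flow_ctrl_set(port_id, &fc_conf);
 *
 * Non-zero watermark or pause-time fields are rejected with EINVAL.
 */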

static int
sfc_check_scatter_on_all_rx_queues(struct sfc_adapter *sa, size_t pdu)
{
        struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
        const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
        boolean_t scatter_enabled;
        const char *error;
        unsigned int i;

        for (i = 0; i < sas->rxq_count; i++) {
                if ((sas->rxq_info[i].state & SFC_RXQ_INITIALIZED) == 0)
                        continue;

                scatter_enabled = (sas->rxq_info[i].type_flags &
                                   EFX_RXQ_FLAG_SCATTER);

                if (!sfc_rx_check_scatter(pdu, sa->rxq_ctrl[i].buf_size,
                                          encp->enc_rx_prefix_size,
                                          scatter_enabled,
                                          encp->enc_rx_scatter_max, &error)) {
                        sfc_err(sa, "MTU check for RxQ %u failed: %s", i,
                                error);
                        return EINVAL;
                }
        }

        return 0;
}

static int
sfc_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        size_t pdu = EFX_MAC_PDU(mtu);
        size_t old_pdu;
        int rc;

        sfc_log_init(sa, "mtu=%u", mtu);

        rc = EINVAL;
        if (pdu < EFX_MAC_PDU_MIN) {
                sfc_err(sa, "too small MTU %u (PDU size %u less than min %u)",
                        (unsigned int)mtu, (unsigned int)pdu,
                        EFX_MAC_PDU_MIN);
                goto fail_inval;
        }
        if (pdu > EFX_MAC_PDU_MAX) {
                sfc_err(sa, "too big MTU %u (PDU size %u greater than max %u)",
                        (unsigned int)mtu, (unsigned int)pdu,
                        (unsigned int)EFX_MAC_PDU_MAX);
                goto fail_inval;
        }

        sfc_adapter_lock(sa);

        rc = sfc_check_scatter_on_all_rx_queues(sa, pdu);
        if (rc != 0)
                goto fail_check_scatter;

        if (pdu != sa->port.pdu) {
                if (sa->state == SFC_ETHDEV_STARTED) {
                        sfc_stop(sa);

                        old_pdu = sa->port.pdu;
                        sa->port.pdu = pdu;
                        rc = sfc_start(sa);
                        if (rc != 0)
                                goto fail_start;
                } else {
                        sa->port.pdu = pdu;
                }
        }

        /*
         * The driver does not use it, but other PMDs update jumbo frame
         * flag and max_rx_pkt_len when MTU is set.
         */
        if (mtu > RTE_ETHER_MTU) {
                struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
                rxmode->offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
        }

        dev->data->dev_conf.rxmode.max_rx_pkt_len = sa->port.pdu;

        sfc_adapter_unlock(sa);

        sfc_log_init(sa, "done");
        return 0;

fail_start:
        sa->port.pdu = old_pdu;
        if (sfc_start(sa) != 0)
                sfc_err(sa, "cannot start with neither new (%u) nor old (%u) "
                        "PDU max size - port is stopped",
                        (unsigned int)pdu, (unsigned int)old_pdu);

fail_check_scatter:
        sfc_adapter_unlock(sa);

fail_inval:
        sfc_log_init(sa, "failed %d", rc);
        SFC_ASSERT(rc > 0);
        return -rc;
}
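
/*
 * Descriptive note on the callback above: changing the MTU on a started
 * port requires a full stop/start cycle, because the PDU size is applied
 * during port start. If the restart with the new PDU fails, the old PDU
 * is restored; if even that restart fails, the port is left stopped.
 */
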
static int
sfc_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
        struct sfc_port *port = &sa->port;
        struct rte_ether_addr *old_addr = &dev->data->mac_addrs[0];
        int rc = 0;

        sfc_adapter_lock(sa);

        if (rte_is_same_ether_addr(mac_addr, &port->default_mac_addr))
                goto unlock;

        /*
         * Copy the address to the device private data so that
         * it can be restored in case of adapter restart.
         */
        rte_ether_addr_copy(mac_addr, &port->default_mac_addr);

        /*
         * Neither of the two following checks can return
         * an error. The new MAC address is preserved in
         * the device private data and can be activated
         * on the next port start if the user prevents
         * isolated mode from being enabled.
         */
        if (sfc_sa2shared(sa)->isolated) {
                sfc_warn(sa, "isolated mode is active on the port");
                sfc_warn(sa, "will not set MAC address");
                goto unlock;
        }

        if (sa->state != SFC_ETHDEV_STARTED) {
                sfc_notice(sa, "the port is not started");
                sfc_notice(sa, "the new MAC address will be set on port start");

                goto unlock;
        }

        if (encp->enc_allow_set_mac_with_installed_filters) {
                rc = efx_mac_addr_set(sa->nic, mac_addr->addr_bytes);
                if (rc != 0) {
                        sfc_err(sa, "cannot set MAC address (rc = %u)", rc);
                        goto unlock;
                }

                /*
                 * Changing the MAC address by means of MCDI request
                 * has no effect on received traffic, therefore
                 * we also need to update unicast filters
                 */
                rc = sfc_set_rx_mode_unchecked(sa);
                if (rc != 0) {
                        sfc_err(sa, "cannot set filter (rc = %u)", rc);
                        /* Rollback the old address */
                        (void)efx_mac_addr_set(sa->nic, old_addr->addr_bytes);
                        (void)sfc_set_rx_mode_unchecked(sa);
                }
        } else {
                sfc_warn(sa, "cannot set MAC address with filters installed");
                sfc_warn(sa, "adapter will be restarted to pick the new MAC");
                sfc_warn(sa, "(some traffic may be dropped)");

                /*
                 * Since setting MAC address with filters installed is not
                 * allowed on the adapter, the new MAC address will be set
                 * by means of adapter restart. sfc_start() shall retrieve
                 * the new address from the device private data and set it.
                 */
                sfc_stop(sa);
                rc = sfc_start(sa);
                if (rc != 0)
                        sfc_err(sa, "cannot restart adapter (rc = %u)", rc);
        }

unlock:
        if (rc != 0)
                rte_ether_addr_copy(old_addr, &port->default_mac_addr);

        sfc_adapter_unlock(sa);

        SFC_ASSERT(rc >= 0);
        return -rc;
}


static int
sfc_set_mc_addr_list(struct rte_eth_dev *dev,
                struct rte_ether_addr *mc_addr_set, uint32_t nb_mc_addr)
{
        struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
        struct sfc_port *port = &sa->port;
        uint8_t *mc_addrs = port->mcast_addrs;
        int rc;
        unsigned int i;

        if (sfc_sa2shared(sa)->isolated) {
                sfc_err(sa, "isolated mode is active on the port");
                sfc_err(sa, "will not set multicast address list");
                return -ENOTSUP;
        }

        if (mc_addrs == NULL)
                return -ENOBUFS;

        if (nb_mc_addr > port->max_mcast_addrs) {
                sfc_err(sa, "too many multicast addresses: %u > %u",
                         nb_mc_addr, port->max_mcast_addrs);
                return -EINVAL;
        }

        for (i = 0; i < nb_mc_addr; ++i) {
                rte_memcpy(mc_addrs, mc_addr_set[i].addr_bytes,
                                 EFX_MAC_ADDR_LEN);
                mc_addrs += EFX_MAC_ADDR_LEN;
        }

        port->nb_mcast_addrs = nb_mc_addr;

        if (sa->state != SFC_ETHDEV_STARTED)
                return 0;

        rc = efx_mac_multicast_list_set(sa->nic, port->mcast_addrs,
                                        port->nb_mcast_addrs);
        if (rc != 0)
                sfc_err(sa, "cannot set multicast address list (rc = %u)", rc);

        SFC_ASSERT(rc >= 0);
        return -rc;
}

/*
 * The function is used by the secondary process as well. It must not
 * use any process-local pointers from the adapter data.
 */
static void
sfc_rx_queue_info_get(struct rte_eth_dev *dev, uint16_t ethdev_qid,
                      struct rte_eth_rxq_info *qinfo)
{
        struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
        sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
        struct sfc_rxq_info *rxq_info;

        rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);

        qinfo->mp = rxq_info->refill_mb_pool;
        qinfo->conf.rx_free_thresh = rxq_info->refill_threshold;
        qinfo->conf.rx_drop_en = 1;
        qinfo->conf.rx_deferred_start = rxq_info->deferred_start;
        qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
        if (rxq_info->type_flags & EFX_RXQ_FLAG_SCATTER) {
                qinfo->conf.offloads |= DEV_RX_OFFLOAD_SCATTER;
                qinfo->scattered_rx = 1;
        }
        qinfo->nb_desc = rxq_info->entries;
}

/*
 * The function is used by the secondary process as well. It must not
 * use any process-local pointers from the adapter data.
 */
static void
sfc_tx_queue_info_get(struct rte_eth_dev *dev, uint16_t ethdev_qid,
                      struct rte_eth_txq_info *qinfo)
{
        struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
        struct sfc_txq_info *txq_info;

        SFC_ASSERT(ethdev_qid < sas->ethdev_txq_count);

        txq_info = sfc_txq_info_by_ethdev_qid(sas, ethdev_qid);

        memset(qinfo, 0, sizeof(*qinfo));

        qinfo->conf.offloads = txq_info->offloads;
        qinfo->conf.tx_free_thresh = txq_info->free_thresh;
        qinfo->conf.tx_deferred_start = txq_info->deferred_start;
        qinfo->nb_desc = txq_info->entries;
}

/*
 * The function is used by the secondary process as well. It must not
 * use any process-local pointers from the adapter data.
 */
static uint32_t
sfc_rx_queue_count(struct rte_eth_dev *dev, uint16_t ethdev_qid)
{
        const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
        struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
        sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
        struct sfc_rxq_info *rxq_info;

        rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);

        if ((rxq_info->state & SFC_RXQ_STARTED) == 0)
                return 0;

        return sap->dp_rx->qdesc_npending(rxq_info->dp);
}

/*
 * The function is used by the secondary process as well. It must not
 * use any process-local pointers from the adapter data.
 */
static int
sfc_rx_descriptor_status(void *queue, uint16_t offset)
{
        struct sfc_dp_rxq *dp_rxq = queue;
        const struct sfc_dp_rx *dp_rx;

        dp_rx = sfc_dp_rx_by_dp_rxq(dp_rxq);

        return dp_rx->qdesc_status(dp_rxq, offset);
}

/*
 * The function is used by the secondary process as well. It must not
 * use any process-local pointers from the adapter data.
 */
static int
sfc_tx_descriptor_status(void *queue, uint16_t offset)
{
        struct sfc_dp_txq *dp_txq = queue;
        const struct sfc_dp_tx *dp_tx;

        dp_tx = sfc_dp_tx_by_dp_txq(dp_txq);

        return dp_tx->qdesc_status(dp_txq, offset);
}
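
/*
 * Illustrative usage (hypothetical application code): the two
 * descriptor-status callbacks above are reached through the generic
 * API, e.g.
 *
 *     int st = rte_eth_rx_descriptor_status(port_id, queue_id, offset);
 *
 * which may return RTE_ETH_RX_DESC_AVAIL, RTE_ETH_RX_DESC_DONE or
 * RTE_ETH_RX_DESC_UNAVAIL depending on the datapath implementation.
 */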
1400
1401 static int
1402 sfc_rx_queue_start(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1403 {
1404         struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1405         struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1406         sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
1407         struct sfc_rxq_info *rxq_info;
1408         sfc_sw_index_t sw_index;
1409         int rc;
1410
1411         sfc_log_init(sa, "RxQ=%u", ethdev_qid);
1412
1413         sfc_adapter_lock(sa);
1414
1415         rc = EINVAL;
1416         if (sa->state != SFC_ETHDEV_STARTED)
1417                 goto fail_not_started;
1418
1419         rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
1420         if (rxq_info->state != SFC_RXQ_INITIALIZED)
1421                 goto fail_not_setup;
1422
1423         sw_index = sfc_rxq_sw_index_by_ethdev_rx_qid(sas, sfc_ethdev_qid);
1424         rc = sfc_rx_qstart(sa, sw_index);
1425         if (rc != 0)
1426                 goto fail_rx_qstart;
1427
1428         rxq_info->deferred_started = B_TRUE;
1429
1430         sfc_adapter_unlock(sa);
1431
1432         return 0;
1433
1434 fail_rx_qstart:
1435 fail_not_setup:
1436 fail_not_started:
1437         sfc_adapter_unlock(sa);
1438         SFC_ASSERT(rc > 0);
1439         return -rc;
1440 }
1441
1442 static int
1443 sfc_rx_queue_stop(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1444 {
1445         struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1446         struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1447         sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
1448         struct sfc_rxq_info *rxq_info;
1449         sfc_sw_index_t sw_index;
1450
1451         sfc_log_init(sa, "RxQ=%u", ethdev_qid);
1452
1453         sfc_adapter_lock(sa);
1454
1455         sw_index = sfc_rxq_sw_index_by_ethdev_rx_qid(sas, sfc_ethdev_qid);
1456         sfc_rx_qstop(sa, sw_index);
1457
1458         rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
1459         rxq_info->deferred_started = B_FALSE;
1460
1461         sfc_adapter_unlock(sa);
1462
1463         return 0;
1464 }
1465
1466 static int
1467 sfc_tx_queue_start(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1468 {
1469         struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1470         struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1471         struct sfc_txq_info *txq_info;
1472         sfc_sw_index_t sw_index;
1473         int rc;
1474
1475         sfc_log_init(sa, "TxQ = %u", ethdev_qid);
1476
1477         sfc_adapter_lock(sa);
1478
1479         rc = EINVAL;
1480         if (sa->state != SFC_ETHDEV_STARTED)
1481                 goto fail_not_started;
1482
1483         txq_info = sfc_txq_info_by_ethdev_qid(sas, ethdev_qid);
1484         if (txq_info->state != SFC_TXQ_INITIALIZED)
1485                 goto fail_not_setup;
1486
1487         sw_index = sfc_txq_sw_index_by_ethdev_tx_qid(sas, ethdev_qid);
1488         rc = sfc_tx_qstart(sa, sw_index);
1489         if (rc != 0)
1490                 goto fail_tx_qstart;
1491
1492         txq_info->deferred_started = B_TRUE;
1493
1494         sfc_adapter_unlock(sa);
1495         return 0;
1496
1497 fail_tx_qstart:
1498
1499 fail_not_setup:
1500 fail_not_started:
1501         sfc_adapter_unlock(sa);
1502         SFC_ASSERT(rc > 0);
1503         return -rc;
1504 }
1505
1506 static int
1507 sfc_tx_queue_stop(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1508 {
1509         struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1510         struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1511         struct sfc_txq_info *txq_info;
1512         sfc_sw_index_t sw_index;
1513
1514         sfc_log_init(sa, "TxQ = %u", ethdev_qid);
1515
1516         sfc_adapter_lock(sa);
1517
1518         sw_index = sfc_txq_sw_index_by_ethdev_tx_qid(sas, ethdev_qid);
1519         sfc_tx_qstop(sa, sw_index);
1520
1521         txq_info = sfc_txq_info_by_ethdev_qid(sas, ethdev_qid);
1522         txq_info->deferred_started = B_FALSE;
1523
1524         sfc_adapter_unlock(sa);
1525         return 0;
1526 }
1527
1528 static efx_tunnel_protocol_t
1529 sfc_tunnel_rte_type_to_efx_udp_proto(enum rte_eth_tunnel_type rte_type)
1530 {
1531         switch (rte_type) {
1532         case RTE_TUNNEL_TYPE_VXLAN:
1533                 return EFX_TUNNEL_PROTOCOL_VXLAN;
1534         case RTE_TUNNEL_TYPE_GENEVE:
1535                 return EFX_TUNNEL_PROTOCOL_GENEVE;
1536         default:
1537                 return EFX_TUNNEL_NPROTOS;
1538         }
1539 }
1540
1541 enum sfc_udp_tunnel_op_e {
1542         SFC_UDP_TUNNEL_ADD_PORT,
1543         SFC_UDP_TUNNEL_DEL_PORT,
1544 };
1545
1546 static int
1547 sfc_dev_udp_tunnel_op(struct rte_eth_dev *dev,
1548                       struct rte_eth_udp_tunnel *tunnel_udp,
1549                       enum sfc_udp_tunnel_op_e op)
1550 {
1551         struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1552         efx_tunnel_protocol_t tunnel_proto;
1553         int rc;
1554
1555         sfc_log_init(sa, "%s udp_port=%u prot_type=%u",
1556                      (op == SFC_UDP_TUNNEL_ADD_PORT) ? "add" :
1557                      (op == SFC_UDP_TUNNEL_DEL_PORT) ? "delete" : "unknown",
1558                      tunnel_udp->udp_port, tunnel_udp->prot_type);
1559
1560         tunnel_proto =
1561                 sfc_tunnel_rte_type_to_efx_udp_proto(tunnel_udp->prot_type);
1562         if (tunnel_proto >= EFX_TUNNEL_NPROTOS) {
1563                 rc = ENOTSUP;
1564                 goto fail_bad_proto;
1565         }
1566
1567         sfc_adapter_lock(sa);
1568
1569         switch (op) {
1570         case SFC_UDP_TUNNEL_ADD_PORT:
1571                 rc = efx_tunnel_config_udp_add(sa->nic,
1572                                                tunnel_udp->udp_port,
1573                                                tunnel_proto);
1574                 break;
1575         case SFC_UDP_TUNNEL_DEL_PORT:
1576                 rc = efx_tunnel_config_udp_remove(sa->nic,
1577                                                   tunnel_udp->udp_port,
1578                                                   tunnel_proto);
1579                 break;
1580         default:
1581                 rc = EINVAL;
1582                 goto fail_bad_op;
1583         }
1584
1585         if (rc != 0)
1586                 goto fail_op;
1587
1588         if (sa->state == SFC_ETHDEV_STARTED) {
1589                 rc = efx_tunnel_reconfigure(sa->nic);
1590                 if (rc == EAGAIN) {
1591                         /*
1592                          * The configuration has been accepted by the FW,
1593                          * and an MC reboot is initiated to apply it. The
1594                          * MC reboot is handled in the usual way (MC reboot
1595                          * event on the management event queue followed by
1596                          * adapter restart).
1597                          */
1598                         rc = 0;
1599                 } else if (rc != 0) {
1600                         goto fail_reconfigure;
1601                 }
1602         }
1603
1604         sfc_adapter_unlock(sa);
1605         return 0;
1606
1607 fail_reconfigure:
1608         /* Remove/restore the entry since the change caused the failure */
1609         switch (op) {
1610         case SFC_UDP_TUNNEL_ADD_PORT:
1611                 (void)efx_tunnel_config_udp_remove(sa->nic,
1612                                                    tunnel_udp->udp_port,
1613                                                    tunnel_proto);
1614                 break;
1615         case SFC_UDP_TUNNEL_DEL_PORT:
1616                 (void)efx_tunnel_config_udp_add(sa->nic,
1617                                                 tunnel_udp->udp_port,
1618                                                 tunnel_proto);
1619                 break;
1620         }
1621
1622 fail_op:
1623 fail_bad_op:
1624         sfc_adapter_unlock(sa);
1625
1626 fail_bad_proto:
1627         SFC_ASSERT(rc > 0);
1628         return -rc;
1629 }
1630
1631 static int
1632 sfc_dev_udp_tunnel_port_add(struct rte_eth_dev *dev,
1633                             struct rte_eth_udp_tunnel *tunnel_udp)
1634 {
1635         return sfc_dev_udp_tunnel_op(dev, tunnel_udp, SFC_UDP_TUNNEL_ADD_PORT);
1636 }
1637
1638 static int
1639 sfc_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
1640                             struct rte_eth_udp_tunnel *tunnel_udp)
1641 {
1642         return sfc_dev_udp_tunnel_op(dev, tunnel_udp, SFC_UDP_TUNNEL_DEL_PORT);
1643 }
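
/*
 * Usage sketch (application side; "port_id" is an assumption): the two
 * wrappers above are reached via the generic tunnel-port API, e.g. to
 * offload VXLAN on its IANA-assigned UDP port:
 *
 *	struct rte_eth_udp_tunnel tunnel = {
 *		.udp_port = 4789,
 *		.prot_type = RTE_TUNNEL_TYPE_VXLAN,
 *	};
 *
 *	rte_eth_dev_udp_tunnel_port_add(port_id, &tunnel);
 *	...
 *	rte_eth_dev_udp_tunnel_port_delete(port_id, &tunnel);
 */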
1644
1645 /*
1646  * The function is used by the secondary process as well. It must not
1647  * use any process-local pointers from the adapter data.
1648  */
1649 static int
1650 sfc_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
1651                           struct rte_eth_rss_conf *rss_conf)
1652 {
1653         struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1654         struct sfc_rss *rss = &sas->rss;
1655
1656         if (rss->context_type != EFX_RX_SCALE_EXCLUSIVE)
1657                 return -ENOTSUP;
1658
1659         /*
1660          * The mapping of hash configuration between RTE and EFX is not
1661          * one-to-one; a conversion is done here to derive the set of
1662          * ETH_RSS flags which corresponds to the active EFX configuration
1663          * stored locally in 'sfc_adapter' and kept up-to-date.
1664          */
1665         rss_conf->rss_hf = sfc_rx_hf_efx_to_rte(rss, rss->hash_types);
1666         rss_conf->rss_key_len = EFX_RSS_KEY_SIZE;
1667         if (rss_conf->rss_key != NULL)
1668                 rte_memcpy(rss_conf->rss_key, rss->key, EFX_RSS_KEY_SIZE);
1669
1670         return 0;
1671 }
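
/*
 * Usage sketch (application side; "port_id" assumed). The key buffer
 * must hold EFX_RSS_KEY_SIZE bytes since that is what this op copies:
 *
 *	uint8_t key[EFX_RSS_KEY_SIZE];
 *	struct rte_eth_rss_conf conf = { .rss_key = key };
 *
 *	if (rte_eth_dev_rss_hash_conf_get(port_id, &conf) == 0)
 *		printf("rss_hf=0x%" PRIx64 "\n", conf.rss_hf);
 */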
1672
1673 static int
1674 sfc_dev_rss_hash_update(struct rte_eth_dev *dev,
1675                         struct rte_eth_rss_conf *rss_conf)
1676 {
1677         struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1678         struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
1679         unsigned int efx_hash_types;
1680         uint32_t contexts[] = {EFX_RSS_CONTEXT_DEFAULT, rss->dummy_rss_context};
1681         unsigned int n_contexts;
1682         unsigned int mode_i = 0;
1683         unsigned int key_i = 0;
1684         unsigned int i = 0;
1685         int rc = 0;
1686
1687         n_contexts = rss->dummy_rss_context == EFX_RSS_CONTEXT_DEFAULT ? 1 : 2;
1688
1689         if (sfc_sa2shared(sa)->isolated)
1690                 return -ENOTSUP;
1691
1692         if (rss->context_type != EFX_RX_SCALE_EXCLUSIVE) {
1693                 sfc_err(sa, "RSS is not available");
1694                 return -ENOTSUP;
1695         }
1696
1697         if (rss->channels == 0) {
1698                 sfc_err(sa, "RSS is not configured");
1699                 return -EINVAL;
1700         }
1701
1702         if ((rss_conf->rss_key != NULL) &&
1703             (rss_conf->rss_key_len != sizeof(rss->key))) {
1704                 sfc_err(sa, "RSS key size is wrong (should be %zu)",
1705                         sizeof(rss->key));
1706                 return -EINVAL;
1707         }
1708
1709         sfc_adapter_lock(sa);
1710
1711         rc = sfc_rx_hf_rte_to_efx(sa, rss_conf->rss_hf, &efx_hash_types);
1712         if (rc != 0)
1713                 goto fail_rx_hf_rte_to_efx;
1714
1715         for (mode_i = 0; mode_i < n_contexts; mode_i++) {
1716                 rc = efx_rx_scale_mode_set(sa->nic, contexts[mode_i],
1717                                            rss->hash_alg, efx_hash_types,
1718                                            B_TRUE);
1719                 if (rc != 0)
1720                         goto fail_scale_mode_set;
1721         }
1722
1723         if (rss_conf->rss_key != NULL) {
1724                 if (sa->state == SFC_ETHDEV_STARTED) {
1725                         for (key_i = 0; key_i < n_contexts; key_i++) {
1726                                 rc = efx_rx_scale_key_set(sa->nic,
1727                                                           contexts[key_i],
1728                                                           rss_conf->rss_key,
1729                                                           sizeof(rss->key));
1730                                 if (rc != 0)
1731                                         goto fail_scale_key_set;
1732                         }
1733                 }
1734
1735                 rte_memcpy(rss->key, rss_conf->rss_key, sizeof(rss->key));
1736         }
1737
1738         rss->hash_types = efx_hash_types;
1739
1740         sfc_adapter_unlock(sa);
1741
1742         return 0;
1743
1744 fail_scale_key_set:
1745         for (i = 0; i < key_i; i++) {
1746                 if (efx_rx_scale_key_set(sa->nic, contexts[i], rss->key,
1747                                          sizeof(rss->key)) != 0)
1748                         sfc_err(sa, "failed to restore RSS key");
1749         }
1750
1751 fail_scale_mode_set:
1752         for (i = 0; i < mode_i; i++) {
1753                 if (efx_rx_scale_mode_set(sa->nic, contexts[i],
1754                                           EFX_RX_HASHALG_TOEPLITZ,
1755                                           rss->hash_types, B_TRUE) != 0)
1756                         sfc_err(sa, "failed to restore RSS mode");
1757         }
1758
1759 fail_rx_hf_rte_to_efx:
1760         sfc_adapter_unlock(sa);
1761         return -rc;
1762 }
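
/*
 * Usage sketch (application side; values are illustrative). Per the
 * checks above, a key, if supplied, must be exactly EFX_RSS_KEY_SIZE
 * bytes long:
 *
 *	uint8_t key[EFX_RSS_KEY_SIZE];
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = key,
 *		.rss_key_len = sizeof(key),
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *	};
 *
 *	... fill "key" with hash key material ...
 *	rte_eth_dev_rss_hash_update(port_id, &conf);
 */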
1763
1764 /*
1765  * The function is used by the secondary process as well. It must not
1766  * use any process-local pointers from the adapter data.
1767  */
1768 static int
1769 sfc_dev_rss_reta_query(struct rte_eth_dev *dev,
1770                        struct rte_eth_rss_reta_entry64 *reta_conf,
1771                        uint16_t reta_size)
1772 {
1773         struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1774         struct sfc_rss *rss = &sas->rss;
1775         int entry;
1776
1777         if (rss->context_type != EFX_RX_SCALE_EXCLUSIVE || sas->isolated)
1778                 return -ENOTSUP;
1779
1780         if (rss->channels == 0)
1781                 return -EINVAL;
1782
1783         if (reta_size != EFX_RSS_TBL_SIZE)
1784                 return -EINVAL;
1785
1786         for (entry = 0; entry < reta_size; entry++) {
1787                 int grp = entry / RTE_RETA_GROUP_SIZE;
1788                 int grp_idx = entry % RTE_RETA_GROUP_SIZE;
1789
1790                 if ((reta_conf[grp].mask >> grp_idx) & 1)
1791                         reta_conf[grp].reta[grp_idx] = rss->tbl[entry];
1792         }
1793
1794         return 0;
1795 }
1796
1797 static int
1798 sfc_dev_rss_reta_update(struct rte_eth_dev *dev,
1799                         struct rte_eth_rss_reta_entry64 *reta_conf,
1800                         uint16_t reta_size)
1801 {
1802         struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1803         struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
1804         unsigned int *rss_tbl_new;
1805         uint16_t entry;
1806         int rc = 0;
1807
1809         if (sfc_sa2shared(sa)->isolated)
1810                 return -ENOTSUP;
1811
1812         if (rss->context_type != EFX_RX_SCALE_EXCLUSIVE) {
1813                 sfc_err(sa, "RSS is not available");
1814                 return -ENOTSUP;
1815         }
1816
1817         if (rss->channels == 0) {
1818                 sfc_err(sa, "RSS is not configured");
1819                 return -EINVAL;
1820         }
1821
1822         if (reta_size != EFX_RSS_TBL_SIZE) {
1823                 sfc_err(sa, "RETA size is wrong (should be %u)",
1824                         EFX_RSS_TBL_SIZE);
1825                 return -EINVAL;
1826         }
1827
1828         rss_tbl_new = rte_zmalloc("rss_tbl_new", sizeof(rss->tbl), 0);
1829         if (rss_tbl_new == NULL)
1830                 return -ENOMEM;
1831
1832         sfc_adapter_lock(sa);
1833
1834         rte_memcpy(rss_tbl_new, rss->tbl, sizeof(rss->tbl));
1835
1836         for (entry = 0; entry < reta_size; entry++) {
1837                 int grp_idx = entry % RTE_RETA_GROUP_SIZE;
1838                 struct rte_eth_rss_reta_entry64 *grp;
1839
1840                 grp = &reta_conf[entry / RTE_RETA_GROUP_SIZE];
1841
1842                 if (grp->mask & (1ull << grp_idx)) {
1843                         if (grp->reta[grp_idx] >= rss->channels) {
1844                                 rc = EINVAL;
1845                                 goto bad_reta_entry;
1846                         }
1847                         rss_tbl_new[entry] = grp->reta[grp_idx];
1848                 }
1849         }
1850
1851         if (sa->state == SFC_ETHDEV_STARTED) {
1852                 rc = efx_rx_scale_tbl_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
1853                                           rss_tbl_new, EFX_RSS_TBL_SIZE);
1854                 if (rc != 0)
1855                         goto fail_scale_tbl_set;
1856         }
1857
1858         rte_memcpy(rss->tbl, rss_tbl_new, sizeof(rss->tbl));
1859
1860 fail_scale_tbl_set:
1861 bad_reta_entry:
1862         sfc_adapter_unlock(sa);
1863
1864         rte_free(rss_tbl_new);
1865
1866         SFC_ASSERT(rc >= 0);
1867         return -rc;
1868 }
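
/*
 * Usage sketch (application side; "port_id" assumed). reta_size must be
 * exactly EFX_RSS_TBL_SIZE, covered by RTE_RETA_GROUP_SIZE-entry groups:
 *
 *	struct rte_eth_rss_reta_entry64 reta[EFX_RSS_TBL_SIZE /
 *					     RTE_RETA_GROUP_SIZE];
 *	unsigned int i;
 *
 *	memset(reta, 0, sizeof(reta));
 *	for (i = 0; i < RTE_DIM(reta); i++)
 *		reta[i].mask = UINT64_MAX;
 *	rte_eth_dev_rss_reta_query(port_id, reta, EFX_RSS_TBL_SIZE);
 */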
1869
1870 static int
1871 sfc_dev_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
1872                      const struct rte_flow_ops **ops)
1873 {
1874         *ops = &sfc_flow_ops;
1875         return 0;
1876 }
1877
1878 static int
1879 sfc_pool_ops_supported(struct rte_eth_dev *dev, const char *pool)
1880 {
1881         const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
1882
1883         /*
1884          * If the Rx datapath does not provide a callback to check the
1885          * mempool, all pools are supported.
1886          */
1887         if (sap->dp_rx->pool_ops_supported == NULL)
1888                 return 1;
1889
1890         return sap->dp_rx->pool_ops_supported(pool);
1891 }
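
/*
 * Usage sketch (application side): a caller may probe a mempool ops
 * name before creating its packet pools, e.g.:
 *
 *	if (rte_eth_dev_pool_ops_supported(port_id, "ring_mp_mc") == 1)
 *		... the default ring-based mempool ops are usable ...
 */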
1892
1893 static int
1894 sfc_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1895 {
1896         const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
1897         struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1898         sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
1899         struct sfc_rxq_info *rxq_info;
1900
1901         rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
1902
1903         return sap->dp_rx->intr_enable(rxq_info->dp);
1904 }
1905
1906 static int
1907 sfc_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1908 {
1909         const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
1910         struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1911         sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
1912         struct sfc_rxq_info *rxq_info;
1913
1914         rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
1915
1916         return sap->dp_rx->intr_disable(rxq_info->dp);
1917 }
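
/*
 * Usage sketch (application event loop; names assumed, and the port
 * must have been configured with intr_conf.rxq = 1). These ops back
 * the generic Rx interrupt control used together with rte_epoll:
 *
 *	rte_eth_dev_rx_intr_enable(port_id, qid);
 *	rte_epoll_wait(RTE_EPOLL_PER_THREAD, events, RTE_DIM(events), -1);
 *	rte_eth_dev_rx_intr_disable(port_id, qid);
 *	... drain the queue with rte_eth_rx_burst() ...
 */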
1918
1919 static const struct eth_dev_ops sfc_eth_dev_ops = {
1920         .dev_configure                  = sfc_dev_configure,
1921         .dev_start                      = sfc_dev_start,
1922         .dev_stop                       = sfc_dev_stop,
1923         .dev_set_link_up                = sfc_dev_set_link_up,
1924         .dev_set_link_down              = sfc_dev_set_link_down,
1925         .dev_close                      = sfc_dev_close,
1926         .promiscuous_enable             = sfc_dev_promisc_enable,
1927         .promiscuous_disable            = sfc_dev_promisc_disable,
1928         .allmulticast_enable            = sfc_dev_allmulti_enable,
1929         .allmulticast_disable           = sfc_dev_allmulti_disable,
1930         .link_update                    = sfc_dev_link_update,
1931         .stats_get                      = sfc_stats_get,
1932         .stats_reset                    = sfc_stats_reset,
1933         .xstats_get                     = sfc_xstats_get,
1934         .xstats_reset                   = sfc_stats_reset,
1935         .xstats_get_names               = sfc_xstats_get_names,
1936         .dev_infos_get                  = sfc_dev_infos_get,
1937         .dev_supported_ptypes_get       = sfc_dev_supported_ptypes_get,
1938         .mtu_set                        = sfc_dev_set_mtu,
1939         .rx_queue_start                 = sfc_rx_queue_start,
1940         .rx_queue_stop                  = sfc_rx_queue_stop,
1941         .tx_queue_start                 = sfc_tx_queue_start,
1942         .tx_queue_stop                  = sfc_tx_queue_stop,
1943         .rx_queue_setup                 = sfc_rx_queue_setup,
1944         .rx_queue_release               = sfc_rx_queue_release,
1945         .rx_queue_intr_enable           = sfc_rx_queue_intr_enable,
1946         .rx_queue_intr_disable          = sfc_rx_queue_intr_disable,
1947         .tx_queue_setup                 = sfc_tx_queue_setup,
1948         .tx_queue_release               = sfc_tx_queue_release,
1949         .flow_ctrl_get                  = sfc_flow_ctrl_get,
1950         .flow_ctrl_set                  = sfc_flow_ctrl_set,
1951         .mac_addr_set                   = sfc_mac_addr_set,
1952         .udp_tunnel_port_add            = sfc_dev_udp_tunnel_port_add,
1953         .udp_tunnel_port_del            = sfc_dev_udp_tunnel_port_del,
1954         .reta_update                    = sfc_dev_rss_reta_update,
1955         .reta_query                     = sfc_dev_rss_reta_query,
1956         .rss_hash_update                = sfc_dev_rss_hash_update,
1957         .rss_hash_conf_get              = sfc_dev_rss_hash_conf_get,
1958         .flow_ops_get                   = sfc_dev_flow_ops_get,
1959         .set_mc_addr_list               = sfc_set_mc_addr_list,
1960         .rxq_info_get                   = sfc_rx_queue_info_get,
1961         .txq_info_get                   = sfc_tx_queue_info_get,
1962         .fw_version_get                 = sfc_fw_version_get,
1963         .xstats_get_by_id               = sfc_xstats_get_by_id,
1964         .xstats_get_names_by_id         = sfc_xstats_get_names_by_id,
1965         .pool_ops_supported             = sfc_pool_ops_supported,
1966 };
1967
1968 struct sfc_ethdev_init_data {
1969         uint16_t                nb_representors;
1970 };
1971
1972 /**
1973  * Duplicate a string in potentially shared memory, as required for
1974  * multi-process support.
1975  *
1976  * strdup() is unsuitable since it allocates from the process-local heap.
1977  */
1978 static char *
1979 sfc_strdup(const char *str)
1980 {
1981         size_t size;
1982         char *copy;
1983
1984         if (str == NULL)
1985                 return NULL;
1986
1987         size = strlen(str) + 1;
1988         copy = rte_malloc(__func__, size, 0);
1989         if (copy != NULL)
1990                 rte_memcpy(copy, str, size);
1991
1992         return copy;
1993 }
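
/*
 * E.g. the datapath names are stored via sfc_strdup() below so that a
 * secondary process can read them from the shared adapter data:
 *
 *	sas->dp_rx_name = sfc_strdup(dp_rx->dp.name);
 *	...
 *	rte_free(sas->dp_rx_name);
 */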
1994
1995 static int
1996 sfc_eth_dev_set_ops(struct rte_eth_dev *dev)
1997 {
1998         struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1999         struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
2000         const struct sfc_dp_rx *dp_rx;
2001         const struct sfc_dp_tx *dp_tx;
2002         const efx_nic_cfg_t *encp;
2003         unsigned int avail_caps = 0;
2004         const char *rx_name = NULL;
2005         const char *tx_name = NULL;
2006         int rc;
2007
2008         switch (sa->family) {
2009         case EFX_FAMILY_HUNTINGTON:
2010         case EFX_FAMILY_MEDFORD:
2011         case EFX_FAMILY_MEDFORD2:
2012                 avail_caps |= SFC_DP_HW_FW_CAP_EF10;
2013                 avail_caps |= SFC_DP_HW_FW_CAP_RX_EFX;
2014                 avail_caps |= SFC_DP_HW_FW_CAP_TX_EFX;
2015                 break;
2016         case EFX_FAMILY_RIVERHEAD:
2017                 avail_caps |= SFC_DP_HW_FW_CAP_EF100;
2018                 break;
2019         default:
2020                 break;
2021         }
2022
2023         encp = efx_nic_cfg_get(sa->nic);
2024         if (encp->enc_rx_es_super_buffer_supported)
2025                 avail_caps |= SFC_DP_HW_FW_CAP_RX_ES_SUPER_BUFFER;
2026
2027         rc = sfc_kvargs_process(sa, SFC_KVARG_RX_DATAPATH,
2028                                 sfc_kvarg_string_handler, &rx_name);
2029         if (rc != 0)
2030                 goto fail_kvarg_rx_datapath;
2031
2032         if (rx_name != NULL) {
2033                 dp_rx = sfc_dp_find_rx_by_name(&sfc_dp_head, rx_name);
2034                 if (dp_rx == NULL) {
2035                         sfc_err(sa, "Rx datapath %s not found", rx_name);
2036                         rc = ENOENT;
2037                         goto fail_dp_rx;
2038                 }
2039                 if (!sfc_dp_match_hw_fw_caps(&dp_rx->dp, avail_caps)) {
2040                         sfc_err(sa,
2041                                 "Insufficient HW/FW capabilities to use Rx datapath %s",
2042                                 rx_name);
2043                         rc = EINVAL;
2044                         goto fail_dp_rx_caps;
2045                 }
2046         } else {
2047                 dp_rx = sfc_dp_find_rx_by_caps(&sfc_dp_head, avail_caps);
2048                 if (dp_rx == NULL) {
2049                         sfc_err(sa, "Rx datapath by caps %#x not found",
2050                                 avail_caps);
2051                         rc = ENOENT;
2052                         goto fail_dp_rx;
2053                 }
2054         }
2055
2056         sas->dp_rx_name = sfc_strdup(dp_rx->dp.name);
2057         if (sas->dp_rx_name == NULL) {
2058                 rc = ENOMEM;
2059                 goto fail_dp_rx_name;
2060         }
2061
2062         sfc_notice(sa, "use %s Rx datapath", sas->dp_rx_name);
2063
2064         rc = sfc_kvargs_process(sa, SFC_KVARG_TX_DATAPATH,
2065                                 sfc_kvarg_string_handler, &tx_name);
2066         if (rc != 0)
2067                 goto fail_kvarg_tx_datapath;
2068
2069         if (tx_name != NULL) {
2070                 dp_tx = sfc_dp_find_tx_by_name(&sfc_dp_head, tx_name);
2071                 if (dp_tx == NULL) {
2072                         sfc_err(sa, "Tx datapath %s not found", tx_name);
2073                         rc = ENOENT;
2074                         goto fail_dp_tx;
2075                 }
2076                 if (!sfc_dp_match_hw_fw_caps(&dp_tx->dp, avail_caps)) {
2077                         sfc_err(sa,
2078                                 "Insufficient HW/FW capabilities to use Tx datapath %s",
2079                                 tx_name);
2080                         rc = EINVAL;
2081                         goto fail_dp_tx_caps;
2082                 }
2083         } else {
2084                 dp_tx = sfc_dp_find_tx_by_caps(&sfc_dp_head, avail_caps);
2085                 if (dp_tx == NULL) {
2086                         sfc_err(sa, "Tx datapath by caps %#x not found",
2087                                 avail_caps);
2088                         rc = ENOENT;
2089                         goto fail_dp_tx;
2090                 }
2091         }
2092
2093         sas->dp_tx_name = sfc_strdup(dp_tx->dp.name);
2094         if (sas->dp_tx_name == NULL) {
2095                 rc = ENOMEM;
2096                 goto fail_dp_tx_name;
2097         }
2098
2099         sfc_notice(sa, "use %s Tx datapath", sas->dp_tx_name);
2100
2101         sa->priv.dp_rx = dp_rx;
2102         sa->priv.dp_tx = dp_tx;
2103
2104         dev->rx_pkt_burst = dp_rx->pkt_burst;
2105         dev->tx_pkt_prepare = dp_tx->pkt_prepare;
2106         dev->tx_pkt_burst = dp_tx->pkt_burst;
2107
2108         dev->rx_queue_count = sfc_rx_queue_count;
2109         dev->rx_descriptor_status = sfc_rx_descriptor_status;
2110         dev->tx_descriptor_status = sfc_tx_descriptor_status;
2111         dev->dev_ops = &sfc_eth_dev_ops;
2112
2113         return 0;
2114
2115 fail_dp_tx_name:
2116 fail_dp_tx_caps:
2117 fail_dp_tx:
2118 fail_kvarg_tx_datapath:
2119         rte_free(sas->dp_rx_name);
2120         sas->dp_rx_name = NULL;
2121
2122 fail_dp_rx_name:
2123 fail_dp_rx_caps:
2124 fail_dp_rx:
2125 fail_kvarg_rx_datapath:
2126         return rc;
2127 }
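
/*
 * Devargs sketch (illustrative command line, not driver code): the
 * kvargs consumed above let the user pin a datapath explicitly:
 *
 *	dpdk-testpmd -a 0000:01:00.0,rx_datapath=ef100,tx_datapath=ef100 ...
 *
 * Without them, the first registered datapath matching the reported
 * HW/FW capabilities wins (see sfc_register_dp() below for the order).
 */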
2128
2129 static void
2130 sfc_eth_dev_clear_ops(struct rte_eth_dev *dev)
2131 {
2132         struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
2133         struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
2134
2135         dev->dev_ops = NULL;
2136         dev->tx_pkt_prepare = NULL;
2137         dev->rx_pkt_burst = NULL;
2138         dev->tx_pkt_burst = NULL;
2139
2140         rte_free(sas->dp_tx_name);
2141         sas->dp_tx_name = NULL;
2142         sa->priv.dp_tx = NULL;
2143
2144         rte_free(sas->dp_rx_name);
2145         sas->dp_rx_name = NULL;
2146         sa->priv.dp_rx = NULL;
2147 }
2148
2149 static const struct eth_dev_ops sfc_eth_dev_secondary_ops = {
2150         .dev_supported_ptypes_get       = sfc_dev_supported_ptypes_get,
2151         .reta_query                     = sfc_dev_rss_reta_query,
2152         .rss_hash_conf_get              = sfc_dev_rss_hash_conf_get,
2153         .rxq_info_get                   = sfc_rx_queue_info_get,
2154         .txq_info_get                   = sfc_tx_queue_info_get,
2155 };
2156
2157 static int
2158 sfc_eth_dev_secondary_init(struct rte_eth_dev *dev, uint32_t logtype_main)
2159 {
2160         struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
2161         struct sfc_adapter_priv *sap;
2162         const struct sfc_dp_rx *dp_rx;
2163         const struct sfc_dp_tx *dp_tx;
2164         int rc;
2165
2166         /*
2167          * Allocate process-private data from the heap, since it must not
2168          * be located in shared memory allocated with the rte_malloc() API.
2169          */
2170         sap = calloc(1, sizeof(*sap));
2171         if (sap == NULL) {
2172                 rc = ENOMEM;
2173                 goto fail_alloc_priv;
2174         }
2175
2176         sap->logtype_main = logtype_main;
2177
2178         dp_rx = sfc_dp_find_rx_by_name(&sfc_dp_head, sas->dp_rx_name);
2179         if (dp_rx == NULL) {
2180                 SFC_LOG(sas, RTE_LOG_ERR, logtype_main,
2181                         "cannot find %s Rx datapath", sas->dp_rx_name);
2182                 rc = ENOENT;
2183                 goto fail_dp_rx;
2184         }
2185         if (~dp_rx->features & SFC_DP_RX_FEAT_MULTI_PROCESS) {
2186                 SFC_LOG(sas, RTE_LOG_ERR, logtype_main,
2187                         "%s Rx datapath does not support multi-process",
2188                         sas->dp_rx_name);
2189                 rc = EINVAL;
2190                 goto fail_dp_rx_multi_process;
2191         }
2192
2193         dp_tx = sfc_dp_find_tx_by_name(&sfc_dp_head, sas->dp_tx_name);
2194         if (dp_tx == NULL) {
2195                 SFC_LOG(sas, RTE_LOG_ERR, logtype_main,
2196                         "cannot find %s Tx datapath", sas->dp_tx_name);
2197                 rc = ENOENT;
2198                 goto fail_dp_tx;
2199         }
2200         if (~dp_tx->features & SFC_DP_TX_FEAT_MULTI_PROCESS) {
2201                 SFC_LOG(sas, RTE_LOG_ERR, logtype_main,
2202                         "%s Tx datapath does not support multi-process",
2203                         sas->dp_tx_name);
2204                 rc = EINVAL;
2205                 goto fail_dp_tx_multi_process;
2206         }
2207
2208         sap->dp_rx = dp_rx;
2209         sap->dp_tx = dp_tx;
2210
2211         dev->process_private = sap;
2212         dev->rx_pkt_burst = dp_rx->pkt_burst;
2213         dev->tx_pkt_prepare = dp_tx->pkt_prepare;
2214         dev->tx_pkt_burst = dp_tx->pkt_burst;
2215         dev->rx_queue_count = sfc_rx_queue_count;
2216         dev->rx_descriptor_status = sfc_rx_descriptor_status;
2217         dev->tx_descriptor_status = sfc_tx_descriptor_status;
2218         dev->dev_ops = &sfc_eth_dev_secondary_ops;
2219
2220         return 0;
2221
2222 fail_dp_tx_multi_process:
2223 fail_dp_tx:
2224 fail_dp_rx_multi_process:
2225 fail_dp_rx:
2226         free(sap);
2227
2228 fail_alloc_priv:
2229         return rc;
2230 }
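
/*
 * E.g. (illustrative) a secondary process attaching to the same device,
 * which lands in sfc_eth_dev_secondary_init() above:
 *
 *	dpdk-proc-info --proc-type=secondary -a 0000:01:00.0 -- --stats
 */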
2231
2232 static void
2233 sfc_register_dp(void)
2234 {
2235         /* Register once */
2236         if (TAILQ_EMPTY(&sfc_dp_head)) {
2237                 /* Registration order defines preference: EF100 first, EFX last */
2238                 sfc_dp_register(&sfc_dp_head, &sfc_ef100_rx.dp);
2239                 sfc_dp_register(&sfc_dp_head, &sfc_ef10_essb_rx.dp);
2240                 sfc_dp_register(&sfc_dp_head, &sfc_ef10_rx.dp);
2241                 sfc_dp_register(&sfc_dp_head, &sfc_efx_rx.dp);
2242
2243                 sfc_dp_register(&sfc_dp_head, &sfc_ef100_tx.dp);
2244                 sfc_dp_register(&sfc_dp_head, &sfc_ef10_tx.dp);
2245                 sfc_dp_register(&sfc_dp_head, &sfc_efx_tx.dp);
2246                 sfc_dp_register(&sfc_dp_head, &sfc_ef10_simple_tx.dp);
2247         }
2248 }
2249
2250 static int
2251 sfc_parse_switch_mode(struct sfc_adapter *sa, bool has_representors)
2252 {
2253         const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
2254         const char *switch_mode = NULL;
2255         int rc;
2256
2257         sfc_log_init(sa, "entry");
2258
2259         rc = sfc_kvargs_process(sa, SFC_KVARG_SWITCH_MODE,
2260                                 sfc_kvarg_string_handler, &switch_mode);
2261         if (rc != 0)
2262                 goto fail_kvargs;
2263
2264         if (switch_mode == NULL) {
2265                 sa->switchdev = encp->enc_mae_supported &&
2266                                 (!encp->enc_datapath_cap_evb ||
2267                                  has_representors);
2268         } else if (strcasecmp(switch_mode, SFC_KVARG_SWITCH_MODE_LEGACY) == 0) {
2269                 sa->switchdev = false;
2270         } else if (strcasecmp(switch_mode,
2271                               SFC_KVARG_SWITCH_MODE_SWITCHDEV) == 0) {
2272                 sa->switchdev = true;
2273         } else {
2274                 sfc_err(sa, "invalid switch mode device argument '%s'",
2275                         switch_mode);
2276                 rc = EINVAL;
2277                 goto fail_mode;
2278         }
2279
2280         sfc_log_init(sa, "done");
2281
2282         return 0;
2283
2284 fail_mode:
2285 fail_kvargs:
2286         sfc_log_init(sa, "failed: %s", rte_strerror(rc));
2287
2288         return rc;
2289 }
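
/*
 * Devargs sketch (illustrative): the mode can be forced either way;
 * otherwise the default above is derived from MAE/EVB capabilities
 * and the presence of representors:
 *
 *	-a 0000:01:00.0,switch_mode=switchdev
 *	-a 0000:01:00.0,switch_mode=legacy
 */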
2290
2291 static int
2292 sfc_eth_dev_init(struct rte_eth_dev *dev, void *init_params)
2293 {
2294         struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
2295         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
2296         struct sfc_ethdev_init_data *init_data = init_params;
2297         uint32_t logtype_main;
2298         struct sfc_adapter *sa;
2299         int rc;
2300         const efx_nic_cfg_t *encp;
2301         const struct rte_ether_addr *from;
2302         int ret;
2303
2304         if (sfc_efx_dev_class_get(pci_dev->device.devargs) !=
2305                         SFC_EFX_DEV_CLASS_NET) {
2306                 SFC_GENERIC_LOG(DEBUG,
2307                         "Incompatible device class: skip probing, should be probed by another sfc driver.");
2308                 return 1;
2309         }
2310
2311         rc = sfc_dp_mport_register();
2312         if (rc != 0)
2313                 return rc;
2314
2315         sfc_register_dp();
2316
2317         logtype_main = sfc_register_logtype(&pci_dev->addr,
2318                                             SFC_LOGTYPE_MAIN_STR,
2319                                             RTE_LOG_NOTICE);
2320
2321         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2322                 return -sfc_eth_dev_secondary_init(dev, logtype_main);
2323
2324         /* Required for logging */
2325         ret = snprintf(sas->log_prefix, sizeof(sas->log_prefix),
2326                         "PMD: sfc_efx " PCI_PRI_FMT " #%" PRIu16 ": ",
2327                         pci_dev->addr.domain, pci_dev->addr.bus,
2328                         pci_dev->addr.devid, pci_dev->addr.function,
2329                         dev->data->port_id);
2330         if (ret < 0 || ret >= (int)sizeof(sas->log_prefix)) {
2331                 SFC_GENERIC_LOG(ERR,
2332                         "reserved log prefix is too short for " PCI_PRI_FMT,
2333                         pci_dev->addr.domain, pci_dev->addr.bus,
2334                         pci_dev->addr.devid, pci_dev->addr.function);
2335                 return -EINVAL;
2336         }
2337         sas->pci_addr = pci_dev->addr;
2338         sas->port_id = dev->data->port_id;
2339
2340         /*
2341          * Allocate process-private data from the heap, since it must not
2342          * be located in shared memory allocated with the rte_malloc() API.
2343          */
2344         sa = calloc(1, sizeof(*sa));
2345         if (sa == NULL) {
2346                 rc = ENOMEM;
2347                 goto fail_alloc_sa;
2348         }
2349
2350         dev->process_private = sa;
2351
2352         /* Required for logging */
2353         sa->priv.shared = sas;
2354         sa->priv.logtype_main = logtype_main;
2355
2356         sa->eth_dev = dev;
2357
2358         /* Copy PCI device info to the dev->data */
2359         rte_eth_copy_pci_info(dev, pci_dev);
2360         dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
2361         dev->data->dev_flags |= RTE_ETH_DEV_FLOW_OPS_THREAD_SAFE;
2362
2363         rc = sfc_kvargs_parse(sa);
2364         if (rc != 0)
2365                 goto fail_kvargs_parse;
2366
2367         sfc_log_init(sa, "entry");
2368
2369         dev->data->mac_addrs = rte_zmalloc("sfc", RTE_ETHER_ADDR_LEN, 0);
2370         if (dev->data->mac_addrs == NULL) {
2371                 rc = ENOMEM;
2372                 goto fail_mac_addrs;
2373         }
2374
2375         sfc_adapter_lock_init(sa);
2376         sfc_adapter_lock(sa);
2377
2378         sfc_log_init(sa, "probing");
2379         rc = sfc_probe(sa);
2380         if (rc != 0)
2381                 goto fail_probe;
2382
2383         /*
2384          * Selecting a default switch mode requires the NIC to be probed and
2385          * to have its capabilities filled in.
2386          */
2387         rc = sfc_parse_switch_mode(sa, init_data->nb_representors > 0);
2388         if (rc != 0)
2389                 goto fail_switch_mode;
2390
2391         sfc_log_init(sa, "set device ops");
2392         rc = sfc_eth_dev_set_ops(dev);
2393         if (rc != 0)
2394                 goto fail_set_ops;
2395
2396         sfc_log_init(sa, "attaching");
2397         rc = sfc_attach(sa);
2398         if (rc != 0)
2399                 goto fail_attach;
2400
2401         if (sa->switchdev && sa->mae.status != SFC_MAE_STATUS_SUPPORTED) {
2402                 sfc_err(sa,
2403                         "failed to enable switchdev mode without MAE support");
2404                 rc = ENOTSUP;
2405                 goto fail_switchdev_no_mae;
2406         }
2407
2408         encp = efx_nic_cfg_get(sa->nic);
2409
2410         /*
2411          * rte_ether_addr_copy() takes its arguments in the reverse order
2412          * of the Linux kernel equivalent. Copy from NIC config to ethdev data.
2413          */
2414         from = (const struct rte_ether_addr *)(encp->enc_mac_addr);
2415         rte_ether_addr_copy(from, &dev->data->mac_addrs[0]);
2416
2417         sfc_adapter_unlock(sa);
2418
2419         sfc_log_init(sa, "done");
2420         return 0;
2421
2422 fail_switchdev_no_mae:
2423         sfc_detach(sa);
2424
2425 fail_attach:
2426         sfc_eth_dev_clear_ops(dev);
2427
2428 fail_set_ops:
2429 fail_switch_mode:
2430         sfc_unprobe(sa);
2431
2432 fail_probe:
2433         sfc_adapter_unlock(sa);
2434         sfc_adapter_lock_fini(sa);
2435         rte_free(dev->data->mac_addrs);
2436         dev->data->mac_addrs = NULL;
2437
2438 fail_mac_addrs:
2439         sfc_kvargs_cleanup(sa);
2440
2441 fail_kvargs_parse:
2442         sfc_log_init(sa, "failed %d", rc);
2443         dev->process_private = NULL;
2444         free(sa);
2445
2446 fail_alloc_sa:
2447         SFC_ASSERT(rc > 0);
2448         return -rc;
2449 }
2450
2451 static int
2452 sfc_eth_dev_uninit(struct rte_eth_dev *dev)
2453 {
2454         sfc_dev_close(dev);
2455
2456         return 0;
2457 }
2458
2459 static const struct rte_pci_id pci_id_sfc_efx_map[] = {
2460         { RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_FARMINGDALE) },
2461         { RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_FARMINGDALE_VF) },
2462         { RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_GREENPORT) },
2463         { RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_GREENPORT_VF) },
2464         { RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_MEDFORD) },
2465         { RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_MEDFORD_VF) },
2466         { RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_MEDFORD2) },
2467         { RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_MEDFORD2_VF) },
2468         { RTE_PCI_DEVICE(EFX_PCI_VENID_XILINX, EFX_PCI_DEVID_RIVERHEAD) },
2469         { .vendor_id = 0 /* sentinel */ }
2470 };
2471
2472 static int
2473 sfc_parse_rte_devargs(const char *args, struct rte_eth_devargs *devargs)
2474 {
2475         struct rte_eth_devargs eth_da = { .nb_representor_ports = 0 };
2476         int rc;
2477
2478         if (args != NULL) {
2479                 rc = rte_eth_devargs_parse(args, &eth_da);
2480                 if (rc != 0) {
2481                         SFC_GENERIC_LOG(ERR,
2482                                         "Failed to parse generic devargs '%s'",
2483                                         args);
2484                         return rc;
2485                 }
2486         }
2487
2488         *devargs = eth_da;
2489
2490         return 0;
2491 }
2492
2493 static int
2494 sfc_eth_dev_create(struct rte_pci_device *pci_dev,
2495                    struct sfc_ethdev_init_data *init_data,
2496                    struct rte_eth_dev **devp)
2497 {
2498         struct rte_eth_dev *dev;
2499         int rc;
2500
2501         rc = rte_eth_dev_create(&pci_dev->device, pci_dev->device.name,
2502                                 sizeof(struct sfc_adapter_shared),
2503                                 eth_dev_pci_specific_init, pci_dev,
2504                                 sfc_eth_dev_init, init_data);
2505         if (rc != 0) {
2506                 SFC_GENERIC_LOG(ERR, "Failed to create sfc ethdev '%s'",
2507                                 pci_dev->device.name);
2508                 return rc;
2509         }
2510
2511         dev = rte_eth_dev_allocated(pci_dev->device.name);
2512         if (dev == NULL) {
2513                 SFC_GENERIC_LOG(ERR, "Failed to find allocated sfc ethdev '%s'",
2514                                 pci_dev->device.name);
2515                 return -ENODEV;
2516         }
2517
2518         *devp = dev;
2519
2520         return 0;
2521 }
2522
2523 static int
2524 sfc_eth_dev_create_representors(struct rte_eth_dev *dev,
2525                                 const struct rte_eth_devargs *eth_da)
2526 {
2527         struct sfc_adapter *sa;
2528         unsigned int i;
2529         int rc;
2530
2531         if (eth_da->nb_representor_ports == 0)
2532                 return 0;
2533
2534         sa = sfc_adapter_by_eth_dev(dev);
2535
2536         if (!sa->switchdev) {
2537                 sfc_err(sa, "cannot create representors in non-switchdev mode");
2538                 return -EINVAL;
2539         }
2540
2541         if (!sfc_repr_available(sfc_sa2shared(sa))) {
2542                 sfc_err(sa, "cannot create representors: unsupported");
2543
2544                 return -ENOTSUP;
2545         }
2546
2547         for (i = 0; i < eth_da->nb_representor_ports; ++i) {
2548                 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
2549                 efx_mport_sel_t mport_sel;
2550
2551                 rc = efx_mae_mport_by_pcie_function(encp->enc_pf,
2552                                 eth_da->representor_ports[i], &mport_sel);
2553                 if (rc != 0) {
2554                         sfc_err(sa,
2555                                 "failed to get representor %u m-port: %s - ignore",
2556                                 eth_da->representor_ports[i],
2557                                 rte_strerror(rc));
2558                         continue;
2559                 }
2560
2561                 rc = sfc_repr_create(dev, eth_da->representor_ports[i],
2562                                      sa->mae.switch_domain_id, &mport_sel);
2563                 if (rc != 0) {
2564                         sfc_err(sa, "cannot create representor %u: %s - ignore",
2565                                 eth_da->representor_ports[i],
2566                                 rte_strerror(-rc));
2567                 }
2568         }
2569
2570         return 0;
2571 }
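
/*
 * Devargs sketch (illustrative): representors are requested with the
 * standard "representor" device argument, parsed into eth_da by
 * rte_eth_devargs_parse(), e.g. one port per VF 0..2:
 *
 *	-a 0000:01:00.0,representor=[0-2]
 */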
2572
2573 static int sfc_eth_dev_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
2574         struct rte_pci_device *pci_dev)
2575 {
2576         struct sfc_ethdev_init_data init_data;
2577         struct rte_eth_devargs eth_da;
2578         struct rte_eth_dev *dev;
2579         int rc;
2580
2581         if (pci_dev->device.devargs != NULL) {
2582                 rc = sfc_parse_rte_devargs(pci_dev->device.devargs->args,
2583                                            &eth_da);
2584                 if (rc != 0)
2585                         return rc;
2586         } else {
2587                 memset(&eth_da, 0, sizeof(eth_da));
2588         }
2589
2590         init_data.nb_representors = eth_da.nb_representor_ports;
2591
2592         if (eth_da.nb_representor_ports > 0 &&
2593             rte_eal_process_type() != RTE_PROC_PRIMARY) {
2594                 SFC_GENERIC_LOG(ERR,
2595                         "Creating representors from a secondary process is not supported, dev '%s'",
2596                         pci_dev->device.name);
2597                 return -ENOTSUP;
2598         }
2599
2600         rc = sfc_eth_dev_create(pci_dev, &init_data, &dev);
2601         if (rc != 0)
2602                 return rc;
2603
2604         rc = sfc_eth_dev_create_representors(dev, &eth_da);
2605         if (rc != 0) {
2606                 (void)rte_eth_dev_destroy(dev, sfc_eth_dev_uninit);
2607                 return rc;
2608         }
2609
2610         return 0;
2611 }
2612
2613 static int sfc_eth_dev_pci_remove(struct rte_pci_device *pci_dev)
2614 {
2615         return rte_eth_dev_pci_generic_remove(pci_dev, sfc_eth_dev_uninit);
2616 }
2617
2618 static struct rte_pci_driver sfc_efx_pmd = {
2619         .id_table = pci_id_sfc_efx_map,
2620         .drv_flags =
2621                 RTE_PCI_DRV_INTR_LSC |
2622                 RTE_PCI_DRV_NEED_MAPPING,
2623         .probe = sfc_eth_dev_pci_probe,
2624         .remove = sfc_eth_dev_pci_remove,
2625 };
2626
2627 RTE_PMD_REGISTER_PCI(net_sfc_efx, sfc_efx_pmd);
2628 RTE_PMD_REGISTER_PCI_TABLE(net_sfc_efx, pci_id_sfc_efx_map);
2629 RTE_PMD_REGISTER_KMOD_DEP(net_sfc_efx, "* igb_uio | uio_pci_generic | vfio-pci");
2630 RTE_PMD_REGISTER_PARAM_STRING(net_sfc_efx,
2631         SFC_KVARG_SWITCH_MODE "=" SFC_KVARG_VALUES_SWITCH_MODE " "
2632         SFC_KVARG_RX_DATAPATH "=" SFC_KVARG_VALUES_RX_DATAPATH " "
2633         SFC_KVARG_TX_DATAPATH "=" SFC_KVARG_VALUES_TX_DATAPATH " "
2634         SFC_KVARG_PERF_PROFILE "=" SFC_KVARG_VALUES_PERF_PROFILE " "
2635         SFC_KVARG_FW_VARIANT "=" SFC_KVARG_VALUES_FW_VARIANT " "
2636         SFC_KVARG_RXD_WAIT_TIMEOUT_NS "=<long> "
2637         SFC_KVARG_STATS_UPDATE_PERIOD_MS "=<long>");
2638
2639 RTE_INIT(sfc_driver_register_logtype)
2640 {
2641         int ret;
2642
2643         ret = rte_log_register_type_and_pick_level(SFC_LOGTYPE_PREFIX "driver",
2644                                                    RTE_LOG_NOTICE);
2645         sfc_logtype_driver = (ret < 0) ? RTE_LOGTYPE_PMD : ret;
2646 }