/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_cycles.h>

#include "failsafe_private.h"

/** Print a message out of a flow error. */
static int
fs_flow_complain(struct rte_flow_error *error)
{
        static const char *const errstrlist[] = {
                [RTE_FLOW_ERROR_TYPE_NONE] = "no error",
                [RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
                [RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
                [RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
                [RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
                [RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
                [RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
                [RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
                [RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
                [RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
                [RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
                [RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
        };
        const char *errstr;
        char buf[32];
        int err = rte_errno;

        if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
                        !errstrlist[error->type])
                errstr = "unknown type";
        else
                errstr = errstrlist[error->type];
        ERROR("Caught error type %d (%s): %s%s\n",
                error->type, errstr,
                error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
                                error->cause), buf) : "",
                error->message ? error->message : "(no stated reason)");
        return -err;
}

static int
eth_dev_flow_isolate_set(struct rte_eth_dev *dev,
                         struct sub_device *sdev)
{
        struct rte_flow_error ferror;
        int ret;

        if (!PRIV(dev)->flow_isolated) {
                DEBUG("Flow isolation already disabled");
        } else {
                DEBUG("Enabling flow isolation");
                ret = rte_flow_isolate(PORT_ID(sdev),
                                       PRIV(dev)->flow_isolated,
                                       &ferror);
                if (ret) {
                        fs_flow_complain(&ferror);
                        return ret;
                }
        }
        return 0;
}

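/*
 * Replay the failsafe port configuration on a sub-device that has just
 * been probed: Rx/Tx queues, link state, promiscuous and all-multicast
 * modes, MTU, MAC addresses, VLAN filters and flow rules.
 */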
static int
fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
                struct sub_device *sdev)
{
        struct rte_eth_dev *edev;
        struct rte_vlan_filter_conf *vfc1;
        struct rte_vlan_filter_conf *vfc2;
        struct rte_flow *flow;
        struct rte_flow_error ferror;
        uint32_t i;
        int ret;

        edev = ETH(sdev);
        /* RX queue setup */
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct rxq *rxq;

                rxq = dev->data->rx_queues[i];
                ret = rte_eth_rx_queue_setup(PORT_ID(sdev), i,
                                rxq->info.nb_desc, rxq->socket_id,
                                &rxq->info.conf, rxq->info.mp);
                if (ret) {
                        ERROR("rx_queue_setup failed");
                        return ret;
                }
        }
        /* TX queue setup */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct txq *txq;

                txq = dev->data->tx_queues[i];
                ret = rte_eth_tx_queue_setup(PORT_ID(sdev), i,
                                txq->info.nb_desc, txq->socket_id,
                                &txq->info.conf);
                if (ret) {
                        ERROR("tx_queue_setup failed");
                        return ret;
                }
        }
        /* dev_link.link_status */
        if (dev->data->dev_link.link_status !=
            edev->data->dev_link.link_status) {
                DEBUG("Configuring link_status");
                if (dev->data->dev_link.link_status)
                        ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
                else
                        ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
                if (ret) {
                        ERROR("Failed to apply link_status");
                        return ret;
                }
        } else {
                DEBUG("link_status already set");
        }
        /* promiscuous */
        if (dev->data->promiscuous != edev->data->promiscuous) {
                DEBUG("Configuring promiscuous");
                if (dev->data->promiscuous)
                        ret = rte_eth_promiscuous_enable(PORT_ID(sdev));
                else
                        ret = rte_eth_promiscuous_disable(PORT_ID(sdev));
                if (ret != 0) {
                        ERROR("Failed to apply promiscuous mode");
                        return ret;
                }
        } else {
                DEBUG("promiscuous already set");
        }
        /* all_multicast */
        if (dev->data->all_multicast != edev->data->all_multicast) {
                DEBUG("Configuring all_multicast");
                if (dev->data->all_multicast)
                        ret = rte_eth_allmulticast_enable(PORT_ID(sdev));
                else
                        ret = rte_eth_allmulticast_disable(PORT_ID(sdev));
                if (ret != 0) {
                        ERROR("Failed to apply allmulticast mode");
                        return ret;
                }
        } else {
                DEBUG("all_multicast already set");
        }
        /* MTU */
        if (dev->data->mtu != edev->data->mtu) {
                DEBUG("Configuring MTU");
                ret = rte_eth_dev_set_mtu(PORT_ID(sdev), dev->data->mtu);
                if (ret) {
                        ERROR("Failed to apply MTU");
                        return ret;
                }
        } else {
                DEBUG("MTU already set");
        }
        /* default MAC */
        DEBUG("Configuring default MAC address");
        ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
                        &dev->data->mac_addrs[0]);
        if (ret) {
                ERROR("Setting default MAC address failed");
                return ret;
        }
        /* additional MAC */
        if (PRIV(dev)->nb_mac_addr > 1)
                DEBUG("Configure additional MAC address%s",
                        (PRIV(dev)->nb_mac_addr > 2 ? "es" : ""));
        for (i = 1; i < PRIV(dev)->nb_mac_addr; i++) {
                struct rte_ether_addr *ea;

                ea = &dev->data->mac_addrs[i];
                ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), ea,
                                PRIV(dev)->mac_addr_pool[i]);
                if (ret) {
                        char ea_fmt[RTE_ETHER_ADDR_FMT_SIZE];

                        rte_ether_format_addr(ea_fmt,
                                        RTE_ETHER_ADDR_FMT_SIZE, ea);
                        ERROR("Adding MAC address %s failed", ea_fmt);
                        return ret;
                }
        }
        /*
         * Propagate multicast MAC addresses to the sub-device only when
         * a non-zero number of addresses is set. The check keeps failsafe
         * working on top of sub-devices that do not support the operation,
         * as long as the feature is not actually used.
         */
        if (PRIV(dev)->nb_mcast_addr > 0) {
                DEBUG("Configuring multicast MAC addresses");
                ret = rte_eth_dev_set_mc_addr_list(PORT_ID(sdev),
                                                   PRIV(dev)->mcast_addrs,
                                                   PRIV(dev)->nb_mcast_addr);
                if (ret) {
                        ERROR("Failed to apply multicast MAC addresses");
                        return ret;
                }
        }
        /* VLAN filter */
        vfc1 = &dev->data->vlan_filter_conf;
        vfc2 = &edev->data->vlan_filter_conf;
        if (memcmp(vfc1, vfc2, sizeof(struct rte_vlan_filter_conf))) {
                uint64_t vbit;
                uint64_t ids;
                size_t i;
                uint16_t vlan_id;

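                /*
                 * vlan_filter_conf.ids is a 4096-bit set stored as 64
                 * 64-bit words: walk every set bit and program the
                 * matching VLAN ID on the sub-device.
                 */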
                DEBUG("Configuring VLAN filter");
                for (i = 0; i < RTE_DIM(vfc1->ids); i++) {
                        if (vfc1->ids[i] == 0)
                                continue;
                        ids = vfc1->ids[i];
                        while (ids) {
                                vlan_id = 64 * i;
                                /* mask of the zeroes below the lowest set bit */
                                vbit = ~ids & (ids - 1);
                                /* clear the lowest set bit */
                                ids ^= (ids ^ (ids - 1)) ^ vbit;
                                /* shift out the mask to count the trailing
                                 * zeroes, i.e. the bit position */
                                for (; vbit; vlan_id++)
                                        vbit >>= 1;
                                ret = rte_eth_dev_vlan_filter(
                                        PORT_ID(sdev), vlan_id, 1);
                                if (ret) {
                                        ERROR("Failed to apply VLAN filter %hu",
                                                vlan_id);
                                        return ret;
                                }
                        }
                }
        } else {
                DEBUG("VLAN filter already set");
        }
        /* rte_flow */
        if (TAILQ_EMPTY(&PRIV(dev)->flow_list)) {
                DEBUG("rte_flow already set");
        } else {
                DEBUG("Resetting rte_flow configuration");
                ret = rte_flow_flush(PORT_ID(sdev), &ferror);
                if (ret) {
                        fs_flow_complain(&ferror);
                        return ret;
                }
                i = 0;
                rte_errno = 0;
                DEBUG("Configuring rte_flow");
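                /*
                 * rte_flow_create() returns NULL and sets rte_errno on
                 * failure; rte_errno was cleared above, so a nonzero
                 * value after a creation attempt aborts the replay.
                 */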
                TAILQ_FOREACH(flow, &PRIV(dev)->flow_list, next) {
                        DEBUG("Creating flow #%" PRIu32, i++);
                        flow->flows[SUB_ID(sdev)] =
                                rte_flow_create(PORT_ID(sdev),
                                                flow->rule.attr,
                                                flow->rule.pattern,
                                                flow->rule.actions,
                                                &ferror);
                        ret = rte_errno;
                        if (ret)
                                break;
                }
                if (ret) {
                        fs_flow_complain(&ferror);
                        return ret;
                }
        }
        return 0;
}

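/*
 * Tear a sub-device down to DEV_UNDEFINED: each switch case deliberately
 * falls through to the next lower state. The hotplug alarm is re-armed
 * at the end so the sub-device can be plugged back in later.
 */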
static void
fs_dev_remove(struct sub_device *sdev)
{
        int ret;

        if (sdev == NULL)
                return;
        switch (sdev->state) {
        case DEV_STARTED:
                failsafe_rx_intr_uninstall_subdevice(sdev);
                ret = rte_eth_dev_stop(PORT_ID(sdev));
                if (ret < 0)
                        ERROR("Failed to stop sub-device %u", SUB_ID(sdev));
                sdev->state = DEV_ACTIVE;
                /* fallthrough */
        case DEV_ACTIVE:
                failsafe_eth_dev_unregister_callbacks(sdev);
                ret = rte_eth_dev_close(PORT_ID(sdev));
                if (ret < 0) {
                        ERROR("Port close failed for sub-device %u",
                              PORT_ID(sdev));
                }
                sdev->state = DEV_PROBED;
                /* fallthrough */
        case DEV_PROBED:
                ret = rte_dev_remove(sdev->dev);
                if (ret < 0) {
                        ERROR("Bus detach failed for sub_device %u",
                              SUB_ID(sdev));
                } else {
                        rte_eth_dev_release_port(ETH(sdev));
                }
                sdev->state = DEV_PARSED;
                /* fallthrough */
        case DEV_PARSED:
        case DEV_UNDEFINED:
                sdev->state = DEV_UNDEFINED;
                sdev->sdev_port_id = RTE_MAX_ETHPORTS;
                /* the end */
                break;
        }
        sdev->remove = 0;
        failsafe_hotplug_alarm_install(fs_dev(sdev));
}

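/*
 * Fold the sub-device statistics into the failsafe accumulator before
 * the port goes away, falling back to the last recorded snapshot when
 * the device can no longer be queried.
 */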
static void
fs_dev_stats_save(struct sub_device *sdev)
{
        struct rte_eth_stats stats;
        int err;

        /* Attempt to read current stats. */
        err = rte_eth_stats_get(PORT_ID(sdev), &stats);
        if (err) {
                uint64_t timestamp = sdev->stats_snapshot.timestamp;

                WARN("Could not access latest statistics from sub-device %d.",
                         SUB_ID(sdev));
                if (timestamp != 0)
                        WARN("Using latest snapshot taken %"PRIu64" seconds ago.",
                                 (rte_rdtsc() - timestamp) / rte_get_tsc_hz());
        }
        failsafe_stats_increment
                (&PRIV(fs_dev(sdev))->stats_accumulator,
                err ? &sdev->stats_snapshot.stats : &stats);
        memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
}

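/*
 * Report whether the sub-device datapath is quiescent: returns 1 when
 * no Rx or Tx burst is in flight on any of its queues.
 */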
static inline int
fs_rxtx_clean(struct sub_device *sdev)
{
        uint16_t i;

        for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++)
                if (FS_ATOMIC_RX(sdev, i))
                        return 0;
        for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++)
                if (FS_ATOMIC_TX(sdev, i))
                        return 0;
        return 1;
}

void
failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev)
{
        int ret;

        if (sdev == NULL)
                return;
        if (sdev->rmv_callback) {
                ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
                                                RTE_ETH_EVENT_INTR_RMV,
                                                failsafe_eth_rmv_event_callback,
                                                sdev);
                if (ret)
                        WARN("Failed to unregister RMV callback for sub_device"
                             " %d", SUB_ID(sdev));
                sdev->rmv_callback = 0;
        }
        if (sdev->lsc_callback) {
                ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
                                                RTE_ETH_EVENT_INTR_LSC,
                                                failsafe_eth_lsc_event_callback,
                                                sdev);
                if (ret)
                        WARN("Failed to unregister LSC callback for sub_device"
                             " %d", SUB_ID(sdev));
                sdev->lsc_callback = 0;
        }
}

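/*
 * Remove all sub-devices flagged for removal, but only once their
 * datapath is quiescent and, for active devices, their final
 * statistics have been saved.
 */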
void
failsafe_dev_remove(struct rte_eth_dev *dev)
{
        struct sub_device *sdev;
        uint8_t i;

        FOREACH_SUBDEV(sdev, i, dev) {
                if (!sdev->remove)
                        continue;

                /* Active devices must have finished their burst and
                 * their stats must be saved.
                 */
                if (sdev->state >= DEV_ACTIVE &&
                    fs_rxtx_clean(sdev) == 0)
                        continue;
                if (fs_lock(dev, 1) != 0)
                        return;
                if (sdev->state >= DEV_ACTIVE)
                        fs_dev_stats_save(sdev);
                fs_dev_remove(sdev);
                fs_unlock(dev, 1);
        }
}

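/*
 * Align the sub-device Rx queue states with the failsafe port after a
 * (re)start: deferred-start queues that the application had started
 * must be started by hand, and queues the application had stopped must
 * be stopped again on the sub-device.
 */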
static int
failsafe_eth_dev_rx_queues_sync(struct rte_eth_dev *dev)
{
        struct rxq *rxq;
        int ret;
        uint16_t i;

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                rxq = dev->data->rx_queues[i];

                if (rxq->info.conf.rx_deferred_start &&
                    dev->data->rx_queue_state[i] ==
                                                RTE_ETH_QUEUE_STATE_STARTED) {
                        /*
                         * The subdevice Rx queue does not launch on device
                         * start if deferred start flag is set. It needs to be
                         * started manually in case an appropriate failsafe Rx
                         * queue has been started earlier.
                         */
                        ret = dev->dev_ops->rx_queue_start(dev, i);
                        if (ret) {
                                ERROR("Could not synchronize Rx queue %d", i);
                                return ret;
                        }
                } else if (dev->data->rx_queue_state[i] ==
                                                RTE_ETH_QUEUE_STATE_STOPPED) {
                        /*
                         * The subdevice Rx queue needs to be stopped manually
                         * in case an appropriate failsafe Rx queue has been
                         * stopped earlier.
                         */
                        ret = dev->dev_ops->rx_queue_stop(dev, i);
                        if (ret) {
                                ERROR("Could not synchronize Rx queue %d", i);
                                return ret;
                        }
                }
        }
        return 0;
}

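/* Tx counterpart of failsafe_eth_dev_rx_queues_sync(). */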
static int
failsafe_eth_dev_tx_queues_sync(struct rte_eth_dev *dev)
{
        struct txq *txq;
        int ret;
        uint16_t i;

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                txq = dev->data->tx_queues[i];

                if (txq->info.conf.tx_deferred_start &&
                    dev->data->tx_queue_state[i] ==
                                                RTE_ETH_QUEUE_STATE_STARTED) {
                        /*
                         * The subdevice Tx queue does not launch on device
                         * start if deferred start flag is set. It needs to be
                         * started manually in case an appropriate failsafe Tx
                         * queue has been started earlier.
                         */
                        ret = dev->dev_ops->tx_queue_start(dev, i);
                        if (ret) {
                                ERROR("Could not synchronize Tx queue %d", i);
                                return ret;
                        }
                } else if (dev->data->tx_queue_state[i] ==
                                                RTE_ETH_QUEUE_STATE_STOPPED) {
                        /*
                         * The subdevice Tx queue needs to be stopped manually
                         * in case an appropriate failsafe Tx queue has been
                         * stopped earlier.
                         */
                        ret = dev->dev_ops->tx_queue_stop(dev, i);
                        if (ret) {
                                ERROR("Could not synchronize Tx queue %d", i);
                                return ret;
                        }
                }
        }
        return 0;
}

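/*
 * Bring every sub-device up to the failsafe port's own state, one stage
 * at a time: parse the sub-device arguments, probe it on its bus,
 * configure it, then start it. Sub-devices that cannot reach the target
 * state are flagged for removal and resynchronized later.
 */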
int
failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
{
        struct sub_device *sdev;
        uint32_t inactive;
        int ret;
        uint8_t i;

        if (PRIV(dev)->state < DEV_PARSED)
                return 0;

        ret = failsafe_args_parse_subs(dev);
        if (ret)
                goto err_remove;

        if (PRIV(dev)->state < DEV_PROBED)
                return 0;
        ret = failsafe_eal_init(dev);
        if (ret)
                goto err_remove;
        if (PRIV(dev)->state < DEV_ACTIVE)
                return 0;
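        /*
         * Record which sub-devices are only probed: the full failsafe
         * configuration is replayed on them after dev_configure() below.
         */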
        inactive = 0;
        FOREACH_SUBDEV(sdev, i, dev) {
                if (sdev->state == DEV_PROBED) {
                        inactive |= UINT32_C(1) << i;
                        ret = eth_dev_flow_isolate_set(dev, sdev);
                        if (ret) {
                                ERROR("Could not apply configuration to sub_device %d",
                                      i);
                                goto err_remove;
                        }
                }
        }
        ret = dev->dev_ops->dev_configure(dev);
        if (ret)
                goto err_remove;
        FOREACH_SUBDEV(sdev, i, dev) {
                if (inactive & (UINT32_C(1) << i)) {
                        ret = fs_eth_dev_conf_apply(dev, sdev);
                        if (ret) {
                                ERROR("Could not apply configuration to sub_device %d",
                                      i);
                                goto err_remove;
                        }
                }
        }
        /*
         * If new devices have been configured, check if
         * the link state has changed.
         */
        if (inactive)
                dev->dev_ops->link_update(dev, 1);
        if (PRIV(dev)->state < DEV_STARTED)
                return 0;
        ret = dev->dev_ops->dev_start(dev);
        if (ret)
                goto err_remove;
        ret = failsafe_eth_dev_rx_queues_sync(dev);
        if (ret)
                goto err_remove;
        ret = failsafe_eth_dev_tx_queues_sync(dev);
        if (ret)
                goto err_remove;
        return 0;
err_remove:
        FOREACH_SUBDEV(sdev, i, dev)
                if (sdev->state != PRIV(dev)->state)
                        sdev->remove = 1;
        return ret;
}

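/* Accumulate each counter of 'from' into 'to', per-queue counters included. */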
void
failsafe_stats_increment(struct rte_eth_stats *to, struct rte_eth_stats *from)
{
        uint32_t i;

        RTE_ASSERT(to != NULL && from != NULL);
        to->ipackets += from->ipackets;
        to->opackets += from->opackets;
        to->ibytes += from->ibytes;
        to->obytes += from->obytes;
        to->imissed += from->imissed;
        to->ierrors += from->ierrors;
        to->oerrors += from->oerrors;
        to->rx_nombuf += from->rx_nombuf;
        for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) {
                to->q_ipackets[i] += from->q_ipackets[i];
                to->q_opackets[i] += from->q_opackets[i];
                to->q_ibytes[i] += from->q_ibytes[i];
                to->q_obytes[i] += from->q_obytes[i];
                to->q_errors[i] += from->q_errors[i];
        }
}

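/*
 * RMV interrupt handler: stop using the removed sub-device right away
 * and flag it so that the hotplug alarm performs the actual teardown.
 */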
int
failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
                                enum rte_eth_event_type event __rte_unused,
                                void *cb_arg, void *out __rte_unused)
{
        struct sub_device *sdev = cb_arg;

        fs_lock(fs_dev(sdev), 0);
        /* Switch tx_dev away from the removed sub-device as soon as possible. */
        fs_switch_dev(fs_dev(sdev), sdev);
        /* Use safe bursts in any case. */
        failsafe_set_burst_fn(fs_dev(sdev), 1);
        /*
         * Removal is asynchronous: the callback cannot be unregistered
         * from its own execution context, so only flag the sub-device
         * here and let the removal path unregister it later.
         */
        sdev->remove = 1;
        fs_unlock(fs_dev(sdev), 0);
        return 0;
}

int
failsafe_eth_lsc_event_callback(uint16_t port_id __rte_unused,
                                enum rte_eth_event_type event __rte_unused,
                                void *cb_arg, void *out __rte_unused)
{
        struct rte_eth_dev *dev = cb_arg;
        int ret;

        ret = dev->dev_ops->link_update(dev, 0);
        /* We must pass on the LSC event */
        if (ret)
                return rte_eth_dev_callback_process(dev,
                                                    RTE_ETH_EVENT_INTR_LSC,
                                                    NULL);
        else
                return 0;
}

/* Take sub-device ownership before it becomes exposed to the application. */
int
failsafe_eth_new_event_callback(uint16_t port_id,
                                enum rte_eth_event_type event __rte_unused,
                                void *cb_arg, void *out __rte_unused)
{
        struct rte_eth_dev *fs_dev = cb_arg;
        struct sub_device *sdev;
        struct rte_eth_dev *dev = &rte_eth_devices[port_id];
        uint8_t i;

        FOREACH_SUBDEV_STATE(sdev, i, fs_dev, DEV_PARSED) {
                if (sdev->state >= DEV_PROBED)
                        continue;
                if (dev->device == NULL) {
                        WARN("Trying to probe malformed device %s.\n",
                             sdev->devargs.name);
                        continue;
                }
                if (strcmp(sdev->devargs.name, dev->device->name) != 0)
                        continue;
                rte_eth_dev_owner_set(port_id, &PRIV(fs_dev)->my_owner);
                /* The actual owner will be checked after the port probing. */
                break;
        }
        return 0;
}