3b905e18f57eca06a1f39f1fba239c79c59ef24e
[dpdk.git] / lib / eal / linux / eal_dev.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2018 Intel Corporation
3  */
4
5 #include <string.h>
6 #include <unistd.h>
7 #include <fcntl.h>
8 #include <signal.h>
9 #include <sys/socket.h>
10 #include <linux/netlink.h>
11
12 #include <rte_string_fns.h>
13 #include <rte_log.h>
14 #include <rte_compat.h>
15 #include <rte_dev.h>
16 #include <rte_malloc.h>
17 #include <rte_interrupts.h>
18 #include <rte_alarm.h>
19 #include <rte_bus.h>
20 #include <rte_eal.h>
21 #include <rte_spinlock.h>
22 #include <rte_errno.h>
23
24 #include "eal_private.h"
25
26 static struct rte_intr_handle intr_handle = {
27         .type = RTE_INTR_HANDLE_DEV_EVENT,
28         .fd = -1,
29 };
30 static rte_rwlock_t monitor_lock = RTE_RWLOCK_INITIALIZER;
31 static uint32_t monitor_refcount;
32 static bool hotplug_handle;
33
34 #define EAL_UEV_MSG_LEN 4096
35 #define EAL_UEV_MSG_ELEM_LEN 128
36
37 /*
38  * spinlock for device hot-unplug failure handling. If it try to access bus or
39  * device, such as handle sigbus on bus or handle memory failure for device
40  * just need to use this lock. It could protect the bus and the device to avoid
41  * race condition.
42  */
43 static rte_spinlock_t failure_handle_lock = RTE_SPINLOCK_INITIALIZER;
44
45 static struct sigaction sigbus_action_old;
46
47 static int sigbus_need_recover;
48
49 static void dev_uev_handler(__rte_unused void *param);
50
/* Identifies the system layer that reported a device event. */
enum eal_dev_event_subsystem {
        /* PCI bus device event */
        EAL_DEV_EVENT_SUBSYSTEM_PCI,
        /* UIO driver device event */
        EAL_DEV_EVENT_SUBSYSTEM_UIO,
        /* VFIO driver device event */
        EAL_DEV_EVENT_SUBSYSTEM_VFIO,
        /* Number of subsystems above; not itself a subsystem id */
        EAL_DEV_EVENT_SUBSYSTEM_MAX
};
58
59 static void
60 sigbus_action_recover(void)
61 {
62         if (sigbus_need_recover) {
63                 sigaction(SIGBUS, &sigbus_action_old, NULL);
64                 sigbus_need_recover = 0;
65         }
66 }
67
68 static void sigbus_handler(int signum, siginfo_t *info,
69                                 void *ctx __rte_unused)
70 {
71         int ret;
72
73         RTE_LOG(DEBUG, EAL, "Thread catch SIGBUS, fault address:%p\n",
74                 info->si_addr);
75
76         rte_spinlock_lock(&failure_handle_lock);
77         ret = rte_bus_sigbus_handler(info->si_addr);
78         rte_spinlock_unlock(&failure_handle_lock);
79         if (ret == -1) {
80                 rte_exit(EXIT_FAILURE,
81                          "Failed to handle SIGBUS for hot-unplug, "
82                          "(rte_errno: %s)!", strerror(rte_errno));
83         } else if (ret == 1) {
84                 if (sigbus_action_old.sa_flags == SA_SIGINFO
85                     && sigbus_action_old.sa_sigaction) {
86                         (*(sigbus_action_old.sa_sigaction))(signum,
87                                                             info, ctx);
88                 } else if (sigbus_action_old.sa_flags != SA_SIGINFO
89                            && sigbus_action_old.sa_handler) {
90                         (*(sigbus_action_old.sa_handler))(signum);
91                 } else {
92                         rte_exit(EXIT_FAILURE,
93                                  "Failed to handle generic SIGBUS!");
94                 }
95         }
96
97         RTE_LOG(DEBUG, EAL, "Success to handle SIGBUS for hot-unplug!\n");
98 }
99
100 static int cmp_dev_name(const struct rte_device *dev,
101         const void *_name)
102 {
103         const char *name = _name;
104
105         return strcmp(dev->name, name);
106 }
107
108 static int
109 dev_uev_socket_fd_create(void)
110 {
111         struct sockaddr_nl addr;
112         int ret;
113
114         intr_handle.fd = socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC |
115                         SOCK_NONBLOCK,
116                         NETLINK_KOBJECT_UEVENT);
117         if (intr_handle.fd < 0) {
118                 RTE_LOG(ERR, EAL, "create uevent fd failed.\n");
119                 return -1;
120         }
121
122         memset(&addr, 0, sizeof(addr));
123         addr.nl_family = AF_NETLINK;
124         addr.nl_pid = 0;
125         addr.nl_groups = 0xffffffff;
126
127         ret = bind(intr_handle.fd, (struct sockaddr *) &addr, sizeof(addr));
128         if (ret < 0) {
129                 RTE_LOG(ERR, EAL, "Failed to bind uevent socket.\n");
130                 goto err;
131         }
132
133         return 0;
134 err:
135         close(intr_handle.fd);
136         intr_handle.fd = -1;
137         return ret;
138 }
139
140 struct rte_dev_event {
141         enum rte_dev_event_type type;   /**< device event type */
142         int subsystem;                  /**< subsystem id */
143         char *devname;                  /**< device name */
144 };
145
146 static int
147 dev_uev_parse(const char *buf, struct rte_dev_event *event, int length)
148 {
149         char action[EAL_UEV_MSG_ELEM_LEN];
150         char subsystem[EAL_UEV_MSG_ELEM_LEN];
151         char pci_slot_name[EAL_UEV_MSG_ELEM_LEN];
152         int i = 0;
153
154         memset(action, 0, EAL_UEV_MSG_ELEM_LEN);
155         memset(subsystem, 0, EAL_UEV_MSG_ELEM_LEN);
156         memset(pci_slot_name, 0, EAL_UEV_MSG_ELEM_LEN);
157
158         while (i < length) {
159                 for (; i < length; i++) {
160                         if (*buf)
161                                 break;
162                         buf++;
163                 }
164                 /**
165                  * check device uevent from kernel side, no need to check
166                  * uevent from udev.
167                  */
168                 if (!strncmp(buf, "libudev", 7)) {
169                         buf += 7;
170                         i += 7;
171                         return -1;
172                 }
173                 if (!strncmp(buf, "ACTION=", 7)) {
174                         buf += 7;
175                         i += 7;
176                         strlcpy(action, buf, sizeof(action));
177                 } else if (!strncmp(buf, "SUBSYSTEM=", 10)) {
178                         buf += 10;
179                         i += 10;
180                         strlcpy(subsystem, buf, sizeof(subsystem));
181                 } else if (!strncmp(buf, "PCI_SLOT_NAME=", 14)) {
182                         buf += 14;
183                         i += 14;
184                         strlcpy(pci_slot_name, buf, sizeof(subsystem));
185                         event->devname = strdup(pci_slot_name);
186                 }
187                 for (; i < length; i++) {
188                         if (*buf == '\0')
189                                 break;
190                         buf++;
191                 }
192         }
193
194         /* parse the subsystem layer */
195         if (!strncmp(subsystem, "uio", 3))
196                 event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_UIO;
197         else if (!strncmp(subsystem, "pci", 3))
198                 event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_PCI;
199         else if (!strncmp(subsystem, "vfio", 4))
200                 event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_VFIO;
201         else
202                 goto err;
203
204         /* parse the action type */
205         if (!strncmp(action, "add", 3))
206                 event->type = RTE_DEV_EVENT_ADD;
207         else if (!strncmp(action, "remove", 6))
208                 event->type = RTE_DEV_EVENT_REMOVE;
209         else
210                 goto err;
211         return 0;
212 err:
213         free(event->devname);
214         return -1;
215 }
216
217 static void
218 dev_delayed_unregister(void *param)
219 {
220         rte_intr_callback_unregister(&intr_handle, dev_uev_handler, param);
221         close(intr_handle.fd);
222         intr_handle.fd = -1;
223 }
224
225 static void
226 dev_uev_handler(__rte_unused void *param)
227 {
228         struct rte_dev_event uevent;
229         int ret;
230         char buf[EAL_UEV_MSG_LEN];
231         struct rte_bus *bus;
232         struct rte_device *dev;
233         const char *busname = "";
234
235         memset(&uevent, 0, sizeof(struct rte_dev_event));
236         memset(buf, 0, EAL_UEV_MSG_LEN);
237
238         ret = recv(intr_handle.fd, buf, EAL_UEV_MSG_LEN, MSG_DONTWAIT);
239         if (ret < 0 && errno == EAGAIN)
240                 return;
241         else if (ret <= 0) {
242                 /* connection is closed or broken, can not up again. */
243                 RTE_LOG(ERR, EAL, "uevent socket connection is broken.\n");
244                 rte_eal_alarm_set(1, dev_delayed_unregister, NULL);
245                 return;
246         }
247
248         ret = dev_uev_parse(buf, &uevent, EAL_UEV_MSG_LEN);
249         if (ret < 0) {
250                 RTE_LOG(DEBUG, EAL, "Ignoring uevent '%s'\n", buf);
251                 return;
252         }
253
254         RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
255                 uevent.devname, uevent.type, uevent.subsystem);
256
257         switch (uevent.subsystem) {
258         case EAL_DEV_EVENT_SUBSYSTEM_PCI:
259         case EAL_DEV_EVENT_SUBSYSTEM_UIO:
260                 busname = "pci";
261                 break;
262         default:
263                 break;
264         }
265
266         if (uevent.devname) {
267                 if (uevent.type == RTE_DEV_EVENT_REMOVE && hotplug_handle) {
268                         rte_spinlock_lock(&failure_handle_lock);
269                         bus = rte_bus_find_by_name(busname);
270                         if (bus == NULL) {
271                                 RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
272                                         busname);
273                                 goto failure_handle_err;
274                         }
275
276                         dev = bus->find_device(NULL, cmp_dev_name,
277                                                uevent.devname);
278                         if (dev == NULL) {
279                                 RTE_LOG(ERR, EAL, "Cannot find device (%s) on "
280                                         "bus (%s)\n", uevent.devname, busname);
281                                 goto failure_handle_err;
282                         }
283
284                         ret = bus->hot_unplug_handler(dev);
285                         if (ret) {
286                                 RTE_LOG(ERR, EAL, "Can not handle hot-unplug "
287                                         "for device (%s)\n", dev->name);
288                         }
289                         rte_spinlock_unlock(&failure_handle_lock);
290                 }
291                 rte_dev_event_callback_process(uevent.devname, uevent.type);
292                 free(uevent.devname);
293         }
294
295         return;
296
297 failure_handle_err:
298         rte_spinlock_unlock(&failure_handle_lock);
299         free(uevent.devname);
300 }
301
302 int
303 rte_dev_event_monitor_start(void)
304 {
305         int ret = 0;
306
307         rte_rwlock_write_lock(&monitor_lock);
308
309         if (monitor_refcount) {
310                 monitor_refcount++;
311                 goto exit;
312         }
313
314         ret = dev_uev_socket_fd_create();
315         if (ret) {
316                 RTE_LOG(ERR, EAL, "error create device event fd.\n");
317                 goto exit;
318         }
319
320         ret = rte_intr_callback_register(&intr_handle, dev_uev_handler, NULL);
321
322         if (ret) {
323                 RTE_LOG(ERR, EAL, "fail to register uevent callback.\n");
324                 close(intr_handle.fd);
325                 intr_handle.fd = -1;
326                 goto exit;
327         }
328
329         monitor_refcount++;
330
331 exit:
332         rte_rwlock_write_unlock(&monitor_lock);
333         return ret;
334 }
335
336 int
337 rte_dev_event_monitor_stop(void)
338 {
339         int ret = 0;
340
341         rte_rwlock_write_lock(&monitor_lock);
342
343         if (!monitor_refcount) {
344                 RTE_LOG(ERR, EAL, "device event monitor already stopped\n");
345                 goto exit;
346         }
347
348         if (monitor_refcount > 1) {
349                 monitor_refcount--;
350                 goto exit;
351         }
352
353         ret = rte_intr_callback_unregister(&intr_handle, dev_uev_handler,
354                                            (void *)-1);
355         if (ret < 0) {
356                 RTE_LOG(ERR, EAL, "fail to unregister uevent callback.\n");
357                 goto exit;
358         }
359
360         close(intr_handle.fd);
361         intr_handle.fd = -1;
362
363         monitor_refcount--;
364
365 exit:
366         rte_rwlock_write_unlock(&monitor_lock);
367
368         return ret;
369 }
370
371 int
372 dev_sigbus_handler_register(void)
373 {
374         sigset_t mask;
375         struct sigaction action;
376
377         rte_errno = 0;
378
379         if (sigbus_need_recover)
380                 return 0;
381
382         sigemptyset(&mask);
383         sigaddset(&mask, SIGBUS);
384         action.sa_flags = SA_SIGINFO;
385         action.sa_mask = mask;
386         action.sa_sigaction = sigbus_handler;
387         sigbus_need_recover = !sigaction(SIGBUS, &action, &sigbus_action_old);
388
389         return rte_errno;
390 }
391
392 int
393 dev_sigbus_handler_unregister(void)
394 {
395         rte_errno = 0;
396
397         sigbus_action_recover();
398
399         return rte_errno;
400 }
401
402 int
403 rte_dev_hotplug_handle_enable(void)
404 {
405         int ret = 0;
406
407         ret = dev_sigbus_handler_register();
408         if (ret < 0)
409                 RTE_LOG(ERR, EAL,
410                         "fail to register sigbus handler for devices.\n");
411
412         hotplug_handle = true;
413
414         return ret;
415 }
416
417 int
418 rte_dev_hotplug_handle_disable(void)
419 {
420         int ret = 0;
421
422         ret = dev_sigbus_handler_unregister();
423         if (ret < 0)
424                 RTE_LOG(ERR, EAL,
425                         "fail to unregister sigbus handler for devices.\n");
426
427         hotplug_handle = false;
428
429         return ret;
430 }