/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <inttypes.h>
#include <sys/epoll.h>
#include <sys/signalfd.h>
#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <assert.h>
#include <stdbool.h>

#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_debug.h>
#include <rte_log.h>
#include <rte_errno.h>
#include <rte_spinlock.h>
#include <rte_pause.h>
#include <rte_vfio.h>

#include "eal_private.h"
#include "eal_vfio.h"
#include "eal_thread.h"

#define EAL_INTR_EPOLL_WAIT_FOREVER (-1)
#define NB_OTHER_INTR               1

static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */

/**
 * union for pipe fds.
 */
union intr_pipefds {
        struct {
                int pipefd[2];
        };
        struct {
                int readfd;
                int writefd;
        };
};

/**
 * union buffer for reading on different devices
 */
union rte_intr_read_buffer {
        int uio_intr_count;              /* for uio device */
#ifdef VFIO_PRESENT
        uint64_t vfio_intr_count;        /* for vfio device */
#endif
        uint64_t timerfd_num;            /* for timerfd */
        char charbuf[16];                /* for others */
};

TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
TAILQ_HEAD(rte_intr_source_list, rte_intr_source);

struct rte_intr_callback {
        TAILQ_ENTRY(rte_intr_callback) next;
        rte_intr_callback_fn cb_fn;  /**< callback address */
        void *cb_arg;                /**< parameter for callback */
        uint8_t pending_delete;      /**< delete after callback is called */
        rte_intr_unregister_callback_fn ucb_fn; /**< fn to call before cb is deleted */
};

struct rte_intr_source {
        TAILQ_ENTRY(rte_intr_source) next;
        struct rte_intr_handle intr_handle; /**< interrupt handle */
        struct rte_intr_cb_list callbacks;  /**< user callbacks */
        uint32_t active;
};

/* global spinlock for interrupt data operations */
static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;

/* union buffer for pipe read/write */
static union intr_pipefds intr_pipe;

/* interrupt sources list */
static struct rte_intr_source_list intr_sources;

/* interrupt handling thread */
static pthread_t intr_thread;

/* VFIO interrupts */
#ifdef VFIO_PRESENT

/* irq set buffer length for single-vector (INTx, MSI, req) interrupts */
#define IRQ_SET_BUF_LEN  (sizeof(struct vfio_irq_set) + sizeof(int))
/* irq set buffer length for queue interrupts and LSC interrupt */
#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
                              sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1))
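
/*
 * Layout note (illustrative): VFIO_DEVICE_SET_IRQS takes a variable-length
 * argument consisting of a struct vfio_irq_set header immediately followed
 * by irq_set->count eventfds when VFIO_IRQ_SET_DATA_EVENTFD is used:
 *
 *      +----------------------+
 *      | struct vfio_irq_set  |  argsz/flags/index/start/count
 *      +----------------------+
 *      | int efd[0]           |  vector 0 (misc/LSC interrupt)
 *      | int efd[1..count-1]  |  Rx/Tx queue vectors (MSI-X only)
 *      +----------------------+
 *
 * The two buffer-length macros above size this allocation for the
 * single-vector and multi-vector cases respectively.
 */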

/* enable legacy (INTx) interrupts */
static int
vfio_enable_intx(const struct rte_intr_handle *intr_handle)
{
        struct vfio_irq_set *irq_set;
        char irq_set_buf[IRQ_SET_BUF_LEN];
        int len, ret;
        int *fd_ptr;

        len = sizeof(irq_set_buf);

        /* enable INTx */
        irq_set = (struct vfio_irq_set *) irq_set_buf;
        irq_set->argsz = len;
        irq_set->count = 1;
        irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
        irq_set->start = 0;
        fd_ptr = (int *) &irq_set->data;
        *fd_ptr = intr_handle->fd;

        ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

        if (ret) {
                RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n",
                        intr_handle->fd);
                return -1;
        }

        /* unmask INTx after enabling */
        memset(irq_set, 0, len);
        len = sizeof(struct vfio_irq_set);
        irq_set->argsz = len;
        irq_set->count = 1;
        irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
        irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
        irq_set->start = 0;

        ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

        if (ret) {
                RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
                        intr_handle->fd);
                return -1;
        }
        return 0;
}

/* disable legacy (INTx) interrupts */
static int
vfio_disable_intx(const struct rte_intr_handle *intr_handle)
{
        struct vfio_irq_set *irq_set;
        char irq_set_buf[IRQ_SET_BUF_LEN];
        int len, ret;

        len = sizeof(struct vfio_irq_set);

        /* mask interrupts before disabling */
        irq_set = (struct vfio_irq_set *) irq_set_buf;
        irq_set->argsz = len;
        irq_set->count = 1;
        irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
        irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
        irq_set->start = 0;

        ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

        if (ret) {
                RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n",
                        intr_handle->fd);
                return -1;
        }

        /* disable INTx */
        memset(irq_set, 0, len);
        irq_set->argsz = len;
        irq_set->count = 0;
        irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
        irq_set->start = 0;

        ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

        if (ret) {
                RTE_LOG(ERR, EAL,
                        "Error disabling INTx interrupts for fd %d\n", intr_handle->fd);
                return -1;
        }
        return 0;
}

/* enable MSI interrupts */
static int
vfio_enable_msi(const struct rte_intr_handle *intr_handle)
{
        int len, ret;
        char irq_set_buf[IRQ_SET_BUF_LEN];
        struct vfio_irq_set *irq_set;
        int *fd_ptr;

        len = sizeof(irq_set_buf);

        irq_set = (struct vfio_irq_set *) irq_set_buf;
        irq_set->argsz = len;
        irq_set->count = 1;
        irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
        irq_set->start = 0;
        fd_ptr = (int *) &irq_set->data;
        *fd_ptr = intr_handle->fd;

        ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

        if (ret) {
                RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n",
                        intr_handle->fd);
                return -1;
        }
        return 0;
}

/* disable MSI interrupts */
static int
vfio_disable_msi(const struct rte_intr_handle *intr_handle)
{
        struct vfio_irq_set *irq_set;
        char irq_set_buf[IRQ_SET_BUF_LEN];
        int len, ret;

        len = sizeof(struct vfio_irq_set);

        irq_set = (struct vfio_irq_set *) irq_set_buf;
        irq_set->argsz = len;
        irq_set->count = 0;
        irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
        irq_set->start = 0;

        ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

        if (ret)
                RTE_LOG(ERR, EAL,
                        "Error disabling MSI interrupts for fd %d\n", intr_handle->fd);

        return ret;
}

/* enable MSI-X interrupts */
static int
vfio_enable_msix(const struct rte_intr_handle *intr_handle)
{
        int len, ret;
        char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
        struct vfio_irq_set *irq_set;
        int *fd_ptr;

        len = sizeof(irq_set_buf);

        irq_set = (struct vfio_irq_set *) irq_set_buf;
        irq_set->argsz = len;
        /* 1 <= irq_set->count <= RTE_MAX_RXTX_INTR_VEC_ID + 1 */
        irq_set->count = intr_handle->max_intr ?
                (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ?
                RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1;
        irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
        irq_set->start = 0;
        fd_ptr = (int *) &irq_set->data;
        /* vector offset 0 is reserved for the non-efd (misc) interrupt */
        fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd;
        memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds,
                sizeof(*intr_handle->efds) * intr_handle->nb_efd);

        ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

        if (ret) {
                RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n",
                        intr_handle->fd);
                return -1;
        }

        return 0;
}

/* disable MSI-X interrupts */
static int
vfio_disable_msix(const struct rte_intr_handle *intr_handle)
{
        struct vfio_irq_set *irq_set;
        char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
        int len, ret;

        len = sizeof(struct vfio_irq_set);

        irq_set = (struct vfio_irq_set *) irq_set_buf;
        irq_set->argsz = len;
        irq_set->count = 0;
        irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
        irq_set->start = 0;

        ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

        if (ret)
                RTE_LOG(ERR, EAL,
                        "Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd);

        return ret;
}

#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
/* enable req notifier */
static int
vfio_enable_req(const struct rte_intr_handle *intr_handle)
{
        int len, ret;
        char irq_set_buf[IRQ_SET_BUF_LEN];
        struct vfio_irq_set *irq_set;
        int *fd_ptr;

        len = sizeof(irq_set_buf);

        irq_set = (struct vfio_irq_set *) irq_set_buf;
        irq_set->argsz = len;
        irq_set->count = 1;
        irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
                         VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
        irq_set->start = 0;
        fd_ptr = (int *) &irq_set->data;
        *fd_ptr = intr_handle->fd;

        ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

        if (ret) {
                RTE_LOG(ERR, EAL, "Error enabling req interrupts for fd %d\n",
                        intr_handle->fd);
                return -1;
        }

        return 0;
}

/* disable req notifier */
static int
vfio_disable_req(const struct rte_intr_handle *intr_handle)
{
        struct vfio_irq_set *irq_set;
        char irq_set_buf[IRQ_SET_BUF_LEN];
        int len, ret;

        len = sizeof(struct vfio_irq_set);

        irq_set = (struct vfio_irq_set *) irq_set_buf;
        irq_set->argsz = len;
        irq_set->count = 0;
        irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
        irq_set->start = 0;

        ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

        if (ret)
                RTE_LOG(ERR, EAL, "Error disabling req interrupts for fd %d\n",
                        intr_handle->fd);

        return ret;
}
#endif
#endif

static int
uio_intx_intr_disable(const struct rte_intr_handle *intr_handle)
{
        unsigned char command_high;

        /* use the UIO config file descriptor for uio_pci_generic;
         * offset 5 is the high byte of the 16-bit PCI command register
         */
        if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
                RTE_LOG(ERR, EAL,
                        "Error reading interrupts status for fd %d\n",
                        intr_handle->uio_cfg_fd);
                return -1;
        }
        /* disable interrupts: set the INTx Disable bit (bit 10 of the
         * command register, i.e. bit 2 of its high byte)
         */
        command_high |= 0x4;
        if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
                RTE_LOG(ERR, EAL,
                        "Error disabling interrupts for fd %d\n",
                        intr_handle->uio_cfg_fd);
                return -1;
        }

        return 0;
}

static int
uio_intx_intr_enable(const struct rte_intr_handle *intr_handle)
{
        unsigned char command_high;

        /* use the UIO config file descriptor for uio_pci_generic */
        if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
                RTE_LOG(ERR, EAL,
                        "Error reading interrupts status for fd %d\n",
                        intr_handle->uio_cfg_fd);
                return -1;
        }
        /* enable interrupts: clear the INTx Disable bit */
        command_high &= ~0x4;
        if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
                RTE_LOG(ERR, EAL,
                        "Error enabling interrupts for fd %d\n",
                        intr_handle->uio_cfg_fd);
                return -1;
        }

        return 0;
}
424 static int
425 uio_intr_disable(const struct rte_intr_handle *intr_handle)
426 {
427         const int value = 0;
428
429         if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
430                 RTE_LOG(ERR, EAL,
431                         "Error disabling interrupts for fd %d (%s)\n",
432                         intr_handle->fd, strerror(errno));
433                 return -1;
434         }
435         return 0;
436 }
437
438 static int
439 uio_intr_enable(const struct rte_intr_handle *intr_handle)
440 {
441         const int value = 1;
442
443         if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
444                 RTE_LOG(ERR, EAL,
445                         "Error enabling interrupts for fd %d (%s)\n",
446                         intr_handle->fd, strerror(errno));
447                 return -1;
448         }
449         return 0;
450 }
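
/*
 * Note: a 4-byte write of 1/0 to a /dev/uioX fd is forwarded by the
 * kernel's uio core to the driver's irqcontrol() hook (implemented by
 * igb_uio); uio_pci_generic has no irqcontrol(), which is why the
 * uio_intx_intr_enable()/_disable() pair above toggles the PCI command
 * register through the config fd instead.
 */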

int
rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
                        rte_intr_callback_fn cb, void *cb_arg)
{
        int ret, wake_thread;
        struct rte_intr_source *src;
        struct rte_intr_callback *callback;

        wake_thread = 0;

        /* first do parameter checking */
        if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) {
                RTE_LOG(ERR, EAL,
                        "Registering with invalid input parameter\n");
                return -EINVAL;
        }

        /* allocate a new interrupt callback entity */
        callback = calloc(1, sizeof(*callback));
        if (callback == NULL) {
                RTE_LOG(ERR, EAL, "Can not allocate memory\n");
                return -ENOMEM;
        }
        callback->cb_fn = cb;
        callback->cb_arg = cb_arg;
        callback->pending_delete = 0;
        callback->ucb_fn = NULL;

        rte_spinlock_lock(&intr_lock);

        /* check if there is at least one callback registered for the fd */
        TAILQ_FOREACH(src, &intr_sources, next) {
                if (src->intr_handle.fd == intr_handle->fd) {
                        /* the source had no callbacks before, so the
                         * wait list must be rebuilt to include its fd */
                        if (TAILQ_EMPTY(&src->callbacks))
                                wake_thread = 1;

                        TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
                        ret = 0;
                        break;
                }
        }

        /* no existing callbacks for this fd - add a new source */
        if (src == NULL) {
                src = calloc(1, sizeof(*src));
                if (src == NULL) {
                        RTE_LOG(ERR, EAL, "Can not allocate memory\n");
                        free(callback);
                        ret = -ENOMEM;
                } else {
                        src->intr_handle = *intr_handle;
                        TAILQ_INIT(&src->callbacks);
                        TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
                        TAILQ_INSERT_TAIL(&intr_sources, src, next);
                        wake_thread = 1;
                        ret = 0;
                }
        }

        rte_spinlock_unlock(&intr_lock);

        /**
         * notify the pipe fd waited on by epoll_wait so that the
         * interrupt thread rebuilds its wait list, if needed.
         */
        if (wake_thread)
                if (write(intr_pipe.writefd, "1", 1) < 0)
                        return -EPIPE;

        return ret;
}
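
/*
 * Example usage (hypothetical application code, not part of this file;
 * lsc_handler and port_id are illustrative names):
 *
 *      static void
 *      lsc_handler(void *cb_arg)
 *      {
 *              uint16_t port_id = *(uint16_t *)cb_arg;
 *
 *              printf("interrupt on port %u\n", port_id);
 *      }
 *
 *      ret = rte_intr_callback_register(intr_handle, lsc_handler, &port_id);
 */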

int __rte_experimental
rte_intr_callback_unregister_pending(const struct rte_intr_handle *intr_handle,
                                rte_intr_callback_fn cb_fn, void *cb_arg,
                                rte_intr_unregister_callback_fn ucb_fn)
{
        int ret;
        struct rte_intr_source *src;
        struct rte_intr_callback *cb, *next;

        /* do parameter checking first */
        if (intr_handle == NULL || intr_handle->fd < 0) {
                RTE_LOG(ERR, EAL,
                        "Unregistering with invalid input parameter\n");
                return -EINVAL;
        }

        rte_spinlock_lock(&intr_lock);

        /* check if an interrupt source exists for the fd */
        TAILQ_FOREACH(src, &intr_sources, next)
                if (src->intr_handle.fd == intr_handle->fd)
                        break;

        /* No interrupt source registered for the fd */
        if (src == NULL) {
                ret = -ENOENT;

        /* only usable if the source is active, i.e. from within a callback */
        } else if (src->active == 0) {
                ret = -EAGAIN;

        } else {
                ret = 0;

                /* walk through the callbacks and mark all that match. */
                for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
                        next = TAILQ_NEXT(cb, next);
                        if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
                                        cb->cb_arg == cb_arg)) {
                                cb->pending_delete = 1;
                                cb->ucb_fn = ucb_fn;
                                ret++;
                        }
                }
        }

        rte_spinlock_unlock(&intr_lock);

        return ret;
}
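
/*
 * Example (hypothetical): a one-shot callback unregistering itself.
 * Calling rte_intr_callback_unregister() here would return -EAGAIN
 * because the source is marked active while its callbacks run, so the
 * pending variant is used and the interrupt thread performs the actual
 * removal afterwards (my_dev, handle_event and release_cb are
 * illustrative names):
 *
 *      static void
 *      one_shot_cb(void *cb_arg)
 *      {
 *              struct my_dev *dev = cb_arg;
 *
 *              handle_event(dev);
 *              rte_intr_callback_unregister_pending(&dev->intr_handle,
 *                              one_shot_cb, cb_arg, release_cb);
 *      }
 */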

int
rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
                        rte_intr_callback_fn cb_fn, void *cb_arg)
{
        int ret;
        struct rte_intr_source *src;
        struct rte_intr_callback *cb, *next;

        /* do parameter checking first */
        if (intr_handle == NULL || intr_handle->fd < 0) {
                RTE_LOG(ERR, EAL,
                        "Unregistering with invalid input parameter\n");
                return -EINVAL;
        }

        rte_spinlock_lock(&intr_lock);

        /* check if an interrupt source exists for the fd */
        TAILQ_FOREACH(src, &intr_sources, next)
                if (src->intr_handle.fd == intr_handle->fd)
                        break;

        /* No interrupt source registered for the fd */
        if (src == NULL) {
                ret = -ENOENT;

        /* interrupt source has some active callbacks right now. */
        } else if (src->active != 0) {
                ret = -EAGAIN;

        /* ok to remove. */
        } else {
                ret = 0;

                /* walk through the callbacks and remove all that match. */
                for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {

                        next = TAILQ_NEXT(cb, next);

                        if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
                                        cb->cb_arg == cb_arg)) {
                                TAILQ_REMOVE(&src->callbacks, cb, next);
                                free(cb);
                                ret++;
                        }
                }

                /* if all callbacks were removed, drop the source as well. */
                if (TAILQ_EMPTY(&src->callbacks)) {
                        TAILQ_REMOVE(&intr_sources, src, next);
                        free(src);
                }
        }

        rte_spinlock_unlock(&intr_lock);

        /* notify the pipe fd waited on by epoll_wait to rebuild the wait list */
        if (ret >= 0 && write(intr_pipe.writefd, "1", 1) < 0) {
                ret = -EPIPE;
        }

        return ret;
}

int
rte_intr_enable(const struct rte_intr_handle *intr_handle)
{
        if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
                return 0;

        if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
                return -1;

        switch (intr_handle->type) {
        /* write to the uio fd to enable the interrupt */
        case RTE_INTR_HANDLE_UIO:
                if (uio_intr_enable(intr_handle))
                        return -1;
                break;
        case RTE_INTR_HANDLE_UIO_INTX:
                if (uio_intx_intr_enable(intr_handle))
                        return -1;
                break;
        /* not used at this moment */
        case RTE_INTR_HANDLE_ALARM:
                return -1;
#ifdef VFIO_PRESENT
        case RTE_INTR_HANDLE_VFIO_MSIX:
                if (vfio_enable_msix(intr_handle))
                        return -1;
                break;
        case RTE_INTR_HANDLE_VFIO_MSI:
                if (vfio_enable_msi(intr_handle))
                        return -1;
                break;
        case RTE_INTR_HANDLE_VFIO_LEGACY:
                if (vfio_enable_intx(intr_handle))
                        return -1;
                break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
        case RTE_INTR_HANDLE_VFIO_REQ:
                if (vfio_enable_req(intr_handle))
                        return -1;
                break;
#endif
#endif
        /* not used at this moment */
        case RTE_INTR_HANDLE_DEV_EVENT:
                return -1;
        /* unknown handle type */
        default:
                RTE_LOG(ERR, EAL,
                        "Unknown handle type of fd %d\n",
                        intr_handle->fd);
                return -1;
        }

        return 0;
}

int
rte_intr_disable(const struct rte_intr_handle *intr_handle)
{
        if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
                return 0;

        if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
                return -1;

        switch (intr_handle->type) {
        /* write to the uio fd to disable the interrupt */
        case RTE_INTR_HANDLE_UIO:
                if (uio_intr_disable(intr_handle))
                        return -1;
                break;
        case RTE_INTR_HANDLE_UIO_INTX:
                if (uio_intx_intr_disable(intr_handle))
                        return -1;
                break;
        /* not used at this moment */
        case RTE_INTR_HANDLE_ALARM:
                return -1;
#ifdef VFIO_PRESENT
        case RTE_INTR_HANDLE_VFIO_MSIX:
                if (vfio_disable_msix(intr_handle))
                        return -1;
                break;
        case RTE_INTR_HANDLE_VFIO_MSI:
                if (vfio_disable_msi(intr_handle))
                        return -1;
                break;
        case RTE_INTR_HANDLE_VFIO_LEGACY:
                if (vfio_disable_intx(intr_handle))
                        return -1;
                break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
        case RTE_INTR_HANDLE_VFIO_REQ:
                if (vfio_disable_req(intr_handle))
                        return -1;
                break;
#endif
#endif
        /* not used at this moment */
        case RTE_INTR_HANDLE_DEV_EVENT:
                return -1;
        /* unknown handle type */
        default:
                RTE_LOG(ERR, EAL,
                        "Unknown handle type of fd %d\n",
                        intr_handle->fd);
                return -1;
        }

        return 0;
}

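/*
 * Process the fds reported ready by epoll_wait(): drain each source's
 * eventfd, then invoke the callbacks registered for that source. A
 * negative return value tells the caller that the epoll wait list must
 * be rebuilt (pipe notification, device removal, or callback changes).
 */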
static int
eal_intr_process_interrupts(struct epoll_event *events, int nfds)
{
        bool call;
        int n, bytes_read, rv;
        struct rte_intr_source *src;
        struct rte_intr_callback *cb, *next;
        union rte_intr_read_buffer buf;
        struct rte_intr_callback active_cb;

        for (n = 0; n < nfds; n++) {
                /* 'call' decides, per fd, whether callbacks are invoked */
                call = false;

                /**
                 * if the pipe fd is ready to read, return out to
                 * rebuild the wait list.
                 */
                if (events[n].data.fd == intr_pipe.readfd) {
                        int r = read(intr_pipe.readfd, buf.charbuf,
                                        sizeof(buf.charbuf));
                        RTE_SET_USED(r);
                        return -1;
                }
                rte_spinlock_lock(&intr_lock);
                TAILQ_FOREACH(src, &intr_sources, next)
                        if (src->intr_handle.fd ==
                                        events[n].data.fd)
                                break;
                if (src == NULL) {
                        rte_spinlock_unlock(&intr_lock);
                        continue;
                }

                /* mark this interrupt source as active and release the lock. */
                src->active = 1;
                rte_spinlock_unlock(&intr_lock);

                /* set the length to be read for the different handle types */
                switch (src->intr_handle.type) {
                case RTE_INTR_HANDLE_UIO:
                case RTE_INTR_HANDLE_UIO_INTX:
                        bytes_read = sizeof(buf.uio_intr_count);
                        break;
                case RTE_INTR_HANDLE_ALARM:
                        bytes_read = sizeof(buf.timerfd_num);
                        break;
#ifdef VFIO_PRESENT
                case RTE_INTR_HANDLE_VFIO_MSIX:
                case RTE_INTR_HANDLE_VFIO_MSI:
                case RTE_INTR_HANDLE_VFIO_LEGACY:
                        bytes_read = sizeof(buf.vfio_intr_count);
                        break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
                case RTE_INTR_HANDLE_VFIO_REQ:
                        bytes_read = 0;
                        call = true;
                        break;
#endif
#endif
                case RTE_INTR_HANDLE_VDEV:
                case RTE_INTR_HANDLE_EXT:
                        bytes_read = 0;
                        call = true;
                        break;
                case RTE_INTR_HANDLE_DEV_EVENT:
                        bytes_read = 0;
                        call = true;
                        break;
                default:
                        bytes_read = 1;
                        break;
                }

                if (bytes_read > 0) {
                        /**
                         * read out to clear the ready-to-be-read flag
                         * for epoll_wait.
                         */
                        bytes_read = read(events[n].data.fd, &buf, bytes_read);
                        if (bytes_read < 0) {
                                if (errno == EINTR || errno == EWOULDBLOCK)
                                        continue;

                                RTE_LOG(ERR, EAL, "Error reading from file "
                                        "descriptor %d: %s\n",
                                        events[n].data.fd,
                                        strerror(errno));
                                /*
                                 * The device is unplugged or buggy, remove
                                 * it as an interrupt source and return to
                                 * force the wait list to be rebuilt.
                                 */
                                rte_spinlock_lock(&intr_lock);
                                TAILQ_REMOVE(&intr_sources, src, next);
                                rte_spinlock_unlock(&intr_lock);

                                for (cb = TAILQ_FIRST(&src->callbacks); cb;
                                                        cb = next) {
                                        next = TAILQ_NEXT(cb, next);
                                        TAILQ_REMOVE(&src->callbacks, cb, next);
                                        free(cb);
                                }
                                free(src);
                                return -1;
                        } else if (bytes_read == 0)
                                RTE_LOG(ERR, EAL, "Read nothing from file "
                                        "descriptor %d\n", events[n].data.fd);
                        else
                                call = true;
                }

                /* grab the lock again to call callbacks and update status. */
                rte_spinlock_lock(&intr_lock);

                if (call) {

                        /* Finally, call all callbacks. */
                        TAILQ_FOREACH(cb, &src->callbacks, next) {

                                /* make a copy and unlock. */
                                active_cb = *cb;
                                rte_spinlock_unlock(&intr_lock);

                                /* call the actual callback */
                                active_cb.cb_fn(active_cb.cb_arg);

                                /* get the lock back. */
                                rte_spinlock_lock(&intr_lock);
                        }
                }
                /* we are done with this interrupt source, release it. */
                src->active = 0;

                rv = 0;

                /* check if any callbacks are supposed to be removed */
                for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
                        next = TAILQ_NEXT(cb, next);
                        if (cb->pending_delete) {
                                TAILQ_REMOVE(&src->callbacks, cb, next);
                                if (cb->ucb_fn)
                                        cb->ucb_fn(&src->intr_handle, cb->cb_arg);
                                free(cb);
                                rv++;
                        }
                }

                /* if all callbacks were removed, drop the source as well. */
                if (TAILQ_EMPTY(&src->callbacks)) {
                        TAILQ_REMOVE(&intr_sources, src, next);
                        free(src);
                }

                /* if callbacks were removed, notify the pipe fd waited on by
                 * epoll_wait to rebuild the wait list */
                if (rv > 0 && write(intr_pipe.writefd, "1", 1) < 0) {
                        rte_spinlock_unlock(&intr_lock);
                        return -EPIPE;
                }

                rte_spinlock_unlock(&intr_lock);
        }

        return 0;
}

/**
 * It handles all the interrupts.
 *
 * @param pfd
 *  epoll file descriptor.
 * @param totalfds
 *  The number of file descriptors added in epoll.
 *
 * @return
 *  void
 */
static void
eal_intr_handle_interrupts(int pfd, unsigned totalfds)
{
        struct epoll_event events[totalfds];
        int nfds = 0;

        for (;;) {
                nfds = epoll_wait(pfd, events, totalfds,
                        EAL_INTR_EPOLL_WAIT_FOREVER);
                /* epoll_wait failed */
                if (nfds < 0) {
                        if (errno == EINTR)
                                continue;
                        RTE_LOG(ERR, EAL,
                                "epoll_wait failed\n");
                        return;
                }
                /* epoll_wait timed out; should never happen here since
                 * the timeout is EAL_INTR_EPOLL_WAIT_FOREVER */
                else if (nfds == 0)
                        continue;
                /* epoll_wait has at least one fd ready to read */
                if (eal_intr_process_interrupts(events, nfds) < 0)
                        return;
        }
}

/**
 * It builds/rebuilds up the epoll file descriptor with all the
 * file descriptors being waited on. Then handles the interrupts.
 *
 * @param arg
 *  pointer. (unused)
 *
 * @return
 *  never returns.
 */
static __attribute__((noreturn)) void *
eal_intr_thread_main(__rte_unused void *arg)
{
        struct epoll_event ev;

        /* host thread, never breaks out */
        for (;;) {
                /* build up the epoll fd with all descriptors we are to
                 * wait on then pass it to the handle_interrupts function
                 */
                static struct epoll_event pipe_event = {
                        .events = EPOLLIN | EPOLLPRI,
                };
                struct rte_intr_source *src;
                unsigned numfds = 0;

                /* create epoll fd */
                int pfd = epoll_create(1);
                if (pfd < 0)
                        rte_panic("Cannot create epoll instance\n");

                pipe_event.data.fd = intr_pipe.readfd;
                /**
                 * add the pipe fd to the wait list; writing to this pipe
                 * requests a rebuild of the wait list.
                 */
                if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd,
                                                &pipe_event) < 0) {
                        rte_panic("Error adding fd %d to epoll_ctl, %s\n",
                                        intr_pipe.readfd, strerror(errno));
                }
                numfds++;

                rte_spinlock_lock(&intr_lock);

                TAILQ_FOREACH(src, &intr_sources, next) {
                        if (TAILQ_EMPTY(&src->callbacks))
                                continue; /* skip those with no callbacks */
                        ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
                        ev.data.fd = src->intr_handle.fd;

                        /**
                         * add each interrupt source's file descriptor to
                         * the wait list.
                         */
                        if (epoll_ctl(pfd, EPOLL_CTL_ADD,
                                        src->intr_handle.fd, &ev) < 0) {
                                rte_panic("Error adding fd %d to epoll_ctl, %s\n",
                                        src->intr_handle.fd, strerror(errno));
                        } else
                                numfds++;
                }
                rte_spinlock_unlock(&intr_lock);
                /* serve the interrupt */
                eal_intr_handle_interrupts(pfd, numfds);

                /**
                 * when we return, we need to rebuild the
                 * list of fds to monitor.
                 */
                close(pfd);
        }
}
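
/*
 * Rebuild protocol, for reference: rte_intr_callback_register() and
 * rte_intr_callback_unregister() write one byte to intr_pipe.writefd;
 * eal_intr_process_interrupts() then sees intr_pipe.readfd ready and
 * returns a negative value, eal_intr_handle_interrupts() returns, and
 * the loop above closes the epoll fd and builds a fresh wait list from
 * intr_sources.
 */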

int
rte_eal_intr_init(void)
{
        int ret = 0;

        /* init the global interrupt source head */
        TAILQ_INIT(&intr_sources);

        /**
         * create a pipe that will be waited on by epoll; writing to it
         * notifies the interrupt thread to rebuild the wait list.
         */
        if (pipe(intr_pipe.pipefd) < 0) {
                rte_errno = errno;
                return -1;
        }

        /* create the host thread to wait for/handle interrupts */
        ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL,
                        eal_intr_thread_main, NULL);
        if (ret != 0) {
                rte_errno = -ret;
                RTE_LOG(ERR, EAL,
                        "Failed to create thread for interrupt handling\n");
        }

        return ret;
}

static void
eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
{
        union rte_intr_read_buffer buf;
        int bytes_read = 0;
        int nbytes;

        switch (intr_handle->type) {
        case RTE_INTR_HANDLE_UIO:
        case RTE_INTR_HANDLE_UIO_INTX:
                bytes_read = sizeof(buf.uio_intr_count);
                break;
#ifdef VFIO_PRESENT
        case RTE_INTR_HANDLE_VFIO_MSIX:
        case RTE_INTR_HANDLE_VFIO_MSI:
        case RTE_INTR_HANDLE_VFIO_LEGACY:
                bytes_read = sizeof(buf.vfio_intr_count);
                break;
#endif
        case RTE_INTR_HANDLE_VDEV:
                /* For vdev, number of bytes to read is set by driver */
                bytes_read = intr_handle->efd_counter_size;
                break;
        case RTE_INTR_HANDLE_EXT:
                return;
        default:
                bytes_read = 1;
                RTE_LOG(INFO, EAL, "unexpected intr type\n");
                break;
        }

        /**
         * read out to clear the ready-to-be-read flag
         * for epoll_wait.
         */
        if (bytes_read == 0)
                return;
        do {
                nbytes = read(fd, &buf, bytes_read);
                if (nbytes < 0) {
                        if (errno == EINTR || errno == EWOULDBLOCK ||
                            errno == EAGAIN)
                                continue;
                        RTE_LOG(ERR, EAL,
                                "Error reading from fd %d: %s\n",
                                fd, strerror(errno));
                } else if (nbytes == 0)
                        RTE_LOG(ERR, EAL, "Read nothing from fd %d\n", fd);
                return;
        } while (1);
}

static int
eal_epoll_process_event(struct epoll_event *evs, unsigned int n,
                        struct rte_epoll_event *events)
{
        unsigned int i, count = 0;
        struct rte_epoll_event *rev;

        for (i = 0; i < n; i++) {
                rev = evs[i].data.ptr;
                /* the VALID -> EXEC transition gives this thread exclusive
                 * ownership of the event; skip entries torn down concurrently
                 */
                if (!rev || !rte_atomic32_cmpset(&rev->status, RTE_EPOLL_VALID,
                                                 RTE_EPOLL_EXEC))
                        continue;

                events[count].status        = RTE_EPOLL_VALID;
                events[count].fd            = rev->fd;
                events[count].epfd          = rev->epfd;
                events[count].epdata.event  = rev->epdata.event;
                events[count].epdata.data   = rev->epdata.data;
                if (rev->epdata.cb_fun)
                        rev->epdata.cb_fun(rev->fd,
                                           rev->epdata.cb_arg);

                rte_compiler_barrier();
                rev->status = RTE_EPOLL_VALID;
                count++;
        }
        return count;
}

static inline int
eal_init_tls_epfd(void)
{
        int pfd = epoll_create(255);

        if (pfd < 0) {
                RTE_LOG(ERR, EAL,
                        "Cannot create epoll instance\n");
                return -1;
        }
        return pfd;
}

int
rte_intr_tls_epfd(void)
{
        if (RTE_PER_LCORE(_epfd) == -1)
                RTE_PER_LCORE(_epfd) = eal_init_tls_epfd();

        return RTE_PER_LCORE(_epfd);
}

int
rte_epoll_wait(int epfd, struct rte_epoll_event *events,
               int maxevents, int timeout)
{
        struct epoll_event evs[maxevents];
        int rc;

        if (!events) {
                RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
                return -1;
        }

        /* using per thread epoll fd */
        if (epfd == RTE_EPOLL_PER_THREAD)
                epfd = rte_intr_tls_epfd();

        while (1) {
                rc = epoll_wait(epfd, evs, maxevents, timeout);
                if (likely(rc > 0)) {
                        /* epoll_wait has at least one fd ready to read */
                        rc = eal_epoll_process_event(evs, rc, events);
                        break;
                } else if (rc < 0) {
                        if (errno == EINTR)
                                continue;
                        /* epoll_wait failed */
                        RTE_LOG(ERR, EAL, "epoll_wait failed: %s\n",
                                strerror(errno));
                        rc = -1;
                        break;
                } else {
                        /* rc == 0, epoll_wait timed out */
                        break;
                }
        }

        return rc;
}

static inline void
eal_epoll_data_safe_free(struct rte_epoll_event *ev)
{
        /* spin until the event is back to VALID (i.e. no callback is
         * executing on it), then atomically invalidate it */
        while (!rte_atomic32_cmpset(&ev->status, RTE_EPOLL_VALID,
                                    RTE_EPOLL_INVALID))
                while (ev->status != RTE_EPOLL_VALID)
                        rte_pause();
        memset(&ev->epdata, 0, sizeof(ev->epdata));
        ev->fd = -1;
        ev->epfd = -1;
}

int
rte_epoll_ctl(int epfd, int op, int fd,
              struct rte_epoll_event *event)
{
        struct epoll_event ev;

        if (!event) {
                RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
                return -1;
        }

        /* using per thread epoll fd */
        if (epfd == RTE_EPOLL_PER_THREAD)
                epfd = rte_intr_tls_epfd();

        if (op == EPOLL_CTL_ADD) {
                event->status = RTE_EPOLL_VALID;
                event->fd = fd;  /* ignore the fd the caller set in the event */
                event->epfd = epfd;
                ev.data.ptr = (void *)event;
        }

        ev.events = event->epdata.event;
        if (epoll_ctl(epfd, op, fd, &ev) < 0) {
                RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n",
                        op, fd, strerror(errno));
                if (op == EPOLL_CTL_ADD)
                        /* rollback status when CTL_ADD fails */
                        event->status = RTE_EPOLL_INVALID;
                return -1;
        }

        if (op == EPOLL_CTL_DEL && event->status != RTE_EPOLL_INVALID)
                eal_epoll_data_safe_free(event);

        return 0;
}
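
/*
 * Example usage (hypothetical application code; sock_fd is an
 * illustrative name): register an arbitrary fd with the calling
 * thread's epoll instance and wait on it. The rte_epoll_event must
 * outlive the registration since it is stored by pointer:
 *
 *      static struct rte_epoll_event rev;
 *
 *      rev.epdata.event = EPOLLIN;
 *      rev.epdata.data = NULL;
 *      if (rte_epoll_ctl(RTE_EPOLL_PER_THREAD, EPOLL_CTL_ADD,
 *                      sock_fd, &rev) < 0)
 *              return -1;
 *
 *      struct rte_epoll_event ev;
 *      int n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, &ev, 1, 100);
 *      if (n > 0)
 *              printf("fd %d is ready\n", ev.fd);
 */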

int
rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
                int op, unsigned int vec, void *data)
{
        struct rte_epoll_event *rev;
        struct rte_epoll_data *epdata;
        int epfd_op;
        unsigned int efd_idx;
        int rc = 0;

        efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ?
                (vec - RTE_INTR_VEC_RXTX_OFFSET) : vec;

        if (!intr_handle || intr_handle->nb_efd == 0 ||
            efd_idx >= intr_handle->nb_efd) {
                RTE_LOG(ERR, EAL, "Wrong intr vector number.\n");
                return -EPERM;
        }

        switch (op) {
        case RTE_INTR_EVENT_ADD:
                epfd_op = EPOLL_CTL_ADD;
                rev = &intr_handle->elist[efd_idx];
                if (rev->status != RTE_EPOLL_INVALID) {
                        RTE_LOG(INFO, EAL, "Event has already been added.\n");
                        return -EEXIST;
                }

                /* attach to the intr vector fd */
                epdata = &rev->epdata;
                epdata->event  = EPOLLIN | EPOLLPRI | EPOLLET;
                epdata->data   = data;
                epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr;
                epdata->cb_arg = (void *)intr_handle;
                rc = rte_epoll_ctl(epfd, epfd_op,
                                   intr_handle->efds[efd_idx], rev);
                if (!rc)
                        RTE_LOG(DEBUG, EAL,
                                "efd %d associated with vec %d added on epfd %d\n",
                                rev->fd, vec, epfd);
                else
                        rc = -EPERM;
                break;
        case RTE_INTR_EVENT_DEL:
                epfd_op = EPOLL_CTL_DEL;
                rev = &intr_handle->elist[efd_idx];
                if (rev->status == RTE_EPOLL_INVALID) {
                        RTE_LOG(INFO, EAL, "Event does not exist.\n");
                        return -EPERM;
                }

                rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev);
                if (rc)
                        rc = -EPERM;
                break;
        default:
                RTE_LOG(ERR, EAL, "event op type mismatch\n");
                rc = -EPERM;
        }

        return rc;
}
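
/*
 * Example (hypothetical; applications normally reach this through the
 * ethdev layer, e.g. rte_eth_dev_rx_intr_ctl_q()): bind Rx queue
 * vector 0 to the calling thread's epoll instance, then sleep until
 * the queue raises an interrupt.
 *
 *      rte_intr_rx_ctl(intr_handle, RTE_EPOLL_PER_THREAD,
 *                      RTE_INTR_EVENT_ADD, RTE_INTR_VEC_RXTX_OFFSET, NULL);
 *
 *      struct rte_epoll_event ev;
 *      rte_epoll_wait(RTE_EPOLL_PER_THREAD, &ev, 1, -1);
 */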

void
rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle)
{
        uint32_t i;
        struct rte_epoll_event *rev;

        for (i = 0; i < intr_handle->nb_efd; i++) {
                rev = &intr_handle->elist[i];
                if (rev->status == RTE_EPOLL_INVALID)
                        continue;
                if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) {
                        /* force free if the entry is still valid */
                        eal_epoll_data_safe_free(rev);
                        rev->status = RTE_EPOLL_INVALID;
                }
        }
}

int
rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
{
        uint32_t i;
        int fd;
        uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);

        assert(nb_efd != 0);

        if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) {
                for (i = 0; i < n; i++) {
                        fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
                        if (fd < 0) {
                                RTE_LOG(ERR, EAL,
                                        "can't setup eventfd, error %i (%s)\n",
                                        errno, strerror(errno));
                                return -errno;
                        }
                        intr_handle->efds[i] = fd;
                }
                intr_handle->nb_efd   = n;
                intr_handle->max_intr = NB_OTHER_INTR + n;
        } else if (intr_handle->type == RTE_INTR_HANDLE_VDEV) {
                /* only check here; initialization is done in the vdev driver */
                if (intr_handle->efd_counter_size >
                    sizeof(union rte_intr_read_buffer)) {
                        RTE_LOG(ERR, EAL, "the efd_counter_size is oversized\n");
                        return -EINVAL;
                }
        } else {
                intr_handle->efds[0]  = intr_handle->fd;
                intr_handle->nb_efd   = RTE_MIN(nb_efd, 1U);
                intr_handle->max_intr = NB_OTHER_INTR;
        }

        return 0;
}
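
/*
 * Example (hypothetical driver setup; nb_rx_queues is illustrative):
 * allocate one eventfd per Rx queue so that each queue vector can later
 * be bound to an epoll instance with rte_intr_rx_ctl() above.
 *
 *      if (rte_intr_efd_enable(intr_handle, nb_rx_queues) < 0)
 *              return -1;
 */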

void
rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
{
        uint32_t i;

        rte_intr_free_epoll_fd(intr_handle);
        if (intr_handle->max_intr > intr_handle->nb_efd) {
                for (i = 0; i < intr_handle->nb_efd; i++)
                        close(intr_handle->efds[i]);
        }
        intr_handle->nb_efd = 0;
        intr_handle->max_intr = 0;
}

int
rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
{
        return !!intr_handle->nb_efd;
}

int
rte_intr_allow_others(struct rte_intr_handle *intr_handle)
{
        if (!rte_intr_dp_is_en(intr_handle))
                return 1;
        else
                return !!(intr_handle->max_intr - intr_handle->nb_efd);
}

int
rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
{
        if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX)
                return 1;

        if (intr_handle->type == RTE_INTR_HANDLE_VDEV)
                return 1;

        return 0;
}