dpdk.git: lib/librte_eal/linuxapp/eal/eal_interrupts.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include <sys/queue.h>
#include <malloc.h>
#include <stdarg.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <inttypes.h>
#include <sys/epoll.h>
#include <sys/signalfd.h>

#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_ring.h>
#include <rte_debug.h>
#include <rte_log.h>
#include <rte_mempool.h>
#include <rte_pci.h>
#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_spinlock.h>

#include "eal_private.h"

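/*
 * Overview: interrupt handling in the Linux EAL is built from three
 * pieces defined below: a control pipe (intr_pipe), a list of interrupt
 * sources with their user callbacks (intr_sources, protected by
 * intr_lock), and a dedicated host thread (intr_thread) that sleeps in
 * epoll_wait() on every source fd plus the pipe's read end. Writing a
 * byte to the pipe wakes the thread so it can rebuild its epoll wait
 * list after sources are added or removed.
 */
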
#define EAL_INTR_EPOLL_WAIT_FOREVER (-1)

/**
 * union for pipe fds.
 */
union intr_pipefds {
        struct {
                int pipefd[2];
        };
        struct {
                int readfd;
                int writefd;
        };
};

/**
 * union buffer for reading on different devices
 */
union rte_intr_read_buffer {
        int uio_intr_count;              /* for uio device */
        uint64_t timerfd_num;            /* for timerfd */
        char charbuf[16];                /* for others */
};

TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
TAILQ_HEAD(rte_intr_source_list, rte_intr_source);

struct rte_intr_callback {
        TAILQ_ENTRY(rte_intr_callback) next;
        rte_intr_callback_fn cb_fn;  /**< callback address */
        void *cb_arg;                /**< parameter for callback */
};

struct rte_intr_source {
        TAILQ_ENTRY(rte_intr_source) next;
        struct rte_intr_handle intr_handle; /**< interrupt handle */
        struct rte_intr_cb_list callbacks;  /**< user callbacks */
};

/* global spinlock for interrupt data operation */
static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;

/* union buffer for pipe read/write */
static union intr_pipefds intr_pipe;

/* interrupt sources list */
static struct rte_intr_source_list intr_sources;

/* interrupt handling thread */
static pthread_t intr_thread;

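/**
 * Register a user callback for the interrupt source described by
 * intr_handle. The (fd, callback, argument) tuple is appended to the
 * source's callback list, creating the source entry on first use, and
 * the interrupt thread is woken through intr_pipe so it can add the
 * new fd to its epoll wait list.
 *
 * A typical driver registration might look like the sketch below
 * (the dev pointer and my_intr_handler are illustrative, not part of
 * this file):
 *
 *   static void my_intr_handler(struct rte_intr_handle *h, void *arg);
 *   ...
 *   rte_intr_callback_register(&dev->intr_handle, my_intr_handler, dev);
 *
 * Returns 0 on success, a negative errno-style value on error.
 */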
int
rte_intr_callback_register(struct rte_intr_handle *intr_handle,
                        rte_intr_callback_fn cb, void *cb_arg)
{
        int ret = -1;
        struct rte_intr_source *src;
        int wake_thread = 0;

        /* first do parameter checking */
        if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) {
                RTE_LOG(ERR, EAL,
                        "Registering with invalid input parameter\n");
                return -EINVAL;
        }

        /* allocate a new interrupt callback entity */
        struct rte_intr_callback *callback =
                rte_zmalloc("interrupt callback list",
                                sizeof(*callback), 0);
        if (callback == NULL) {
                RTE_LOG(ERR, EAL, "Can not allocate memory\n");
                return -ENOMEM;
        }
        callback->cb_fn = cb;
        callback->cb_arg = cb_arg;

        rte_spinlock_lock(&intr_lock);

        /* check if a source is already registered for the fd */
        TAILQ_FOREACH(src, &intr_sources, next) {
                if (src->intr_handle.fd == intr_handle->fd) {
                        /* if it had no callbacks left, the thread must be
                         * woken so it waits on this fd again */
                        if (src->callbacks.tqh_first == NULL)
                                wake_thread = 1;

                        TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
                        break;
                }
        }

        /* no existing source for this fd - add a new one */
        if (src == NULL) {
                src = rte_zmalloc("interrupt source list", sizeof(*src), 0);
                if (src == NULL) {
                        RTE_LOG(ERR, EAL, "Can not allocate memory\n");
                        ret = -ENOMEM;
                        goto error;
                }
                src->intr_handle = *intr_handle;
                TAILQ_INIT(&src->callbacks);

                TAILQ_INSERT_TAIL(&intr_sources, src, next);
                TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
                wake_thread = 1;
        }

        rte_spinlock_unlock(&intr_lock);
        /**
         * if needed, notify the pipe fd waited on by epoll_wait so the
         * interrupt thread rebuilds its wait list.
         */
        if (wake_thread)
                if (write(intr_pipe.writefd, "1", 1) < 0)
                        return -EPIPE;

        return 0;

error:
        rte_spinlock_unlock(&intr_lock);
        /* free the callback that was never attached to a source */
        rte_free(callback);

        return ret;
}

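/**
 * Unregister callbacks previously registered for intr_handle's fd.
 * Every callback whose function matches cb_fn is removed; cb_arg
 * narrows the match to one argument value, unless it is (void *)-1,
 * which acts as a wildcard and matches any argument. When the last
 * callback of a source is removed, the source itself is freed and the
 * interrupt thread is told to rebuild its epoll wait list.
 *
 * Returns the number of callbacks removed, or a negative errno-style
 * value on error.
 */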
int
rte_intr_callback_unregister(struct rte_intr_handle *intr_handle,
                        rte_intr_callback_fn cb_fn, void *cb_arg)
{
        int ret = -1;
        struct rte_intr_source *src;
        struct rte_intr_callback *cb;

        /* do parameter checking first */
        if (intr_handle == NULL || intr_handle->fd < 0) {
                RTE_LOG(ERR, EAL,
                "Unregistering with invalid input parameter\n");
                return -EINVAL;
        }

        rte_spinlock_lock(&intr_lock);

        /* check if an interrupt source exists for the fd */
        TAILQ_FOREACH(src, &intr_sources, next)
                if (src->intr_handle.fd == intr_handle->fd)
                        break;

        /* No interrupt source registered for the fd */
        if (src == NULL) {
                ret = -ENOENT;
                goto error;
        }

        ret = 0;
        /* walk the list with a saved successor, since matching
         * entries are freed as we go */
        for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; ) {
                struct rte_intr_callback *next_cb = TAILQ_NEXT(cb, next);

                if (cb->cb_fn == cb_fn &&
                                (cb_arg == (void *)-1 ||
                                 cb->cb_arg == cb_arg)) {
                        TAILQ_REMOVE(&src->callbacks, cb, next);
                        rte_free(cb);
                        ret++;
                }
                cb = next_cb;
        }

        /* free the source once its last callback is gone */
        if (src->callbacks.tqh_first == NULL) {
                TAILQ_REMOVE(&intr_sources, src, next);
                rte_free(src);
        }

        /* notify the pipe fd waited on by epoll_wait to rebuild the wait list */
        if (write(intr_pipe.writefd, "1", 1) < 0) {
                ret = -EPIPE;
                goto error;
        }

        rte_spinlock_unlock(&intr_lock);

        return ret;

error:
        rte_spinlock_unlock(&intr_lock);

        return ret;
}

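/**
 * Enable the interrupt behind intr_handle. For uio handles this writes
 * the value 1 to the uio fd, which the uio kernel driver interprets as
 * an enable/unmask request.
 *
 * Returns 0 on success, -1 on error.
 */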
int
rte_intr_enable(struct rte_intr_handle *intr_handle)
{
        const int value = 1;

        if (!intr_handle || intr_handle->fd < 0)
                return -1;

        switch (intr_handle->type) {
        /* write to the uio fd to enable the interrupt */
        case RTE_INTR_HANDLE_UIO:
                if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
                        RTE_LOG(ERR, EAL,
                                "Error enabling interrupts for fd %d\n",
                                                        intr_handle->fd);
                        return -1;
                }
                break;
        /* not used at this moment */
        case RTE_INTR_HANDLE_ALARM:
                return -1;
        /* unknown handle type */
        default:
                RTE_LOG(ERR, EAL,
                        "Unknown handle type of fd %d\n",
                                        intr_handle->fd);
                return -1;
        }

        return 0;
}

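/**
 * Disable the interrupt behind intr_handle. For uio handles this writes
 * the value 0 to the uio fd, which the uio kernel driver interprets as
 * a disable/mask request.
 *
 * Returns 0 on success, -1 on error.
 */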
int
rte_intr_disable(struct rte_intr_handle *intr_handle)
{
        const int value = 0;

        if (!intr_handle || intr_handle->fd < 0)
                return -1;

        switch (intr_handle->type) {
        /* write to the uio fd to disable the interrupt */
        case RTE_INTR_HANDLE_UIO:
                if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
                        RTE_LOG(ERR, EAL,
                                "Error disabling interrupts for fd %d\n",
                                                        intr_handle->fd);
                        return -1;
                }
                break;
        /* not used at this moment */
        case RTE_INTR_HANDLE_ALARM:
                return -1;
        /* unknown handle type */
        default:
                RTE_LOG(ERR, EAL,
                        "Unknown handle type of fd %d\n",
                                        intr_handle->fd);
                return -1;
        }

        return 0;
}

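/**
 * Handle the fds reported ready by one epoll_wait() call. If the
 * control pipe is among them, drain it and return -1 so the caller
 * rebuilds the epoll wait list. For every other fd, copy the source's
 * callbacks while holding intr_lock, read the fd to re-arm it, then
 * invoke the copied callbacks without the lock held, so a callback may
 * itself register or unregister callbacks.
 *
 * Returns 0 on success, -1 when the wait list must be rebuilt.
 */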
static int
eal_intr_process_interrupts(struct epoll_event *events, int nfds)
{
        int n, i, active_cb, bytes_read;
        struct rte_intr_source *src;
        struct rte_intr_callback *cb;
        union rte_intr_read_buffer buf;
        struct rte_intr_callback active_cbs[32];

        for (n = 0; n < nfds; n++) {
                /**
                 * if the pipe fd is ready to read, return to the caller
                 * so it rebuilds the wait list.
                 */
                if (events[n].data.fd == intr_pipe.readfd) {
                        int r = read(intr_pipe.readfd, buf.charbuf,
                                        sizeof(buf.charbuf));
                        RTE_SET_USED(r);
                        return -1;
                }
                rte_spinlock_lock(&intr_lock);
                TAILQ_FOREACH(src, &intr_sources, next)
                        if (src->intr_handle.fd ==
                                        events[n].data.fd)
                                break;
                if (src == NULL) {
                        rte_spinlock_unlock(&intr_lock);
                        continue;
                }

                /* for this source, make a copy of all the callbacks,
                 * then unlock the lock, so the callbacks can
                 * themselves manipulate the list for future
                 * instances.
                 */
                active_cb = 0;
                memset(active_cbs, 0, sizeof(active_cbs));
                TAILQ_FOREACH(cb, &src->callbacks, next) {
                        /* don't overflow the fixed-size copy */
                        if (active_cb == (int)(sizeof(active_cbs) /
                                        sizeof(active_cbs[0])))
                                break;
                        active_cbs[active_cb++] = *cb;
                }
                rte_spinlock_unlock(&intr_lock);

                /* set the length to be read for the different handle types */
                switch (src->intr_handle.type) {
                case RTE_INTR_HANDLE_UIO:
                        bytes_read = 4;
                        break;
                case RTE_INTR_HANDLE_ALARM:
                        bytes_read = sizeof(uint64_t);
                        break;
                default:
                        bytes_read = 1;
                        break;
                }
                /**
                 * read out to clear the ready-to-be-read flag
                 * for epoll_wait.
                 */
                bytes_read = read(events[n].data.fd, &buf, bytes_read);
                if (bytes_read < 0) {
                        RTE_LOG(ERR, EAL, "Error reading from file descriptor"
                                " %d, error: %d\n", events[n].data.fd, errno);
                        continue;
                } else if (bytes_read == 0) {
                        RTE_LOG(ERR, EAL,
                                "Read nothing from file descriptor %d.\n",
                                                        events[n].data.fd);
                        continue;
                }
                /**
                 * Finally, call all callbacks from the copy
                 * we made earlier.
                 */
                for (i = 0; i < active_cb; i++) {
                        if (active_cbs[i].cb_fn == NULL)
                                continue;
                        active_cbs[i].cb_fn(&src->intr_handle,
                                        active_cbs[i].cb_arg);
                }
        }

        return 0;
}

/**
 * Wait on the epoll fd and dispatch any interrupts that arrive, until
 * the wait list needs to be rebuilt or an unrecoverable error occurs.
 *
 * @param pfd
 *  epoll file descriptor.
 * @param totalfds
 *  The number of file descriptors added in epoll.
 *
 * @return
 *  void
 */
static void
eal_intr_handle_interrupts(int pfd, unsigned totalfds)
{
        struct epoll_event events[totalfds];
        int nfds = 0;

        for (;;) {
                nfds = epoll_wait(pfd, events, totalfds,
                        EAL_INTR_EPOLL_WAIT_FOREVER);
                /* epoll_wait failed */
                if (nfds < 0) {
                        if (errno == EINTR)
                                continue;
                        RTE_LOG(ERR, EAL,
                                "epoll_wait returned with error\n");
                        return;
                }
                /* epoll_wait timed out, which should never happen here */
                else if (nfds == 0)
                        continue;
                /* epoll_wait has at least one fd ready to read */
                if (eal_intr_process_interrupts(events, nfds) < 0)
                        return;
        }
}

/**
 * It builds/rebuilds the epoll file descriptor with all the file
 * descriptors being waited on, then handles the interrupts.
 *
 * @param arg
 *  unused pointer.
 *
 * @return
 *  never returns.
 */
static __attribute__((noreturn)) void *
eal_intr_thread_main(__rte_unused void *arg)
{
        struct epoll_event ev;

        /* host thread, never break out */
        for (;;) {
                /* build up the epoll fd with all descriptors we are to
                 * wait on, then pass it to the handle_interrupts function
                 */
                static struct epoll_event pipe_event = {
                        .events = EPOLLIN | EPOLLPRI,
                };
                struct rte_intr_source *src;
                unsigned numfds = 0;

                /* create epoll fd */
                int pfd = epoll_create(1);
                if (pfd < 0)
                        rte_panic("Cannot create epoll instance\n");

                pipe_event.data.fd = intr_pipe.readfd;
                /**
                 * add the pipe fd into the wait list; this pipe is used
                 * to signal that the wait list must be rebuilt.
                 */
                if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd,
                                                &pipe_event) < 0) {
                        rte_panic("Error adding fd %d to epoll_ctl, %s\n",
                                        intr_pipe.readfd, strerror(errno));
                }
                numfds++;

                rte_spinlock_lock(&intr_lock);

                TAILQ_FOREACH(src, &intr_sources, next) {
                        if (src->callbacks.tqh_first == NULL)
                                continue; /* skip those with no callbacks */
                        ev.events = EPOLLIN | EPOLLPRI;
                        ev.data.fd = src->intr_handle.fd;

                        /**
                         * add all the device file descriptors into
                         * the wait list.
                         */
                        if (epoll_ctl(pfd, EPOLL_CTL_ADD,
                                        src->intr_handle.fd, &ev) < 0) {
                                rte_panic("Error adding fd %d to epoll_ctl, %s\n",
                                        src->intr_handle.fd, strerror(errno));
                        } else
                                numfds++;
                }
                rte_spinlock_unlock(&intr_lock);
                /* serve the interrupt */
                eal_intr_handle_interrupts(pfd, numfds);

                /**
                 * when we return, we need to rebuild the
                 * list of fds to monitor.
                 */
                close(pfd);
        }
}

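/**
 * Initialize the interrupt subsystem: set up the source list, create
 * the control pipe and spawn the interrupt handling thread. Called
 * once during EAL initialization.
 *
 * Returns 0 on success, a negative value on error.
 */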
int
rte_eal_intr_init(void)
{
        int ret = 0;

        /* init the global interrupt source head */
        TAILQ_INIT(&intr_sources);

        /**
         * create a pipe that epoll waits on; writing to it notifies
         * the interrupt thread to rebuild its epoll wait list.
         */
        if (pipe(intr_pipe.pipefd) < 0)
                return -1;

        /* create the host thread to wait for/handle interrupts */
        ret = pthread_create(&intr_thread, NULL,
                        eal_intr_thread_main, NULL);
        if (ret != 0)
                RTE_LOG(ERR, EAL,
                        "Failed to create thread for interrupt handling\n");

        return -ret;
}