lib/librte_eal/linuxapp/eal/eal_interrupts.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *  version: DPDK.L.1.2.3-3
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include <sys/queue.h>
#include <malloc.h>
#include <stdarg.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <inttypes.h>
#include <sys/epoll.h>
#include <sys/signalfd.h>

#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_ring.h>
#include <rte_debug.h>
#include <rte_log.h>
#include <rte_mempool.h>
#include <rte_pci.h>
#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_spinlock.h>

#include "eal_private.h"

#define EAL_INTR_EPOLL_WAIT_FOREVER (-1)

/**
 * union for pipe fds.
 */
union intr_pipefds {
        struct {
                int pipefd[2];
        };
        struct {
                int readfd;
                int writefd;
        };
};
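
/**
 * Illustration only (not compiled): both members of the union above
 * alias the same storage, so the array form can be handed straight to
 * pipe() while the named fields keep the read/write roles explicit:
 *
 *   union intr_pipefds p;
 *   pipe(p.pipefd);          // fills p.pipefd[0] and p.pipefd[1]
 *   // p.readfd aliases p.pipefd[0]; p.writefd aliases p.pipefd[1]
 */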

/**
 * union buffer for reading on different devices
 */
union rte_intr_read_buffer {
        int uio_intr_count;              /* for uio device */
        uint64_t timerfd_num;            /* for timerfd */
        char charbuf[16];                /* for others */
};

TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
TAILQ_HEAD(rte_intr_source_list, rte_intr_source);

struct rte_intr_callback {
        TAILQ_ENTRY(rte_intr_callback) next;
        rte_intr_callback_fn cb_fn;  /**< callback address */
        void *cb_arg;                /**< parameter for callback */
};

struct rte_intr_source {
        TAILQ_ENTRY(rte_intr_source) next;
        struct rte_intr_handle intr_handle; /**< interrupt handle */
        struct rte_intr_cb_list callbacks;  /**< user callbacks */
};

/* global spinlock for interrupt data operation */
static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;

/* union buffer for pipe read/write */
static union intr_pipefds intr_pipe;

/* interrupt sources list */
static struct rte_intr_source_list intr_sources;

/* interrupt handling thread */
static pthread_t intr_thread;

int
rte_intr_callback_register(struct rte_intr_handle *intr_handle,
                        rte_intr_callback_fn cb, void *cb_arg)
{
        int ret = -1;
        struct rte_intr_source *src;
        int wake_thread = 0;

        /* first do parameter checking */
        if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) {
                RTE_LOG(ERR, EAL,
                        "Registering with invalid input parameter\n");
                return -EINVAL;
        }

        /* allocate a new interrupt callback entity */
        struct rte_intr_callback *callback =
                rte_zmalloc("interrupt callback list",
                                sizeof(*callback), 0);
        if (callback == NULL) {
                RTE_LOG(ERR, EAL, "Can not allocate memory\n");
                return -ENOMEM;
        }
        callback->cb_fn = cb;
        callback->cb_arg = cb_arg;

        rte_spinlock_lock(&intr_lock);

        /* check if there is at least one callback registered for the fd */
        TAILQ_FOREACH(src, &intr_sources, next) {
                if (src->intr_handle.fd == intr_handle->fd) {
                        if (src->callbacks.tqh_first == NULL)
                                /* we had no interrupts for this */
                                wake_thread = 1;

                        TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
                        break;
                }
        }

        /* no existing callbacks for this fd - add a new source */
        if (src == NULL) {
                src = rte_zmalloc("interrupt source list", sizeof(*src), 0);
                if (src == NULL) {
                        RTE_LOG(ERR, EAL, "Can not allocate memory\n");
                        /* do not leak the callback allocated above */
                        rte_free(callback);
                        ret = -ENOMEM;
                        goto error;
                }
                src->intr_handle = *intr_handle;
                TAILQ_INIT(&src->callbacks);

                TAILQ_INSERT_TAIL(&intr_sources, src, next);
                TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
                wake_thread = 1;
        }

        rte_spinlock_unlock(&intr_lock);
        /**
         * check whether we need to notify the pipe fd waited on by
         * epoll_wait so that it rebuilds its wait list.
         */
        if (wake_thread)
                if (write(intr_pipe.writefd, "1", 1) < 0)
                        return -EPIPE;

        return 0;

error:
        rte_spinlock_unlock(&intr_lock);

        return ret;
}
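
/**
 * A minimal usage sketch (illustration only, not compiled; the device
 * structure and handler names below are hypothetical): a driver whose
 * rte_intr_handle has been populated registers a handler, and the host
 * thread delivers events to it.
 *
 *   static void
 *   my_dev_intr_handler(struct rte_intr_handle *handle, void *arg)
 *   {
 *           struct my_dev *dev = arg;  // hypothetical device state
 *           // handle the event, then unmask if the device needs it
 *           rte_intr_enable(handle);
 *   }
 *
 *   ret = rte_intr_callback_register(&dev->intr_handle,
 *                                    my_dev_intr_handler, dev);
 */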

int
rte_intr_callback_unregister(struct rte_intr_handle *intr_handle,
                        rte_intr_callback_fn cb_fn, void *cb_arg)
{
        int ret = -1;
        struct rte_intr_source *src;
        struct rte_intr_callback *cb, *next_cb;

        /* do parameter checking first */
        if (intr_handle == NULL || intr_handle->fd < 0) {
                RTE_LOG(ERR, EAL,
                        "Unregistering with invalid input parameter\n");
                return -EINVAL;
        }

        rte_spinlock_lock(&intr_lock);

        /* check if an interrupt source exists for the fd */
        TAILQ_FOREACH(src, &intr_sources, next)
                if (src->intr_handle.fd == intr_handle->fd)
                        break;

        /* No interrupt source registered for the fd */
        if (src == NULL) {
                ret = -ENOENT;
                goto error;
        }

        /*
         * walk the callback list; fetch the next entry before a possible
         * removal so that freeing the current one cannot corrupt the walk
         */
        ret = 0;
        for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next_cb) {
                next_cb = TAILQ_NEXT(cb, next);
                if (cb->cb_fn != cb_fn)
                        continue;
                if (cb_arg == (void *)-1 || cb->cb_arg == cb_arg) {
                        TAILQ_REMOVE(&src->callbacks, cb, next);
                        rte_free(cb);
                        ret++;
                }
        }

        /* if no callbacks are left for this source, remove the source too */
        if (src->callbacks.tqh_first == NULL) {
                TAILQ_REMOVE(&intr_sources, src, next);
                rte_free(src);
        }

        /* notify the pipe fd waited on by epoll_wait to rebuild the wait list */
        if (write(intr_pipe.writefd, "1", 1) < 0) {
                ret = -EPIPE;
                goto error;
        }

        rte_spinlock_unlock(&intr_lock);

        return ret;

error:
        rte_spinlock_unlock(&intr_lock);

        return ret;
}
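
/**
 * Usage sketch (illustration only, names hypothetical): a cb_arg of
 * (void *)-1 acts as a wildcard, removing every registration of the
 * given function for this handle. The return value is the number of
 * callbacks removed.
 *
 *   // remove only the registration whose argument was `dev`
 *   rte_intr_callback_unregister(&dev->intr_handle,
 *                                my_dev_intr_handler, dev);
 *
 *   // remove all registrations of the function, whatever the argument
 *   rte_intr_callback_unregister(&dev->intr_handle,
 *                                my_dev_intr_handler, (void *)-1);
 */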

int
rte_intr_enable(struct rte_intr_handle *intr_handle)
{
        const int value = 1;

        if (!intr_handle || intr_handle->fd < 0)
                return -1;

        switch (intr_handle->type) {
        /* write to the uio fd to enable the interrupt */
        case RTE_INTR_HANDLE_UIO:
                if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
                        RTE_LOG(ERR, EAL,
                                "Error enabling interrupts for fd %d\n",
                                                        intr_handle->fd);
                        return -1;
                }
                break;
        /* not used at this moment */
        case RTE_INTR_HANDLE_ALARM:
                return -1;
        /* unknown handle type */
        default:
                RTE_LOG(ERR, EAL,
                        "Unknown handle type of fd %d\n",
                                        intr_handle->fd);
                return -1;
        }

        return 0;
}

int
rte_intr_disable(struct rte_intr_handle *intr_handle)
{
        const int value = 0;

        if (!intr_handle || intr_handle->fd < 0)
                return -1;

        switch (intr_handle->type) {
        /* write to the uio fd to disable the interrupt */
        case RTE_INTR_HANDLE_UIO:
                if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
                        RTE_LOG(ERR, EAL,
                                "Error disabling interrupts for fd %d\n",
                                                        intr_handle->fd);
                        return -1;
                }
                break;
        /* not used at this moment */
        case RTE_INTR_HANDLE_ALARM:
                return -1;
        /* unknown handle type */
        default:
                RTE_LOG(ERR, EAL,
                        "Unknown handle type of fd %d\n",
                                        intr_handle->fd);
                return -1;
        }

        return 0;
}
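
/**
 * Both functions above rely on the uio convention of writing a 32-bit
 * 1 or 0 to the /dev/uioX file descriptor to unmask or mask the device
 * interrupt. A typical poll-then-sleep pattern built on them
 * (illustration only, names hypothetical):
 *
 *   rte_intr_disable(&dev->intr_handle);  // mask while busy-polling
 *   // ... drain the device's event source ...
 *   rte_intr_enable(&dev->intr_handle);   // unmask before blocking
 */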

static int
eal_intr_process_interrupts(struct epoll_event *events, int nfds)
{
        int n, i, active_cb, bytes_read;
        struct rte_intr_source *src;
        struct rte_intr_callback *cb;
        union rte_intr_read_buffer buf;
        struct rte_intr_callback active_cbs[32];

        for (n = 0; n < nfds; n++) {
                /**
                 * if the pipe fd is ready to read, return out to
                 * rebuild the wait list.
                 */
                if (events[n].data.fd == intr_pipe.readfd) {
                        int r = read(intr_pipe.readfd, buf.charbuf,
                                        sizeof(buf.charbuf));
                        RTE_SET_USED(r);
                        return -1;
                }
                rte_spinlock_lock(&intr_lock);
                TAILQ_FOREACH(src, &intr_sources, next)
                        if (src->intr_handle.fd ==
                                        events[n].data.fd)
                                break;
                if (src == NULL) {
                        rte_spinlock_unlock(&intr_lock);
                        continue;
                }

                /* for this source, make a copy of all the callbacks,
                 * then unlock the lock, so the callbacks can
                 * themselves manipulate the list for future
                 * instances.
                 */
                active_cb = 0;
                memset(active_cbs, 0, sizeof(active_cbs));
                TAILQ_FOREACH(cb, &src->callbacks, next) {
                        /* cap the copy at the size of the local array */
                        if (active_cb == (int)(sizeof(active_cbs) /
                                        sizeof(active_cbs[0])))
                                break;
                        active_cbs[active_cb++] = *cb;
                }
                rte_spinlock_unlock(&intr_lock);

                /* set the length to be read for different handle types */
                switch (src->intr_handle.type) {
                case RTE_INTR_HANDLE_UIO:
                        bytes_read = 4;
                        break;
                case RTE_INTR_HANDLE_ALARM:
                        bytes_read = sizeof(uint64_t);
                        break;
                default:
                        bytes_read = 1;
                        break;
                }
                /**
                 * read out to clear the ready-to-be-read flag
                 * for epoll_wait.
                 */
                bytes_read = read(events[n].data.fd, &buf, bytes_read);
                if (bytes_read < 0) {
                        RTE_LOG(ERR, EAL, "Error reading from file descriptor"
                                " %d, error: %d\n", events[n].data.fd, errno);
                        continue;
                } else if (bytes_read == 0) {
                        RTE_LOG(ERR, EAL,
                                "Read nothing from file descriptor %d.\n",
                                                        events[n].data.fd);
                        continue;
                }
                /**
                 * Finally, call all callbacks from the copy
                 * we made earlier.
                 */
                for (i = 0; i < active_cb; i++) {
                        if (active_cbs[i].cb_fn == NULL)
                                continue;
                        active_cbs[i].cb_fn(&src->intr_handle,
                                        active_cbs[i].cb_arg);
                }
        }

        return 0;
}
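
/**
 * The private copy taken above is what makes it safe for a callback to
 * unregister itself while it runs: the invocation loop iterates over
 * the copy, not the live list. A one-shot handler, for example, may do
 * (illustration only, names hypothetical):
 *
 *   static void
 *   one_shot_handler(struct rte_intr_handle *handle, void *arg)
 *   {
 *           do_work(arg);  // hypothetical
 *           rte_intr_callback_unregister(handle, one_shot_handler, arg);
 *   }
 */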

/**
 * It handles all the interrupts.
 *
 * @param pfd
 *  epoll file descriptor.
 * @param totalfds
 *  The number of file descriptors added in epoll.
 *
 * @return
 *  void
 */
static void
eal_intr_handle_interrupts(int pfd, unsigned totalfds)
{
        struct epoll_event events[totalfds];
        int nfds = 0;

        for (;;) {
                nfds = epoll_wait(pfd, events, totalfds,
                        EAL_INTR_EPOLL_WAIT_FOREVER);
                /* epoll_wait failed */
                if (nfds < 0) {
                        if (errno == EINTR)
                                continue;
                        RTE_LOG(ERR, EAL,
                                "epoll_wait returned with error\n");
                        return;
                }
                /* epoll_wait timed out; cannot happen, we wait forever */
                else if (nfds == 0)
                        continue;
                /* epoll_wait has at least one fd ready to read */
                if (eal_intr_process_interrupts(events, nfds) < 0)
                        return;
        }
}

/**
 * It builds/rebuilds up the epoll file descriptor with all the
 * file descriptors being waited on. Then handles the interrupts.
 *
 * @param arg
 *  pointer. (unused)
 *
 * @return
 *  never returns
 */
static __attribute__((noreturn)) void *
eal_intr_thread_main(__rte_unused void *arg)
{
        struct epoll_event ev;

        /* host thread, never break out */
        for (;;) {
                /* build up the epoll fd with all descriptors we are to
                 * wait on then pass it to the handle_interrupts function
                 */
                static struct epoll_event pipe_event = {
                        .events = EPOLLIN | EPOLLPRI,
                };
                struct rte_intr_source *src;
                unsigned numfds = 0;

                /* create epoll fd */
                int pfd = epoll_create(1);
                if (pfd < 0)
                        rte_panic("Cannot create epoll instance\n");

                pipe_event.data.fd = intr_pipe.readfd;
                /**
                 * add pipe fd into wait list, this pipe is used to
                 * rebuild the wait list.
                 */
                if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd,
                                                &pipe_event) < 0) {
                        rte_panic("Error adding fd %d to epoll, %s\n",
                                        intr_pipe.readfd, strerror(errno));
                }
                numfds++;

                rte_spinlock_lock(&intr_lock);

                TAILQ_FOREACH(src, &intr_sources, next) {
                        if (src->callbacks.tqh_first == NULL)
                                continue; /* skip those with no callbacks */
                        ev.events = EPOLLIN | EPOLLPRI;
                        ev.data.fd = src->intr_handle.fd;

                        /**
                         * add all the uio device file descriptors
                         * into the wait list.
                         */
                        if (epoll_ctl(pfd, EPOLL_CTL_ADD,
                                        src->intr_handle.fd, &ev) < 0) {
                                rte_panic("Error adding fd %d to epoll, %s\n",
                                        src->intr_handle.fd, strerror(errno));
                        } else
                                numfds++;
                }
                rte_spinlock_unlock(&intr_lock);
                /* serve the interrupt */
                eal_intr_handle_interrupts(pfd, numfds);

                /**
                 * when we return, we need to rebuild the
                 * list of fds to monitor.
                 */
                close(pfd);
        }
}

int
rte_eal_intr_init(void)
{
        int ret = 0;

        /* init the global interrupt source head */
        TAILQ_INIT(&intr_sources);

        /**
         * create a pipe that epoll waits on; writing to it tells the
         * host thread to rebuild its epoll wait list.
         */
        if (pipe(intr_pipe.pipefd) < 0)
                return -1;

        /* create the host thread to wait for/handle interrupts */
        ret = pthread_create(&intr_thread, NULL,
                        eal_intr_thread_main, NULL);
        if (ret != 0)
                RTE_LOG(ERR, EAL,
                        "Failed to create thread for interrupt handling\n");

        return -ret;
}
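
/**
 * Expected lifecycle (illustration only): rte_eal_intr_init() runs once
 * during EAL initialization, before any driver registers a callback.
 * Failure is reported as a negative value, so a caller might do:
 *
 *   if (rte_eal_intr_init() < 0)
 *           rte_panic("Cannot init interrupt-handling thread\n");
 */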