X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_eal%2Flinuxapp%2Feal%2Feal_interrupts.c;h=056d41c1292a84e33bcc522be9c5e26597891e0e;hb=6383d2642b62f430c0ddb991dcfa75a5b2a4aec6;hp=bd9fc5f1753934a75a43dd5d63a9139559064d42;hpb=3031749c2df04a63cdcef186dcce3781e61436e8;p=dpdk.git diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index bd9fc5f175..056d41c129 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -1,34 +1,5 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation */ #include @@ -36,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -44,30 +14,35 @@ #include #include #include +#include +#include +#include +#include #include #include #include -#include #include -#include #include #include #include #include #include -#include #include #include -#include -#include #include #include #include +#include #include "eal_private.h" +#include "eal_vfio.h" +#include "eal_thread.h" #define EAL_INTR_EPOLL_WAIT_FOREVER (-1) +#define NB_OTHER_INTR 1 + +static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */ /** * union for pipe fds. @@ -87,6 +62,9 @@ union intr_pipefds{ */ union rte_intr_read_buffer { int uio_intr_count; /* for uio device */ +#ifdef VFIO_PRESENT + uint64_t vfio_intr_count; /* for vfio device */ +#endif uint64_t timerfd_num; /* for timerfd */ char charbuf[16]; /* for others */ }; @@ -119,8 +97,298 @@ static struct rte_intr_source_list intr_sources; /* interrupt handling thread */ static pthread_t intr_thread; +/* VFIO interrupts */ +#ifdef VFIO_PRESENT + +#define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int)) +/* irq set buffer length for queue interrupts and LSC interrupt */ +#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \ + sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1)) + +/* enable legacy (INTx) interrupts */ +static int +vfio_enable_intx(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + /* enable INTx */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + /* unmask INTx after enabling */ + memset(irq_set, 0, len); + len = sizeof(struct vfio_irq_set); + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + +/* disable legacy (INTx) interrupts */ +static int +vfio_disable_intx(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + /* mask interrupts before disabling */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + /* disable INTx*/ + memset(irq_set, 0, len); + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, + "Error disabling INTx interrupts for fd %d\n", intr_handle->fd); + return -1; + } + return 0; +} + +/* enable MSI interrupts */ +static int +vfio_enable_msi(const struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + +/* disable MSI interrupts */ +static int +vfio_disable_msi(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) + RTE_LOG(ERR, EAL, + "Error disabling MSI interrupts for fd %d\n", intr_handle->fd); + + return ret; +} + +/* enable MSI-X interrupts */ +static int +vfio_enable_msix(const struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */ + irq_set->count = intr_handle->max_intr ? + (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ? + RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + /* INTR vector offset 0 reserve for non-efds mapping */ + fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd; + memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds, + sizeof(*intr_handle->efds) * intr_handle->nb_efd); + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +/* disable MSI-X interrupts */ +static int +vfio_disable_msix(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) + RTE_LOG(ERR, EAL, + "Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd); + + return ret; +} +#endif + +static int +uio_intx_intr_disable(const struct rte_intr_handle *intr_handle) +{ + unsigned char command_high; + + /* use UIO config file descriptor for uio_pci_generic */ + if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + RTE_LOG(ERR, EAL, + "Error reading interrupts status for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + /* disable interrupts */ + command_high |= 0x4; + if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + RTE_LOG(ERR, EAL, + "Error disabling interrupts for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + + return 0; +} + +static int +uio_intx_intr_enable(const struct rte_intr_handle *intr_handle) +{ + unsigned char command_high; + + /* use UIO config file descriptor for uio_pci_generic */ + if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + RTE_LOG(ERR, EAL, + "Error reading interrupts status for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + /* enable interrupts */ + command_high &= ~0x4; + if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + RTE_LOG(ERR, EAL, + "Error enabling interrupts for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + + return 0; +} + +static int +uio_intr_disable(const struct rte_intr_handle *intr_handle) +{ + const int value = 0; + + if (write(intr_handle->fd, &value, sizeof(value)) < 0) { + RTE_LOG(ERR, EAL, + "Error disabling interrupts for fd %d (%s)\n", + intr_handle->fd, strerror(errno)); + return -1; + } + return 0; +} + +static int +uio_intr_enable(const struct rte_intr_handle *intr_handle) +{ + const int value = 1; + + if (write(intr_handle->fd, &value, sizeof(value)) < 0) { + RTE_LOG(ERR, EAL, + "Error enabling interrupts for fd %d (%s)\n", + intr_handle->fd, strerror(errno)); + return -1; + } + return 0; +} + int -rte_intr_callback_register(struct rte_intr_handle *intr_handle, +rte_intr_callback_register(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb, void *cb_arg) { int ret, wake_thread; @@ -188,11 +456,11 @@ rte_intr_callback_register(struct rte_intr_handle *intr_handle, if (write(intr_pipe.writefd, "1", 1) < 0) return -EPIPE; - return (ret); + return ret; } int -rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, +rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb_fn, void *cb_arg) { int ret; @@ -252,30 +520,48 @@ rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, ret = -EPIPE; } - return (ret); + return ret; } int -rte_intr_enable(struct rte_intr_handle *intr_handle) +rte_intr_enable(const struct rte_intr_handle *intr_handle) { - const int value = 1; + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; - if (!intr_handle || intr_handle->fd < 0) + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) return -1; switch (intr_handle->type){ /* write to the uio fd to enable the interrupt */ case RTE_INTR_HANDLE_UIO: - if (write(intr_handle->fd, &value, sizeof(value)) < 0) { - RTE_LOG(ERR, EAL, - "Error enabling interrupts for fd %d\n", - intr_handle->fd); + if (uio_intr_enable(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_UIO_INTX: + if (uio_intx_intr_enable(intr_handle)) return -1; - } break; /* not used at this moment */ case RTE_INTR_HANDLE_ALARM: return -1; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + if (vfio_enable_msix(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_VFIO_MSI: + if (vfio_enable_msi(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_VFIO_LEGACY: + if (vfio_enable_intx(intr_handle)) + return -1; + break; +#endif + /* not used at this moment */ + case RTE_INTR_HANDLE_DEV_EVENT: + return -1; /* unknown handle type */ default: RTE_LOG(ERR, EAL, @@ -288,26 +574,44 @@ rte_intr_enable(struct rte_intr_handle *intr_handle) } int -rte_intr_disable(struct rte_intr_handle *intr_handle) +rte_intr_disable(const struct rte_intr_handle *intr_handle) { - const int value = 0; + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; - if (!intr_handle || intr_handle->fd < 0) + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) return -1; switch (intr_handle->type){ /* write to the uio fd to disable the interrupt */ case RTE_INTR_HANDLE_UIO: - if (write(intr_handle->fd, &value, sizeof(value)) < 0){ - RTE_LOG(ERR, EAL, - "Error enabling interrupts for fd %d\n", - intr_handle->fd); + if (uio_intr_disable(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_UIO_INTX: + if (uio_intx_intr_disable(intr_handle)) return -1; - } break; /* not used at this moment */ case RTE_INTR_HANDLE_ALARM: return -1; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + if (vfio_disable_msix(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_VFIO_MSI: + if (vfio_disable_msi(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_VFIO_LEGACY: + if (vfio_disable_intx(intr_handle)) + return -1; + break; +#endif + /* not used at this moment */ + case RTE_INTR_HANDLE_DEV_EVENT: + return -1; /* unknown handle type */ default: RTE_LOG(ERR, EAL, @@ -322,6 +626,7 @@ rte_intr_disable(struct rte_intr_handle *intr_handle) static int eal_intr_process_interrupts(struct epoll_event *events, int nfds) { + bool call = false; int n, bytes_read; struct rte_intr_source *src; struct rte_intr_callback *cb; @@ -357,34 +662,58 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) /* set the length to be read dor different handle type */ switch (src->intr_handle.type) { case RTE_INTR_HANDLE_UIO: - bytes_read = 4; + case RTE_INTR_HANDLE_UIO_INTX: + bytes_read = sizeof(buf.uio_intr_count); break; case RTE_INTR_HANDLE_ALARM: - bytes_read = sizeof(uint64_t); + bytes_read = sizeof(buf.timerfd_num); + break; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + bytes_read = sizeof(buf.vfio_intr_count); + break; +#endif + case RTE_INTR_HANDLE_VDEV: + case RTE_INTR_HANDLE_EXT: + bytes_read = 0; + call = true; + break; + case RTE_INTR_HANDLE_DEV_EVENT: + bytes_read = 0; + call = true; break; default: bytes_read = 1; break; } - /** - * read out to clear the ready-to-be-read flag - * for epoll_wait. - */ - bytes_read = read(events[n].data.fd, &buf, bytes_read); - - if (bytes_read < 0) - RTE_LOG(ERR, EAL, "Error reading from file " - "descriptor %d: %s\n", events[n].data.fd, - strerror(errno)); - else if (bytes_read == 0) - RTE_LOG(ERR, EAL, "Read nothing from file " - "descriptor %d\n", events[n].data.fd); + if (bytes_read > 0) { + /** + * read out to clear the ready-to-be-read flag + * for epoll_wait. + */ + bytes_read = read(events[n].data.fd, &buf, bytes_read); + if (bytes_read < 0) { + if (errno == EINTR || errno == EWOULDBLOCK) + continue; + + RTE_LOG(ERR, EAL, "Error reading from file " + "descriptor %d: %s\n", + events[n].data.fd, + strerror(errno)); + } else if (bytes_read == 0) + RTE_LOG(ERR, EAL, "Read nothing from file " + "descriptor %d\n", events[n].data.fd); + else + call = true; + } /* grab a lock, again to call callbacks and update status. */ rte_spinlock_lock(&intr_lock); - if (bytes_read > 0) { + if (call) { /* Finally, call all callbacks. */ TAILQ_FOREACH(cb, &src->callbacks, next) { @@ -394,10 +723,9 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) rte_spinlock_unlock(&intr_lock); /* call the actual callback */ - active_cb.cb_fn(&src->intr_handle, - active_cb.cb_arg); + active_cb.cb_fn(active_cb.cb_arg); - /*get the lcok back. */ + /*get the lock back. */ rte_spinlock_lock(&intr_lock); } } @@ -495,7 +823,7 @@ eal_intr_thread_main(__rte_unused void *arg) TAILQ_FOREACH(src, &intr_sources, next) { if (src->callbacks.tqh_first == NULL) continue; /* skip those with no callbacks */ - ev.events = EPOLLIN | EPOLLPRI; + ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP; ev.data.fd = src->intr_handle.fd; /** @@ -534,16 +862,371 @@ rte_eal_intr_init(void) * create a pipe which will be waited by epoll and notified to * rebuild the wait list of epoll. */ - if (pipe(intr_pipe.pipefd) < 0) + if (pipe(intr_pipe.pipefd) < 0) { + rte_errno = errno; return -1; + } /* create the host thread to wait/handle the interrupt */ - ret = pthread_create(&intr_thread, NULL, + ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL, eal_intr_thread_main, NULL); - if (ret != 0) + if (ret != 0) { + rte_errno = -ret; RTE_LOG(ERR, EAL, "Failed to create thread for interrupt handling\n"); + } - return -ret; + return ret; +} + +static void +eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) +{ + union rte_intr_read_buffer buf; + int bytes_read = 0; + int nbytes; + + switch (intr_handle->type) { + case RTE_INTR_HANDLE_UIO: + case RTE_INTR_HANDLE_UIO_INTX: + bytes_read = sizeof(buf.uio_intr_count); + break; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + bytes_read = sizeof(buf.vfio_intr_count); + break; +#endif + case RTE_INTR_HANDLE_VDEV: + bytes_read = intr_handle->efd_counter_size; + /* For vdev, number of bytes to read is set by driver */ + break; + case RTE_INTR_HANDLE_EXT: + return; + default: + bytes_read = 1; + RTE_LOG(INFO, EAL, "unexpected intr type\n"); + break; + } + + /** + * read out to clear the ready-to-be-read flag + * for epoll_wait. + */ + if (bytes_read == 0) + return; + do { + nbytes = read(fd, &buf, bytes_read); + if (nbytes < 0) { + if (errno == EINTR || errno == EWOULDBLOCK || + errno == EAGAIN) + continue; + RTE_LOG(ERR, EAL, + "Error reading from fd %d: %s\n", + fd, strerror(errno)); + } else if (nbytes == 0) + RTE_LOG(ERR, EAL, "Read nothing from fd %d\n", fd); + return; + } while (1); +} + +static int +eal_epoll_process_event(struct epoll_event *evs, unsigned int n, + struct rte_epoll_event *events) +{ + unsigned int i, count = 0; + struct rte_epoll_event *rev; + + for (i = 0; i < n; i++) { + rev = evs[i].data.ptr; + if (!rev || !rte_atomic32_cmpset(&rev->status, RTE_EPOLL_VALID, + RTE_EPOLL_EXEC)) + continue; + + events[count].status = RTE_EPOLL_VALID; + events[count].fd = rev->fd; + events[count].epfd = rev->epfd; + events[count].epdata.event = rev->epdata.event; + events[count].epdata.data = rev->epdata.data; + if (rev->epdata.cb_fun) + rev->epdata.cb_fun(rev->fd, + rev->epdata.cb_arg); + + rte_compiler_barrier(); + rev->status = RTE_EPOLL_VALID; + count++; + } + return count; } +static inline int +eal_init_tls_epfd(void) +{ + int pfd = epoll_create(255); + + if (pfd < 0) { + RTE_LOG(ERR, EAL, + "Cannot create epoll instance\n"); + return -1; + } + return pfd; +} + +int +rte_intr_tls_epfd(void) +{ + if (RTE_PER_LCORE(_epfd) == -1) + RTE_PER_LCORE(_epfd) = eal_init_tls_epfd(); + + return RTE_PER_LCORE(_epfd); +} + +int +rte_epoll_wait(int epfd, struct rte_epoll_event *events, + int maxevents, int timeout) +{ + struct epoll_event evs[maxevents]; + int rc; + + if (!events) { + RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n"); + return -1; + } + + /* using per thread epoll fd */ + if (epfd == RTE_EPOLL_PER_THREAD) + epfd = rte_intr_tls_epfd(); + + while (1) { + rc = epoll_wait(epfd, evs, maxevents, timeout); + if (likely(rc > 0)) { + /* epoll_wait has at least one fd ready to read */ + rc = eal_epoll_process_event(evs, rc, events); + break; + } else if (rc < 0) { + if (errno == EINTR) + continue; + /* epoll_wait fail */ + RTE_LOG(ERR, EAL, "epoll_wait returns with fail %s\n", + strerror(errno)); + rc = -1; + break; + } else { + /* rc == 0, epoll_wait timed out */ + break; + } + } + + return rc; +} + +static inline void +eal_epoll_data_safe_free(struct rte_epoll_event *ev) +{ + while (!rte_atomic32_cmpset(&ev->status, RTE_EPOLL_VALID, + RTE_EPOLL_INVALID)) + while (ev->status != RTE_EPOLL_VALID) + rte_pause(); + memset(&ev->epdata, 0, sizeof(ev->epdata)); + ev->fd = -1; + ev->epfd = -1; +} + +int +rte_epoll_ctl(int epfd, int op, int fd, + struct rte_epoll_event *event) +{ + struct epoll_event ev; + + if (!event) { + RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n"); + return -1; + } + + /* using per thread epoll fd */ + if (epfd == RTE_EPOLL_PER_THREAD) + epfd = rte_intr_tls_epfd(); + + if (op == EPOLL_CTL_ADD) { + event->status = RTE_EPOLL_VALID; + event->fd = fd; /* ignore fd in event */ + event->epfd = epfd; + ev.data.ptr = (void *)event; + } + + ev.events = event->epdata.event; + if (epoll_ctl(epfd, op, fd, &ev) < 0) { + RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n", + op, fd, strerror(errno)); + if (op == EPOLL_CTL_ADD) + /* rollback status when CTL_ADD fail */ + event->status = RTE_EPOLL_INVALID; + return -1; + } + + if (op == EPOLL_CTL_DEL && event->status != RTE_EPOLL_INVALID) + eal_epoll_data_safe_free(event); + + return 0; +} + +int +rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd, + int op, unsigned int vec, void *data) +{ + struct rte_epoll_event *rev; + struct rte_epoll_data *epdata; + int epfd_op; + unsigned int efd_idx; + int rc = 0; + + efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ? + (vec - RTE_INTR_VEC_RXTX_OFFSET) : vec; + + if (!intr_handle || intr_handle->nb_efd == 0 || + efd_idx >= intr_handle->nb_efd) { + RTE_LOG(ERR, EAL, "Wrong intr vector number.\n"); + return -EPERM; + } + + switch (op) { + case RTE_INTR_EVENT_ADD: + epfd_op = EPOLL_CTL_ADD; + rev = &intr_handle->elist[efd_idx]; + if (rev->status != RTE_EPOLL_INVALID) { + RTE_LOG(INFO, EAL, "Event already been added.\n"); + return -EEXIST; + } + + /* attach to intr vector fd */ + epdata = &rev->epdata; + epdata->event = EPOLLIN | EPOLLPRI | EPOLLET; + epdata->data = data; + epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr; + epdata->cb_arg = (void *)intr_handle; + rc = rte_epoll_ctl(epfd, epfd_op, + intr_handle->efds[efd_idx], rev); + if (!rc) + RTE_LOG(DEBUG, EAL, + "efd %d associated with vec %d added on epfd %d" + "\n", rev->fd, vec, epfd); + else + rc = -EPERM; + break; + case RTE_INTR_EVENT_DEL: + epfd_op = EPOLL_CTL_DEL; + rev = &intr_handle->elist[efd_idx]; + if (rev->status == RTE_EPOLL_INVALID) { + RTE_LOG(INFO, EAL, "Event does not exist.\n"); + return -EPERM; + } + + rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev); + if (rc) + rc = -EPERM; + break; + default: + RTE_LOG(ERR, EAL, "event op type mismatch\n"); + rc = -EPERM; + } + + return rc; +} + +void +rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle) +{ + uint32_t i; + struct rte_epoll_event *rev; + + for (i = 0; i < intr_handle->nb_efd; i++) { + rev = &intr_handle->elist[i]; + if (rev->status == RTE_EPOLL_INVALID) + continue; + if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) { + /* force free if the entry valid */ + eal_epoll_data_safe_free(rev); + rev->status = RTE_EPOLL_INVALID; + } + } +} + +int +rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) +{ + uint32_t i; + int fd; + uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); + + assert(nb_efd != 0); + + if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) { + for (i = 0; i < n; i++) { + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, + "can't setup eventfd, error %i (%s)\n", + errno, strerror(errno)); + return -errno; + } + intr_handle->efds[i] = fd; + } + intr_handle->nb_efd = n; + intr_handle->max_intr = NB_OTHER_INTR + n; + } else if (intr_handle->type == RTE_INTR_HANDLE_VDEV) { + /* only check, initialization would be done in vdev driver.*/ + if (intr_handle->efd_counter_size > + sizeof(union rte_intr_read_buffer)) { + RTE_LOG(ERR, EAL, "the efd_counter_size is oversized"); + return -EINVAL; + } + } else { + intr_handle->efds[0] = intr_handle->fd; + intr_handle->nb_efd = RTE_MIN(nb_efd, 1U); + intr_handle->max_intr = NB_OTHER_INTR; + } + + return 0; +} + +void +rte_intr_efd_disable(struct rte_intr_handle *intr_handle) +{ + uint32_t i; + + rte_intr_free_epoll_fd(intr_handle); + if (intr_handle->max_intr > intr_handle->nb_efd) { + for (i = 0; i < intr_handle->nb_efd; i++) + close(intr_handle->efds[i]); + } + intr_handle->nb_efd = 0; + intr_handle->max_intr = 0; +} + +int +rte_intr_dp_is_en(struct rte_intr_handle *intr_handle) +{ + return !(!intr_handle->nb_efd); +} + +int +rte_intr_allow_others(struct rte_intr_handle *intr_handle) +{ + if (!rte_intr_dp_is_en(intr_handle)) + return 1; + else + return !!(intr_handle->max_intr - intr_handle->nb_efd); +} + +int +rte_intr_cap_multiple(struct rte_intr_handle *intr_handle) +{ + if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) + return 1; + + if (intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 1; + + return 0; +}