+
+static void
+eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
+{
+ union rte_intr_read_buffer buf;
+ int bytes_read = 0;
+ int nbytes;
+
+ switch (intr_handle->type) {
+ case RTE_INTR_HANDLE_UIO:
+ case RTE_INTR_HANDLE_UIO_INTX:
+ bytes_read = sizeof(buf.uio_intr_count);
+ break;
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ bytes_read = sizeof(buf.vfio_intr_count);
+ break;
+#endif
+ case RTE_INTR_HANDLE_VDEV:
+ bytes_read = intr_handle->efd_counter_size;
+ /* For vdev, number of bytes to read is set by driver */
+ break;
+ case RTE_INTR_HANDLE_EXT:
+ return;
+ default:
+ bytes_read = 1;
+ RTE_LOG(INFO, EAL, "unexpected intr type\n");
+ break;
+ }
+
+ /**
+ * read out to clear the ready-to-be-read flag
+ * for epoll_wait.
+ */
+ if (bytes_read == 0)
+ return;
+ do {
+ nbytes = read(fd, &buf, bytes_read);
+ if (nbytes < 0) {
+ if (errno == EINTR || errno == EWOULDBLOCK ||
+ errno == EAGAIN)
+ continue;
+ RTE_LOG(ERR, EAL,
+ "Error reading from fd %d: %s\n",
+ fd, strerror(errno));
+ } else if (nbytes == 0)
+ RTE_LOG(ERR, EAL, "Read nothing from fd %d\n", fd);
+ return;
+ } while (1);
+}
+
+static int
+eal_epoll_process_event(struct epoll_event *evs, unsigned int n,
+ struct rte_epoll_event *events)
+{
+ unsigned int i, count = 0;
+ struct rte_epoll_event *rev;
+
+ for (i = 0; i < n; i++) {
+ rev = evs[i].data.ptr;
+ if (!rev || !rte_atomic32_cmpset(&rev->status, RTE_EPOLL_VALID,
+ RTE_EPOLL_EXEC))
+ continue;
+
+ events[count].status = RTE_EPOLL_VALID;
+ events[count].fd = rev->fd;
+ events[count].epfd = rev->epfd;
+ events[count].epdata.event = rev->epdata.event;
+ events[count].epdata.data = rev->epdata.data;
+ if (rev->epdata.cb_fun)
+ rev->epdata.cb_fun(rev->fd,
+ rev->epdata.cb_arg);
+
+ rte_compiler_barrier();
+ rev->status = RTE_EPOLL_VALID;
+ count++;
+ }
+ return count;
+}
+
+static inline int
+eal_init_tls_epfd(void)
+{
+ int pfd = epoll_create(255);
+
+ if (pfd < 0) {
+ RTE_LOG(ERR, EAL,
+ "Cannot create epoll instance\n");
+ return -1;
+ }
+ return pfd;
+}
+
+int
+rte_intr_tls_epfd(void)
+{
+ if (RTE_PER_LCORE(_epfd) == -1)
+ RTE_PER_LCORE(_epfd) = eal_init_tls_epfd();
+
+ return RTE_PER_LCORE(_epfd);
+}
+
+int
+rte_epoll_wait(int epfd, struct rte_epoll_event *events,
+ int maxevents, int timeout)
+{
+ struct epoll_event evs[maxevents];
+ int rc;
+
+ if (!events) {
+ RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
+ return -1;
+ }
+
+ /* using per thread epoll fd */
+ if (epfd == RTE_EPOLL_PER_THREAD)
+ epfd = rte_intr_tls_epfd();
+
+ while (1) {
+ rc = epoll_wait(epfd, evs, maxevents, timeout);
+ if (likely(rc > 0)) {
+ /* epoll_wait has at least one fd ready to read */
+ rc = eal_epoll_process_event(evs, rc, events);
+ break;
+ } else if (rc < 0) {
+ if (errno == EINTR)
+ continue;
+ /* epoll_wait fail */
+ RTE_LOG(ERR, EAL, "epoll_wait returns with fail %s\n",
+ strerror(errno));
+ rc = -1;
+ break;
+ } else {
+ /* rc == 0, epoll_wait timed out */
+ break;
+ }
+ }
+
+ return rc;
+}
+
+static inline void
+eal_epoll_data_safe_free(struct rte_epoll_event *ev)
+{
+ while (!rte_atomic32_cmpset(&ev->status, RTE_EPOLL_VALID,
+ RTE_EPOLL_INVALID))
+ while (ev->status != RTE_EPOLL_VALID)
+ rte_pause();
+ memset(&ev->epdata, 0, sizeof(ev->epdata));
+ ev->fd = -1;
+ ev->epfd = -1;
+}
+
+int
+rte_epoll_ctl(int epfd, int op, int fd,
+ struct rte_epoll_event *event)
+{
+ struct epoll_event ev;
+
+ if (!event) {
+ RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
+ return -1;
+ }
+
+ /* using per thread epoll fd */
+ if (epfd == RTE_EPOLL_PER_THREAD)
+ epfd = rte_intr_tls_epfd();
+
+ if (op == EPOLL_CTL_ADD) {
+ event->status = RTE_EPOLL_VALID;
+ event->fd = fd; /* ignore fd in event */
+ event->epfd = epfd;
+ ev.data.ptr = (void *)event;
+ }
+
+ ev.events = event->epdata.event;
+ if (epoll_ctl(epfd, op, fd, &ev) < 0) {
+ RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n",
+ op, fd, strerror(errno));
+ if (op == EPOLL_CTL_ADD)
+ /* rollback status when CTL_ADD fail */
+ event->status = RTE_EPOLL_INVALID;
+ return -1;
+ }
+
+ if (op == EPOLL_CTL_DEL && event->status != RTE_EPOLL_INVALID)
+ eal_epoll_data_safe_free(event);
+
+ return 0;
+}
+
+int
+rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
+ int op, unsigned int vec, void *data)
+{
+ struct rte_epoll_event *rev;
+ struct rte_epoll_data *epdata;
+ int epfd_op;
+ unsigned int efd_idx;
+ int rc = 0;
+
+ efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ?
+ (vec - RTE_INTR_VEC_RXTX_OFFSET) : vec;
+
+ if (!intr_handle || intr_handle->nb_efd == 0 ||
+ efd_idx >= intr_handle->nb_efd) {
+ RTE_LOG(ERR, EAL, "Wrong intr vector number.\n");
+ return -EPERM;
+ }
+
+ switch (op) {
+ case RTE_INTR_EVENT_ADD:
+ epfd_op = EPOLL_CTL_ADD;
+ rev = &intr_handle->elist[efd_idx];
+ if (rev->status != RTE_EPOLL_INVALID) {
+ RTE_LOG(INFO, EAL, "Event already been added.\n");
+ return -EEXIST;
+ }
+
+ /* attach to intr vector fd */
+ epdata = &rev->epdata;
+ epdata->event = EPOLLIN | EPOLLPRI | EPOLLET;
+ epdata->data = data;
+ epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr;
+ epdata->cb_arg = (void *)intr_handle;
+ rc = rte_epoll_ctl(epfd, epfd_op,
+ intr_handle->efds[efd_idx], rev);
+ if (!rc)
+ RTE_LOG(DEBUG, EAL,
+ "efd %d associated with vec %d added on epfd %d"
+ "\n", rev->fd, vec, epfd);
+ else
+ rc = -EPERM;
+ break;
+ case RTE_INTR_EVENT_DEL:
+ epfd_op = EPOLL_CTL_DEL;
+ rev = &intr_handle->elist[efd_idx];
+ if (rev->status == RTE_EPOLL_INVALID) {
+ RTE_LOG(INFO, EAL, "Event does not exist.\n");
+ return -EPERM;
+ }
+
+ rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev);
+ if (rc)
+ rc = -EPERM;
+ break;
+ default:
+ RTE_LOG(ERR, EAL, "event op type mismatch\n");
+ rc = -EPERM;
+ }
+
+ return rc;
+}
+
+void
+rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle)
+{
+ uint32_t i;
+ struct rte_epoll_event *rev;
+
+ for (i = 0; i < intr_handle->nb_efd; i++) {
+ rev = &intr_handle->elist[i];
+ if (rev->status == RTE_EPOLL_INVALID)
+ continue;
+ if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) {
+ /* force free if the entry valid */
+ eal_epoll_data_safe_free(rev);
+ rev->status = RTE_EPOLL_INVALID;
+ }
+ }
+}
+
+int
+rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
+{
+ uint32_t i;
+ int fd;
+ uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
+
+ assert(nb_efd != 0);
+
+ if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) {
+ for (i = 0; i < n; i++) {
+ fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "can't setup eventfd, error %i (%s)\n",
+ errno, strerror(errno));
+ return -errno;
+ }
+ intr_handle->efds[i] = fd;
+ }
+ intr_handle->nb_efd = n;
+ intr_handle->max_intr = NB_OTHER_INTR + n;
+ } else if (intr_handle->type == RTE_INTR_HANDLE_VDEV) {
+ /* only check, initialization would be done in vdev driver.*/
+ if (intr_handle->efd_counter_size >
+ sizeof(union rte_intr_read_buffer)) {
+ RTE_LOG(ERR, EAL, "the efd_counter_size is oversized");
+ return -EINVAL;
+ }
+ } else {
+ intr_handle->efds[0] = intr_handle->fd;
+ intr_handle->nb_efd = RTE_MIN(nb_efd, 1U);
+ intr_handle->max_intr = NB_OTHER_INTR;
+ }
+
+ return 0;
+}
+
+void
+rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
+{
+ uint32_t i;
+
+ rte_intr_free_epoll_fd(intr_handle);
+ if (intr_handle->max_intr > intr_handle->nb_efd) {
+ for (i = 0; i < intr_handle->nb_efd; i++)
+ close(intr_handle->efds[i]);
+ }
+ intr_handle->nb_efd = 0;
+ intr_handle->max_intr = 0;
+}
+
+int
+rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
+{
+ return !(!intr_handle->nb_efd);
+}
+
+int
+rte_intr_allow_others(struct rte_intr_handle *intr_handle)
+{
+ if (!rte_intr_dp_is_en(intr_handle))
+ return 1;
+ else
+ return !!(intr_handle->max_intr - intr_handle->nb_efd);
+}
+
+int
+rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
+{
+ if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX)
+ return 1;
+
+ if (intr_handle->type == RTE_INTR_HANDLE_VDEV)
+ return 1;
+
+ return 0;
+}