mbuf: add rte prefix to offload flags
[dpdk.git] / drivers / net / vhost / rte_eth_vhost.c
index 951929c..0cead44 100644 (file)
@@ -5,10 +5,11 @@
 #include <unistd.h>
 #include <pthread.h>
 #include <stdbool.h>
+#include <sys/epoll.h>
 
 #include <rte_mbuf.h>
-#include <rte_ethdev_driver.h>
-#include <rte_ethdev_vdev.h>
+#include <ethdev_driver.h>
+#include <ethdev_vdev.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <rte_bus_vdev.h>
@@ -18,7 +19,7 @@
 
 #include "rte_eth_vhost.h"
 
-RTE_LOG_REGISTER(vhost_logtype, pmd.net.vhost, NOTICE);
+RTE_LOG_REGISTER_DEFAULT(vhost_logtype, NOTICE);
 
 #define VHOST_LOG(level, ...) \
        rte_log(RTE_LOG_ ## level, vhost_logtype, __VA_ARGS__)
@@ -28,7 +29,6 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 #define ETH_VHOST_IFACE_ARG            "iface"
 #define ETH_VHOST_QUEUES_ARG           "queues"
 #define ETH_VHOST_CLIENT_ARG           "client"
-#define ETH_VHOST_DEQUEUE_ZERO_COPY    "dequeue-zero-copy"
 #define ETH_VHOST_IOMMU_SUPPORT                "iommu-support"
 #define ETH_VHOST_POSTCOPY_SUPPORT     "postcopy-support"
 #define ETH_VHOST_VIRTIO_NET_F_HOST_TSO "tso"
@@ -40,7 +40,6 @@ static const char *valid_arguments[] = {
        ETH_VHOST_IFACE_ARG,
        ETH_VHOST_QUEUES_ARG,
        ETH_VHOST_CLIENT_ARG,
-       ETH_VHOST_DEQUEUE_ZERO_COPY,
        ETH_VHOST_IOMMU_SUPPORT,
        ETH_VHOST_POSTCOPY_SUPPORT,
        ETH_VHOST_VIRTIO_NET_F_HOST_TSO,
@@ -72,6 +71,9 @@ enum vhost_xstats_pkts {
        VHOST_BROADCAST_PKT,
        VHOST_MULTICAST_PKT,
        VHOST_UNICAST_PKT,
+       VHOST_PKT,
+       VHOST_BYTE,
+       VHOST_MISSED_PKT,
        VHOST_ERRORS_PKT,
        VHOST_ERRORS_FRAGMENTED,
        VHOST_ERRORS_JABBER,
@@ -95,6 +97,8 @@ struct vhost_queue {
        uint16_t port;
        uint16_t virtqueue_id;
        struct vhost_stats stats;
+       int intr_enable;
+       rte_spinlock_t intr_lock;
 };
 
 struct pmd_internal {
@@ -146,11 +150,11 @@ struct vhost_xstats_name_off {
 /* [rx]_is prepended to the name string here */
 static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = {
        {"good_packets",
-        offsetof(struct vhost_queue, stats.pkts)},
+        offsetof(struct vhost_queue, stats.xstats[VHOST_PKT])},
        {"total_bytes",
-        offsetof(struct vhost_queue, stats.bytes)},
+        offsetof(struct vhost_queue, stats.xstats[VHOST_BYTE])},
        {"missed_pkts",
-        offsetof(struct vhost_queue, stats.missed_pkts)},
+        offsetof(struct vhost_queue, stats.xstats[VHOST_MISSED_PKT])},
        {"broadcast_packets",
         offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
        {"multicast_packets",
@@ -186,11 +190,11 @@ static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = {
 /* [tx]_ is prepended to the name string here */
 static const struct vhost_xstats_name_off vhost_txport_stat_strings[] = {
        {"good_packets",
-        offsetof(struct vhost_queue, stats.pkts)},
+        offsetof(struct vhost_queue, stats.xstats[VHOST_PKT])},
        {"total_bytes",
-        offsetof(struct vhost_queue, stats.bytes)},
+        offsetof(struct vhost_queue, stats.xstats[VHOST_BYTE])},
        {"missed_pkts",
-        offsetof(struct vhost_queue, stats.missed_pkts)},
+        offsetof(struct vhost_queue, stats.xstats[VHOST_MISSED_PKT])},
        {"broadcast_packets",
         offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
        {"multicast_packets",
@@ -284,23 +288,6 @@ vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
        if (n < nxstats)
                return nxstats;
 
-       for (i = 0; i < dev->data->nb_rx_queues; i++) {
-               vq = dev->data->rx_queues[i];
-               if (!vq)
-                       continue;
-               vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts
-                               - (vq->stats.xstats[VHOST_BROADCAST_PKT]
-                               + vq->stats.xstats[VHOST_MULTICAST_PKT]);
-       }
-       for (i = 0; i < dev->data->nb_tx_queues; i++) {
-               vq = dev->data->tx_queues[i];
-               if (!vq)
-                       continue;
-               vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts
-                               + vq->stats.missed_pkts
-                               - (vq->stats.xstats[VHOST_BROADCAST_PKT]
-                               + vq->stats.xstats[VHOST_MULTICAST_PKT]);
-       }
        for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
                xstats[count].value = 0;
                for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -331,7 +318,7 @@ vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 }
 
 static inline void
-vhost_count_multicast_broadcast(struct vhost_queue *vq,
+vhost_count_xcast_packets(struct vhost_queue *vq,
                                struct rte_mbuf *mbuf)
 {
        struct rte_ether_addr *ea = NULL;
@@ -343,20 +330,27 @@ vhost_count_multicast_broadcast(struct vhost_queue *vq,
                        pstats->xstats[VHOST_BROADCAST_PKT]++;
                else
                        pstats->xstats[VHOST_MULTICAST_PKT]++;
+       } else {
+               pstats->xstats[VHOST_UNICAST_PKT]++;
        }
 }
 
 static void
-vhost_update_packet_xstats(struct vhost_queue *vq,
-                          struct rte_mbuf **bufs,
-                          uint16_t count)
+vhost_update_packet_xstats(struct vhost_queue *vq, struct rte_mbuf **bufs,
+                          uint16_t count, uint64_t nb_bytes,
+                          uint64_t nb_missed)
 {
        uint32_t pkt_len = 0;
        uint64_t i = 0;
        uint64_t index;
        struct vhost_stats *pstats = &vq->stats;
 
+       pstats->xstats[VHOST_BYTE] += nb_bytes;
+       pstats->xstats[VHOST_MISSED_PKT] += nb_missed;
+       pstats->xstats[VHOST_UNICAST_PKT] += nb_missed;
+
        for (i = 0; i < count ; i++) {
+               pstats->xstats[VHOST_PKT]++;
                pkt_len = bufs[i]->pkt_len;
                if (pkt_len == 64) {
                        pstats->xstats[VHOST_64_PKT]++;
@@ -372,7 +366,7 @@ vhost_update_packet_xstats(struct vhost_queue *vq,
                        else if (pkt_len > 1522)
                                pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
                }
-               vhost_count_multicast_broadcast(vq, bufs[i]);
+               vhost_count_xcast_packets(vq, bufs[i]);
        }
 }
 
@@ -382,6 +376,7 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
        struct vhost_queue *r = q;
        uint16_t i, nb_rx = 0;
        uint16_t nb_receive = nb_bufs;
+       uint64_t nb_bytes = 0;
 
        if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
                return 0;
@@ -416,10 +411,11 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
                if (r->internal->vlan_strip)
                        rte_vlan_strip(bufs[i]);
 
-               r->stats.bytes += bufs[i]->pkt_len;
+               nb_bytes += bufs[i]->pkt_len;
        }
 
-       vhost_update_packet_xstats(r, bufs, nb_rx);
+       r->stats.bytes += nb_bytes;
+       vhost_update_packet_xstats(r, bufs, nb_rx, nb_bytes, 0);
 
 out:
        rte_atomic32_set(&r->while_queuing, 0);
@@ -433,6 +429,8 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
        struct vhost_queue *r = q;
        uint16_t i, nb_tx = 0;
        uint16_t nb_send = 0;
+       uint64_t nb_bytes = 0;
+       uint64_t nb_missed = 0;
 
        if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
                return 0;
@@ -446,7 +444,7 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
                struct rte_mbuf *m = bufs[i];
 
                /* Do VLAN tag insertion */
-               if (m->ol_flags & PKT_TX_VLAN_PKT) {
+               if (m->ol_flags & RTE_MBUF_F_TX_VLAN_PKT) {
                        int error = rte_vlan_insert(&m);
                        if (unlikely(error)) {
                                rte_pktmbuf_free(m);
@@ -473,20 +471,23 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
                        break;
        }
 
+       for (i = 0; likely(i < nb_tx); i++)
+               nb_bytes += bufs[i]->pkt_len;
+
+       nb_missed = nb_bufs - nb_tx;
+
        r->stats.pkts += nb_tx;
+       r->stats.bytes += nb_bytes;
        r->stats.missed_pkts += nb_bufs - nb_tx;
 
-       for (i = 0; likely(i < nb_tx); i++)
-               r->stats.bytes += bufs[i]->pkt_len;
-
-       vhost_update_packet_xstats(r, bufs, nb_tx);
+       vhost_update_packet_xstats(r, bufs, nb_tx, nb_bytes, nb_missed);
 
-       /* According to RFC2863 page42 section ifHCOutMulticastPkts and
-        * ifHCOutBroadcastPkts, the counters "multicast" and "broadcast"
-        * are increased when packets are not transmitted successfully.
+       /* According to RFC2863, the ifHCOutUcastPkts, ifHCOutMulticastPkts and
+        * ifHCOutBroadcastPkts counters are increased even when packets are
+        * not transmitted successfully.
         */
        for (i = nb_tx; i < nb_bufs; i++)
-               vhost_count_multicast_broadcast(r, bufs[i]);
+               vhost_count_xcast_packets(r, bufs[i]);
 
        for (i = 0; likely(i < nb_tx); i++)
                rte_pktmbuf_free(bufs[i]);
@@ -524,12 +525,58 @@ find_internal_resource(char *ifname)
        return list;
 }
 
+static int /* 0 if nothing to do or on success, else rte_epoll_ctl() result */
+eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx)
+{ /* Re-registers rxq_idx's epoll event when efds[] and elist[].fd differ. */
+       struct rte_intr_handle *handle = eth_dev->intr_handle;
+       struct rte_epoll_event rev;
+       int epfd, ret;
+
+       if (!handle) /* no interrupt handle: nothing to update */
+               return 0;
+
+       if (handle->efds[rxq_idx] == handle->elist[rxq_idx].fd)
+               return 0; /* registered fd already matches efds[] */
+
+       VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating handler.\n",
+                       rxq_idx);
+
+       if (handle->elist[rxq_idx].fd != -1) /* only logged; update goes on */
+               VHOST_LOG(ERR, "Unexpected previous kickfd value (Got %d, expected -1).\n",
+                               handle->elist[rxq_idx].fd);
+
+       /*
+        * The registered event still refers to the old fd, so delete it
+        * and add it again with the fd now stored in efds[]. May be
+        * solved with a proper API in the future.
+        */
+       epfd = handle->elist[rxq_idx].epfd;
+       rev = handle->elist[rxq_idx]; /* snapshot taken before the delete */
+       ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd,
+                       &handle->elist[rxq_idx]);
+       if (ret) {
+               VHOST_LOG(ERR, "Delete epoll event failed.\n");
+               return ret; /* new fd was not installed */
+       }
+
+       rev.fd = handle->efds[rxq_idx]; /* point the snapshot at the new fd */
+       handle->elist[rxq_idx] = rev; /* write it back before re-adding */
+       ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd,
+                       &handle->elist[rxq_idx]);
+       if (ret) {
+               VHOST_LOG(ERR, "Add epoll event failed.\n");
+               return ret; /* the old event has already been deleted here */
+       }
+
+       return 0;
+}
+
 static int
 eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
 {
        struct rte_vhost_vring vring;
        struct vhost_queue *vq;
-       int ret = 0;
+       int old_intr_enable, ret = 0;
 
        vq = dev->data->rx_queues[qid];
        if (!vq) {
@@ -537,6 +584,18 @@ eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
                return -1;
        }
 
+       rte_spinlock_lock(&vq->intr_lock);
+       old_intr_enable = vq->intr_enable;
+       vq->intr_enable = 1;
+       ret = eth_vhost_update_intr(dev, qid);
+       rte_spinlock_unlock(&vq->intr_lock);
+
+       if (ret < 0) {
+               VHOST_LOG(ERR, "Failed to update rxq%d's intr\n", qid);
+               vq->intr_enable = old_intr_enable;
+               return ret;
+       }
+
        ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
        if (ret < 0) {
                VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
@@ -571,6 +630,8 @@ eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
        rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
        rte_wmb();
 
+       vq->intr_enable = 0;
+
        return 0;
 }
 
@@ -593,7 +654,6 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
 {
        struct rte_vhost_vring vring;
        struct vhost_queue *vq;
-       int count = 0;
        int nb_rxq = dev->data->nb_rx_queues;
        int i;
        int ret;
@@ -623,6 +683,8 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
 
        VHOST_LOG(INFO, "Prepare intr vec\n");
        for (i = 0; i < nb_rxq; i++) {
+               dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
+               dev->intr_handle->efds[i] = -1;
                vq = dev->data->rx_queues[i];
                if (!vq) {
                        VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i);
@@ -641,14 +703,12 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
                                "rxq-%d's kickfd is invalid, skip!\n", i);
                        continue;
                }
-               dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
                dev->intr_handle->efds[i] = vring.kickfd;
-               count++;
                VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i);
        }
 
-       dev->intr_handle->nb_efd = count;
-       dev->intr_handle->max_intr = count + 1;
+       dev->intr_handle->nb_efd = nb_rxq;
+       dev->intr_handle->max_intr = nb_rxq + 1;
        dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;
 
        return 0;
@@ -770,7 +830,7 @@ new_device(int vid)
 
        VHOST_LOG(INFO, "Vhost device %d created\n", vid);
 
-       _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
+       rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
 
        return 0;
 }
@@ -827,7 +887,7 @@ destroy_device(int vid)
        VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid);
        eth_vhost_uninstall_intr(eth_dev);
 
-       _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
+       rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
 }
 
 static int
@@ -835,6 +895,7 @@ vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
 {
        struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
        struct pmd_internal *internal = eth_dev->data->dev_private;
+       struct vhost_queue *vq;
        struct rte_vhost_vring vring;
        int rx_idx = vring_id % 2 ? (vring_id - 1) >> 1 : -1;
        int ret = 0;
@@ -853,12 +914,18 @@ vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
                                        vring_id);
                        return ret;
                }
+               eth_dev->intr_handle->efds[rx_idx] = vring.kickfd;
 
-               if (vring.kickfd != eth_dev->intr_handle->efds[rx_idx]) {
-                       VHOST_LOG(INFO, "kickfd for rxq-%d was changed.\n",
-                                         rx_idx);
-                       eth_dev->intr_handle->efds[rx_idx] = vring.kickfd;
+               vq = eth_dev->data->rx_queues[rx_idx];
+               if (!vq) {
+                       VHOST_LOG(ERR, "rxq%d is not setup yet\n", rx_idx);
+                       return -1;
                }
+
+               rte_spinlock_lock(&vq->intr_lock);
+               if (vq->intr_enable)
+                       ret = eth_vhost_update_intr(eth_dev, rx_idx);
+               rte_spinlock_unlock(&vq->intr_lock);
        }
 
        return ret;
@@ -899,7 +966,7 @@ vring_state_changed(int vid, uint16_t vring, int enable)
        VHOST_LOG(INFO, "vring%u is %s\n",
                        vring, enable ? "enabled" : "disabled");
 
-       _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL);
+       rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL);
 
        return 0;
 }
@@ -1086,27 +1153,33 @@ eth_dev_start(struct rte_eth_dev *eth_dev)
        return 0;
 }
 
-static void
+static int
 eth_dev_stop(struct rte_eth_dev *dev)
 {
        struct pmd_internal *internal = dev->data->dev_private;
 
+       dev->data->dev_started = 0;
        rte_atomic32_set(&internal->started, 0);
        update_queuing_status(dev);
+
+       return 0;
 }
 
-static void
+static int
 eth_dev_close(struct rte_eth_dev *dev)
 {
        struct pmd_internal *internal;
        struct internal_list *list;
-       unsigned int i;
+       unsigned int i, ret;
+
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return 0;
 
        internal = dev->data->dev_private;
        if (!internal)
-               return;
+               return 0;
 
-       eth_dev_stop(dev);
+       ret = eth_dev_stop(dev);
 
        list = find_internal_resource(internal->iface_name);
        if (list) {
@@ -1132,6 +1205,8 @@ eth_dev_close(struct rte_eth_dev *dev)
 
        rte_free(vring_states[dev->data->port_id]);
        vring_states[dev->data->port_id] = NULL;
+
+       return ret;
 }
 
 static int
@@ -1152,6 +1227,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
 
        vq->mb_pool = mb_pool;
        vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
+       rte_spinlock_init(&vq->intr_lock);
        dev->data->rx_queues[rx_queue_id] = vq;
 
        return 0;
@@ -1173,6 +1249,7 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
        }
 
        vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
+       rte_spinlock_init(&vq->intr_lock);
        dev->data->tx_queues[tx_queue_id] = vq;
 
        return 0;
@@ -1314,7 +1391,6 @@ static const struct eth_dev_ops ops = {
        .rx_queue_release = eth_queue_release,
        .tx_queue_release = eth_queue_release,
        .tx_done_cleanup = eth_tx_done_cleanup,
-       .rx_queue_count = eth_rx_queue_count,
        .link_update = eth_link_update,
        .stats_get = eth_stats_get,
        .stats_reset = eth_stats_reset,
@@ -1371,11 +1447,13 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
        internal->flags = flags;
        internal->disable_flags = disable_flags;
        data->dev_link = pmd_link;
-       data->dev_flags = RTE_ETH_DEV_INTR_LSC | RTE_ETH_DEV_CLOSE_REMOVE;
+       data->dev_flags = RTE_ETH_DEV_INTR_LSC |
+                               RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
        data->promiscuous = 1;
        data->all_multicast = 1;
 
        eth_dev->dev_ops = &ops;
+       eth_dev->rx_queue_count = eth_rx_queue_count;
 
        /* finally assign rx and tx ops */
        eth_dev->rx_pkt_burst = eth_vhost_rx;
@@ -1430,7 +1508,6 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
        uint64_t flags = 0;
        uint64_t disable_flags = 0;
        int client_mode = 0;
-       int dequeue_zero_copy = 0;
        int iommu_support = 0;
        int postcopy_support = 0;
        int tso = 0;
@@ -1490,16 +1567,6 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
                        flags |= RTE_VHOST_USER_CLIENT;
        }
 
-       if (rte_kvargs_count(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY) == 1) {
-               ret = rte_kvargs_process(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY,
-                                        &open_int, &dequeue_zero_copy);
-               if (ret < 0)
-                       goto out_free;
-
-               if (dequeue_zero_copy)
-                       flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
-       }
-
        if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) {
                ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT,
                                         &open_int, &iommu_support);
@@ -1582,11 +1649,7 @@ rte_pmd_vhost_remove(struct rte_vdev_device *dev)
        if (eth_dev == NULL)
                return 0;
 
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-               return rte_eth_dev_release_port(eth_dev);
-
        eth_dev_close(eth_dev);
-
        rte_eth_dev_release_port(eth_dev);
 
        return 0;
@@ -1603,7 +1666,6 @@ RTE_PMD_REGISTER_PARAM_STRING(net_vhost,
        "iface=<ifc> "
        "queues=<int> "
        "client=<0|1> "
-       "dequeue-zero-copy=<0|1> "
        "iommu-support=<0|1> "
        "postcopy-support=<0|1> "
        "tso=<0|1> "