net: add rte prefix to ether structures
diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
index 400598a..a5850c2 100644
--- a/drivers/net/netvsc/hn_rxtx.c
+++ b/drivers/net/netvsc/hn_rxtx.c
@@ -10,6 +10,7 @@
 #include <errno.h>
 #include <unistd.h>
 #include <strings.h>
+#include <malloc.h>
 
 #include <rte_ethdev.h>
 #include <rte_memcpy.h>
@@ -24,6 +25,7 @@
 #include <rte_memory.h>
 #include <rte_eal.h>
 #include <rte_dev.h>
+#include <rte_net.h>
 #include <rte_bus_vmbus.h>
 #include <rte_spinlock.h>
 
@@ -40,7 +42,7 @@
 #define HN_TXCOPY_THRESHOLD    512
 
 #define HN_RXCOPY_THRESHOLD    256
-#define HN_RXQ_EVENT_DEFAULT   1024
+#define HN_RXQ_EVENT_DEFAULT   2048
 
 struct hn_rxinfo {
        uint32_t        vlan_info;
@@ -106,7 +108,7 @@ static void
 hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m)
 {
        uint32_t s = m->pkt_len;
-       const struct ether_addr *ea;
+       const struct rte_ether_addr *ea;
 
        if (s == 64) {
                stats->size_bins[1]++;
@@ -121,11 +123,11 @@ hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m)
                        stats->size_bins[0]++;
                else if (s < 1519)
                        stats->size_bins[6]++;
-               else if (s >= 1519)
+               else
                        stats->size_bins[7]++;
        }
 
-       ea = rte_pktmbuf_mtod(m, const struct ether_addr *);
+       ea = rte_pktmbuf_mtod(m, const struct rte_ether_addr *);
        if (is_multicast_ether_addr(ea)) {
                if (is_broadcast_ether_addr(ea))
                        stats->broadcast++;
@@ -197,6 +199,17 @@ hn_tx_pool_init(struct rte_eth_dev *dev)
        return 0;
 }
 
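+/* Free the transmit descriptor pool created by hn_tx_pool_init */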
+void
+hn_tx_pool_uninit(struct rte_eth_dev *dev)
+{
+       struct hn_data *hv = dev->data->dev_private;
+
+       if (hv->tx_pool) {
+               rte_mempool_free(hv->tx_pool);
+               hv->tx_pool = NULL;
+       }
+}
+
 static void hn_reset_txagg(struct hn_tx_queue *txq)
 {
        txq->agg_szleft = txq->agg_szmax;
@@ -215,6 +228,7 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
        struct hn_data *hv = dev->data->dev_private;
        struct hn_tx_queue *txq;
        uint32_t tx_free_thresh;
+       int err;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -244,8 +258,14 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
 
        hn_reset_txagg(txq);
 
-       dev->data->tx_queues[queue_idx] = txq;
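+       /* Set up the matching queue on the VF device, if one is attached */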
+       err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
+                                    socket_id, tx_conf);
+       if (err) {
+               rte_free(txq);
+               return err;
+       }
 
+       dev->data->tx_queues[queue_idx] = txq;
        return 0;
 }
 
@@ -484,10 +504,22 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
        m->port = rxq->port_id;
        m->pkt_len = dlen;
        m->data_len = dlen;
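+       /* Classify L2/L3/L4 headers in software to fill m->packet_type */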
+       m->packet_type = rte_net_get_ptype(m, NULL,
+                                          RTE_PTYPE_L2_MASK |
+                                          RTE_PTYPE_L3_MASK |
+                                          RTE_PTYPE_L4_MASK);
 
        if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
                m->vlan_tci = info->vlan_info;
                m->ol_flags |= PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN;
+
+               /* NDIS always strips the tag; put it back if stripping is disabled */
+               if (!hv->vlan_strip && rte_vlan_insert(&m)) {
+                       PMD_DRV_LOG(DEBUG, "vlan insert failed");
+                       ++rxq->stats.errors;
+                       rte_pktmbuf_free(m);
+                       return;
+               }
        }
 
        if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
@@ -497,6 +529,9 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
                if (info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK
                                       | NDIS_RXCSUM_INFO_TCPCS_OK))
                        m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
+               else if (info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_FAILED
+                                           | NDIS_RXCSUM_INFO_UDPCS_FAILED))
+                       m->ol_flags |= PKT_RX_L4_CKSUM_BAD;
        }
 
        if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
@@ -504,16 +539,17 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
                m->hash.rss = info->hash_value;
        }
 
-       PMD_RX_LOG(DEBUG, "port %u:%u RX id %" PRIu64 " size %u ol_flags %#" PRIx64,
+       PMD_RX_LOG(DEBUG,
+                  "port %u:%u RX id %"PRIu64" size %u type %#x ol_flags %#"PRIx64,
                   rxq->port_id, rxq->queue_id, rxb->xactid,
-                  m->pkt_len, m->ol_flags);
+                  m->pkt_len, m->packet_type, m->ol_flags);
 
        ++rxq->stats.packets;
        rxq->stats.bytes += m->pkt_len;
        hn_update_packet_stats(&rxq->stats, m);
 
        if (unlikely(rte_ring_sp_enqueue(rxq->rx_ring, m) != 0)) {
-               ++rxq->ring_full;
+               ++rxq->stats.ring_full;
                rte_pktmbuf_free(m);
        }
 }
@@ -580,7 +616,7 @@ error:
 }
 
 static void
-hn_rndis_receive(const struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
+hn_rndis_receive(struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
                 struct hn_rx_bufinfo *rxb, void *buf, uint32_t len)
 {
        const struct rndis_msghdr *hdr = buf;
@@ -592,7 +628,7 @@ hn_rndis_receive(const struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
                break;
 
        case RNDIS_INDICATE_STATUS_MSG:
-               hn_rndis_link_status(rxq->hv, buf);
+               hn_rndis_link_status(dev, buf);
                break;
 
        case RNDIS_INITIALIZE_CMPLT:
@@ -692,6 +728,35 @@ hn_nvs_handle_rxbuf(struct rte_eth_dev *dev,
        hn_rx_buf_release(rxb);
 }
 
+/*
+ * Called when NVS inband events are received.
+ * Send a two-part message containing the port_id and the NVS message
+ * over the pipe to the netvsc-vf-event control thread.
+ */
+static void hn_nvs_handle_notify(struct rte_eth_dev *dev,
+                                const struct vmbus_chanpkt_hdr *pkt,
+                                const void *data)
+{
+       const struct hn_nvs_hdr *hdr = data;
+
+       switch (hdr->type) {
+       case NVS_TYPE_TXTBL_NOTE:
+               /* The transmit indirection table has locking problems
+                * in DPDK and is therefore not implemented
+                */
+               PMD_DRV_LOG(DEBUG, "host notify of transmit indirection table");
+               break;
+
+       case NVS_TYPE_VFASSOC_NOTE:
+               hn_nvs_handle_vfassoc(dev, pkt, data);
+               break;
+
+       default:
+               PMD_DRV_LOG(INFO,
+                           "got notify, nvs type %u", hdr->type);
+       }
+}
+
 struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
                                      uint16_t queue_id,
                                      unsigned int socket_id)
@@ -700,23 +765,22 @@ struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
 
        rxq = rte_zmalloc_socket("HN_RXQ", sizeof(*rxq),
                                 RTE_CACHE_LINE_SIZE, socket_id);
-       if (rxq) {
-               rxq->hv = hv;
-               rxq->chan = hv->channels[queue_id];
-               rte_spinlock_init(&rxq->ring_lock);
-               rxq->port_id = hv->port_id;
-               rxq->queue_id = queue_id;
-
-               rxq->event_sz = HN_RXQ_EVENT_DEFAULT;
-               rxq->event_buf = rte_malloc_socket("RX_EVENTS",
-                                                  rxq->event_sz,
-                                                  RTE_CACHE_LINE_SIZE,
-                                                  socket_id);
-               if (!rxq->event_buf) {
-                       rte_free(rxq);
-                       rxq = NULL;
-               }
+       if (!rxq)
+               return NULL;
+
+       rxq->hv = hv;
+       rxq->chan = hv->channels[queue_id];
+       rte_spinlock_init(&rxq->ring_lock);
+       rxq->port_id = hv->port_id;
+       rxq->queue_id = queue_id;
+       rxq->event_sz = HN_RXQ_EVENT_DEFAULT;
+       rxq->event_buf = rte_malloc_socket("HN_EVENTS", HN_RXQ_EVENT_DEFAULT,
+                                          RTE_CACHE_LINE_SIZE, socket_id);
+       if (!rxq->event_buf) {
+               rte_free(rxq);
+               return NULL;
        }
+
        return rxq;
 }
 
@@ -724,22 +788,17 @@ int
 hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
                      uint16_t queue_idx, uint16_t nb_desc,
                      unsigned int socket_id,
-                     const struct rte_eth_rxconf *rx_conf __rte_unused,
+                     const struct rte_eth_rxconf *rx_conf,
                      struct rte_mempool *mp)
 {
        struct hn_data *hv = dev->data->dev_private;
-       uint32_t qmax = hv->rxbuf_section_cnt;
        char ring_name[RTE_RING_NAMESIZE];
        struct hn_rx_queue *rxq;
        unsigned int count;
-       size_t size;
-       int err = -ENOMEM;
+       int error = -ENOMEM;
 
        PMD_INIT_FUNC_TRACE();
 
-       if (nb_desc == 0 || nb_desc > qmax)
-               nb_desc = qmax;
-
        if (queue_idx == 0) {
                rxq = hv->primary;
        } else {
@@ -749,14 +808,9 @@ hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
        }
 
        rxq->mb_pool = mp;
-
-       count = rte_align32pow2(nb_desc);
-       size = sizeof(struct rte_ring) + count * sizeof(void *);
-       rxq->rx_ring = rte_malloc_socket("RX_RING", size,
-                                        RTE_CACHE_LINE_SIZE,
-                                        socket_id);
-       if (!rxq->rx_ring)
-               goto fail;
+       count = rte_mempool_avail_count(mp) / dev->data->nb_rx_queues;
+       if (nb_desc == 0 || nb_desc > count)
+               nb_desc = count;
 
        /*
         * Staging ring from receive event logic to rx_pkts.
@@ -765,9 +819,15 @@ hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
         */
        snprintf(ring_name, sizeof(ring_name),
                 "hn_rx_%u_%u", dev->data->port_id, queue_idx);
-       err = rte_ring_init(rxq->rx_ring, ring_name,
-                           count, 0);
-       if (err)
+       rxq->rx_ring = rte_ring_create(ring_name,
+                                      rte_align32pow2(nb_desc),
+                                      socket_id, 0);
+       if (!rxq->rx_ring)
+               goto fail;
+
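+       /* Mirror the queue setup on the VF device, if one is attached */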
+       error = hn_vf_rx_queue_setup(dev, queue_idx, nb_desc,
+                                    socket_id, rx_conf, mp);
+       if (error)
                goto fail;
 
        dev->data->rx_queues[queue_idx] = rxq;
@@ -777,15 +837,12 @@ fail:
        rte_ring_free(rxq->rx_ring);
        rte_free(rxq->event_buf);
        rte_free(rxq);
-       return -ENOMEM;
+       return error;
 }
 
-void
-hn_dev_rx_queue_release(void *arg)
+static void
+hn_rx_queue_free(struct hn_rx_queue *rxq, bool keep_primary)
 {
-       struct hn_rx_queue *rxq = arg;
-
-       PMD_INIT_FUNC_TRACE();
 
        if (!rxq)
                return;
@@ -794,85 +851,98 @@ hn_dev_rx_queue_release(void *arg)
        rxq->rx_ring = NULL;
        rxq->mb_pool = NULL;
 
-       if (rxq != rxq->hv->primary) {
-               rte_free(rxq->event_buf);
-               rte_free(rxq);
-       }
+       hn_vf_rx_queue_release(rxq->hv, rxq->queue_id);
+
+       /* Keep primary queue to allow for control operations */
+       if (keep_primary && rxq == rxq->hv->primary)
+               return;
+
+       rte_free(rxq->event_buf);
+       rte_free(rxq);
 }
 
-static void
-hn_nvs_handle_notify(const struct vmbus_chanpkt_hdr *pkthdr,
-                    const void *data)
+void
+hn_dev_rx_queue_release(void *arg)
 {
-       const struct hn_nvs_hdr *hdr = data;
+       struct hn_rx_queue *rxq = arg;
 
-       if (unlikely(vmbus_chanpkt_datalen(pkthdr) < sizeof(*hdr))) {
-               PMD_DRV_LOG(ERR, "invalid nvs notify");
-               return;
-       }
+       PMD_INIT_FUNC_TRACE();
+
+       hn_rx_queue_free(rxq, true);
+}
+
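+/* Process channel events to release completed transmit descriptors */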
+int
+hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt)
+{
+       struct hn_tx_queue *txq = arg;
 
-       PMD_DRV_LOG(INFO,
-                   "got notify, nvs type %u", hdr->type);
+       return hn_process_events(txq->hv, txq->queue_id, free_cnt);
 }
 
 /*
  * Process pending events on the channel.
  * Called from both Rx queue poll and Tx cleanup
  */
-void hn_process_events(struct hn_data *hv, uint16_t queue_id)
+uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id,
+                          uint32_t tx_limit)
 {
        struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id];
        struct hn_rx_queue *rxq;
+       uint32_t bytes_read = 0;
+       uint32_t tx_done = 0;
        int ret = 0;
 
        rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id];
 
        /* If no pending data then nothing to do */
        if (rte_vmbus_chan_rx_empty(rxq->chan))
-               return;
+               return 0;
 
        /*
         * The channel is shared between the Rx and Tx queues, so a lock
         * is needed because DPDK does not force Rx and Tx onto the same CPU.
         */
        if (unlikely(!rte_spinlock_trylock(&rxq->ring_lock)))
-               return;
+               return 0;
 
        for (;;) {
                const struct vmbus_chanpkt_hdr *pkt;
                uint32_t len = rxq->event_sz;
                const void *data;
 
+retry:
                ret = rte_vmbus_chan_recv_raw(rxq->chan, rxq->event_buf, &len);
                if (ret == -EAGAIN)
                        break;  /* ring is empty */
 
-               if (ret == -ENOBUFS) {
-                       /* expanded buffer needed */
-                       len = rte_align32pow2(len);
-                       PMD_DRV_LOG(DEBUG, "expand event buf to %u", len);
-
-                       rxq->event_buf = rte_realloc(rxq->event_buf,
-                                                    len, RTE_CACHE_LINE_SIZE);
-                       if (rxq->event_buf) {
-                               rxq->event_sz = len;
-                               continue;
-                       }
-
-                       rte_exit(EXIT_FAILURE, "can not expand event buf!\n");
+               if (unlikely(ret == -ENOBUFS)) {
+                       /* event buffer not large enough to read ring */
+
+                       PMD_DRV_LOG(DEBUG,
+                                   "event buffer expansion (need %u)", len);
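+                       /* grow 25% beyond the required size to limit reallocs */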
+                       rxq->event_sz = len + len / 4;
+                       rxq->event_buf = rte_realloc(rxq->event_buf, rxq->event_sz,
+                                                    RTE_CACHE_LINE_SIZE);
+                       if (rxq->event_buf)
+                               goto retry;
+                       /* out of memory, no more events now */
+                       rxq->event_sz = 0;
                        break;
                }
 
-               if (ret != 0) {
-                       PMD_DRV_LOG(ERR, "vmbus ring buffer error: %d", ret);
-                       break;
+               if (unlikely(ret <= 0)) {
+                       /* This indicates a failure to communicate (or worse) */
+                       rte_exit(EXIT_FAILURE,
+                                "vmbus ring buffer error: %d", ret);
                }
 
+               bytes_read += ret;
                pkt = (const struct vmbus_chanpkt_hdr *)rxq->event_buf;
                data = (char *)rxq->event_buf + vmbus_chanpkt_getlen(pkt->hlen);
 
                switch (pkt->type) {
                case VMBUS_CHANPKT_TYPE_COMP:
+                       ++tx_done;
                        hn_nvs_handle_comp(dev, queue_id, pkt, data);
                        break;
 
@@ -881,18 +951,27 @@ void hn_process_events(struct hn_data *hv, uint16_t queue_id)
                        break;
 
                case VMBUS_CHANPKT_TYPE_INBAND:
-                       hn_nvs_handle_notify(pkt, data);
+                       hn_nvs_handle_notify(dev, pkt, data);
                        break;
 
                default:
                        PMD_DRV_LOG(ERR, "unknown chan pkt %u", pkt->type);
                        break;
                }
+
+               if (tx_limit && tx_done >= tx_limit)
+                       break;
+
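+               /* Stop when the staging ring fills; unread events stay queued */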
+               if (rxq->rx_ring && rte_ring_full(rxq->rx_ring))
+                       break;
        }
+
+       if (bytes_read > 0)
+               rte_vmbus_chan_signal_read(rxq->chan, bytes_read);
+
        rte_spinlock_unlock(&rxq->ring_lock);
 
-       if (unlikely(ret != -EAGAIN))
-               PMD_DRV_LOG(ERR, "channel receive failed: %d", ret);
+       return tx_done;
 }
 
 static void hn_append_to_chim(struct hn_tx_queue *txq,
@@ -963,7 +1042,7 @@ static struct hn_txdesc *hn_new_txd(struct hn_data *hv,
        struct hn_txdesc *txd;
 
        if (rte_mempool_get(hv->tx_pool, (void **)&txd)) {
-               ++txq->stats.nomemory;
+               ++txq->stats.ring_full;
                PMD_TX_LOG(DEBUG, "tx pool exhausted!");
                return NULL;
        }
@@ -1231,7 +1310,9 @@ uint16_t
 hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
        struct hn_tx_queue *txq = ptxq;
+       uint16_t queue_id = txq->queue_id;
        struct hn_data *hv = txq->hv;
+       struct rte_eth_dev *vf_dev;
        bool need_sig = false;
        uint16_t nb_tx;
        int ret;
@@ -1239,8 +1320,17 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
        if (unlikely(hv->closed))
                return 0;
 
+       /* Transmit over VF if present and up */
+       vf_dev = hn_get_vf_dev(hv);
+
+       if (vf_dev && vf_dev->data->dev_started) {
+               void *sub_q = vf_dev->data->tx_queues[queue_id];
+
+               return (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts);
+       }
+
        if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh)
-               hn_process_events(hv, txq->queue_id);
+               hn_process_events(hv, txq->queue_id, 0);
 
        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
                struct rte_mbuf *m = tx_pkts[nb_tx];
@@ -1258,9 +1348,9 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
                        pkt = hn_try_txagg(hv, txq, pkt_size);
                        if (unlikely(!pkt))
-                               goto fail;
+                               break;
 
-                       hn_encap(pkt, txq->queue_id, m);
+                       hn_encap(pkt, queue_id, m);
                        hn_append_to_chim(txq, pkt, m);
 
                        rte_pktmbuf_free(m);
@@ -1279,7 +1369,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                        } else {
                                txd = hn_new_txd(hv, txq);
                                if (unlikely(!txd))
-                                       goto fail;
+                                       break;
                        }
 
                        pkt = txd->rndis_pkt;
@@ -1287,7 +1377,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                        txd->data_size += m->pkt_len;
                        ++txd->packets;
 
-                       hn_encap(pkt, txq->queue_id, m);
+                       hn_encap(pkt, queue_id, m);
 
                        ret = hn_xmit_sg(txq, txd, m, &need_sig);
                        if (unlikely(ret != 0)) {
@@ -1311,19 +1401,70 @@ fail:
        return nb_tx;
 }
 
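+/*
+ * Receive a burst from the VF device and relabel the mbufs with the
+ * synthetic (netvsc) port id so the application sees a single port.
+ */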
+static uint16_t
+hn_recv_vf(uint16_t vf_port, const struct hn_rx_queue *rxq,
+          struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+       uint16_t i, n;
+
+       if (unlikely(nb_pkts == 0))
+               return 0;
+
+       n = rte_eth_rx_burst(vf_port, rxq->queue_id, rx_pkts, nb_pkts);
+
+       /* relabel the received mbufs */
+       for (i = 0; i < n; i++)
+               rx_pkts[i]->port = rxq->port_id;
+
+       return n;
+}
+
 uint16_t
 hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
        struct hn_rx_queue *rxq = prxq;
        struct hn_data *hv = rxq->hv;
+       struct rte_eth_dev *vf_dev;
+       uint16_t nb_rcv;
 
        if (unlikely(hv->closed))
                return 0;
 
-       /* Get all outstanding receive completions */
-       hn_process_events(hv, rxq->queue_id);
+       /* Receive from VF if present and up */
+       vf_dev = hn_get_vf_dev(hv);
+
+       /* Check for new completions */
+       if (likely(rte_ring_count(rxq->rx_ring) < nb_pkts))
+               hn_process_events(hv, rxq->queue_id, 0);
+
+       /* Always check the vmbus path for multicast and new flows */
+       nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring,
+                                          (void **)rx_pkts, nb_pkts, NULL);
+
+       /* If VF is available, check that as well */
+       if (vf_dev && vf_dev->data->dev_started)
+               nb_rcv += hn_recv_vf(vf_dev->data->port_id, rxq,
+                                    rx_pkts + nb_rcv, nb_pkts - nb_rcv);
+
+       return nb_rcv;
+}
+
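+/* Release all Rx and Tx queues, including the primary Rx queue */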
+void
+hn_dev_free_queues(struct rte_eth_dev *dev)
+{
+       unsigned int i;
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               struct hn_rx_queue *rxq = dev->data->rx_queues[i];
 
-       /* Get mbufs off staging ring */
-       return rte_ring_sc_dequeue_burst(rxq->rx_ring, (void **)rx_pkts,
-                                        nb_pkts, NULL);
+               hn_rx_queue_free(rxq, false);
+               dev->data->rx_queues[i] = NULL;
+       }
+       dev->data->nb_rx_queues = 0;
+
+       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               hn_dev_tx_queue_release(dev->data->tx_queues[i]);
+               dev->data->tx_queues[i] = NULL;
+       }
+       dev->data->nb_tx_queues = 0;
 }