net/netvsc: add Hyper-V network device
author Stephen Hemminger <sthemmin@microsoft.com>
Fri, 13 Jul 2018 17:06:43 +0000 (10:06 -0700)
committer Thomas Monjalon <thomas@monjalon.net>
Fri, 13 Jul 2018 21:48:07 +0000 (23:48 +0200)
The driver supports Hyper-V networking directly like
virtio for KVM or vmxnet3 for VMware.

This code is based off of the FreeBSD driver. The file and variable
names are kept the same to help with understanding (with most of the
BSD style warts removed).

This version supports the latest NetVSP 6.1 version and
older versions.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
20 files changed:
MAINTAINERS
config/common_base
config/common_linuxapp
drivers/bus/vmbus/rte_bus_vmbus_version.map
drivers/net/Makefile
drivers/net/meson.build
drivers/net/netvsc/Makefile [new file with mode: 0644]
drivers/net/netvsc/hn_ethdev.c [new file with mode: 0644]
drivers/net/netvsc/hn_logs.h [new file with mode: 0644]
drivers/net/netvsc/hn_nvs.c [new file with mode: 0644]
drivers/net/netvsc/hn_nvs.h [new file with mode: 0644]
drivers/net/netvsc/hn_rndis.c [new file with mode: 0644]
drivers/net/netvsc/hn_rndis.h [new file with mode: 0644]
drivers/net/netvsc/hn_rxtx.c [new file with mode: 0644]
drivers/net/netvsc/hn_var.h [new file with mode: 0644]
drivers/net/netvsc/meson.build [new file with mode: 0644]
drivers/net/netvsc/ndis.h [new file with mode: 0644]
drivers/net/netvsc/rndis.h [new file with mode: 0644]
drivers/net/netvsc/rte_pmd_netvsc_version.map [new file with mode: 0644]
mk/rte.app.mk

index 61d27a3..0f3bc74 100644 (file)
@@ -607,6 +607,12 @@ F: drivers/net/vdev_netvsc/
 F: doc/guides/nics/vdev_netvsc.rst
 F: doc/guides/nics/features/vdev_netvsc.ini
 
+Microsoft Hyper-V netvsc - EXPERIMENTAL
+M: Stephen Hemminger <sthemmin@microsoft.com>
+M: K. Y. Srinivasan <kys@microsoft.com>
+M: Haiyang Zhang <haiyangz@microsoft.com>
+F: drivers/net/netvsc/
+
 Netcope szedata2
 M: Matej Vido <vido@cesnet.cz>
 F: drivers/net/szedata2/
index 8f8190a..201cdf6 100644 (file)
@@ -404,6 +404,15 @@ CONFIG_RTE_LIBRTE_MVPP2_PMD=n
 #
 CONFIG_RTE_LIBRTE_VMBUS=n
 
+#
+# Compile native PMD for Hyper-V/Azure
+#
+CONFIG_RTE_LIBRTE_NETVSC_PMD=n
+CONFIG_RTE_LIBRTE_NETVSC_DEBUG_RX=n
+CONFIG_RTE_LIBRTE_NETVSC_DEBUG_TX=n
+CONFIG_RTE_LIBRTE_NETVSC_DEBUG_DUMP=n
+
+#
 # Compile virtual device driver for NetVSC on Hyper-V/Azure
 #
 CONFIG_RTE_LIBRTE_VDEV_NETVSC_PMD=n
index 37e8f69..9c5ea9d 100644 (file)
@@ -26,6 +26,9 @@ CONFIG_RTE_LIBRTE_POWER=y
 CONFIG_RTE_VIRTIO_USER=y
 CONFIG_RTE_PROC_INFO=y
 
+CONFIG_RTE_LIBRTE_VMBUS=y
+CONFIG_RTE_LIBRTE_NETVSC_PMD=y
+
 # NXP DPAA BUS and drivers
 CONFIG_RTE_LIBRTE_DPAA_BUS=y
 CONFIG_RTE_LIBRTE_DPAA_MEMPOOL=y
index aa62645..5324fef 100644 (file)
@@ -17,7 +17,6 @@ DPDK_18.08 {
        rte_vmbus_map_device;
        rte_vmbus_max_channels;
        rte_vmbus_probe;
-       rte_vmbus_probe_one;
        rte_vmbus_register;
        rte_vmbus_scan;
        rte_vmbus_sub_channel_index;
index 1ae0eaf..664398d 100644 (file)
@@ -33,6 +33,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += liquidio
 DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
 DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
 DIRS-$(CONFIG_RTE_LIBRTE_MVPP2_PMD) += mvpp2
+DIRS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += netvsc
 DIRS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp
 DIRS-$(CONFIG_RTE_LIBRTE_BNXT_PMD) += bnxt
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_NULL) += null
index d19b195..9c28ed4 100644 (file)
@@ -19,6 +19,7 @@ drivers = ['af_packet',
        'kni',
        'liquidio',
        'mvpp2',
+       'netvsc',
        'nfp',
        'null', 'octeontx', 'pcap', 'ring',
        'sfc',
diff --git a/drivers/net/netvsc/Makefile b/drivers/net/netvsc/Makefile
new file mode 100644 (file)
index 0000000..3c713af
--- /dev/null
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: BSD-3-Clause
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+LIB = librte_pmd_netvsc.a
+
+CFLAGS += -O3 $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
+EXPORT_MAP := rte_pmd_netvsc_version.map
+
+LIBABIVER := 1
+
+SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_ethdev.c
+SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rndis.c
+SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_nvs.c
+
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_vmbus
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
new file mode 100644 (file)
index 0000000..47ed760
--- /dev/null
@@ -0,0 +1,759 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016-2018 Microsoft Corporation
+ * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_memcpy.h>
+#include <rte_string_fns.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ether.h>
+#include <rte_ethdev_driver.h>
+#include <rte_cycles.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_dev.h>
+#include <rte_bus_vmbus.h>
+
+#include "hn_logs.h"
+#include "hn_var.h"
+#include "hn_rndis.h"
+#include "hn_nvs.h"
+#include "ndis.h"
+
+/*
+ * TX offload flags accepted by hn_dev_configure(); a request containing any
+ * other TX offload bit is rejected with -EINVAL.
+ */
+#define HN_TX_OFFLOAD_CAPS (DEV_TX_OFFLOAD_IPV4_CKSUM | \
+                           DEV_TX_OFFLOAD_TCP_CKSUM  | \
+                           DEV_TX_OFFLOAD_UDP_CKSUM  | \
+                           DEV_TX_OFFLOAD_TCP_TSO    | \
+                           DEV_TX_OFFLOAD_MULTI_SEGS | \
+                           DEV_TX_OFFLOAD_VLAN_INSERT)
+
+/*
+ * RX offload flags accepted by hn_dev_configure(); a request containing any
+ * other RX offload bit is rejected with -EINVAL.
+ */
+#define HN_RX_OFFLOAD_CAPS (DEV_RX_OFFLOAD_CHECKSUM | \
+                           DEV_RX_OFFLOAD_VLAN_STRIP | \
+                           DEV_RX_OFFLOAD_CRC_STRIP)
+
+/*
+ * Log type identifiers used by the PMD_INIT_LOG / PMD_DRV_LOG macros.
+ * Both are assigned by hn_init_log() from rte_log_register().
+ */
+int hn_logtype_init;
+int hn_logtype_driver;
+
+/* Describes one extended statistic: its name suffix and where to read it. */
+struct hn_xstats_name_off {
+       char name[RTE_ETH_XSTATS_NAME_SIZE];   /* suffix appended to "tx_qN_" / "rx_qN_" */
+       unsigned int offset;                   /* byte offset of the counter in struct hn_stats */
+};
+
+/*
+ * Per-queue extended statistics. The same table is used for both TX and RX
+ * queues; hn_dev_xstats_get() reads each counter as a uint64_t located at
+ * 'offset' bytes into the queue's struct hn_stats.
+ */
+static const struct hn_xstats_name_off hn_stat_strings[] = {
+       { "good_packets",           offsetof(struct hn_stats, packets) },
+       { "good_bytes",             offsetof(struct hn_stats, bytes) },
+       { "errors",                 offsetof(struct hn_stats, errors) },
+       { "allocation_failed",      offsetof(struct hn_stats, nomemory) },
+       { "multicast_packets",      offsetof(struct hn_stats, multicast) },
+       { "broadcast_packets",      offsetof(struct hn_stats, broadcast) },
+       { "undersize_packets",      offsetof(struct hn_stats, size_bins[0]) },
+       { "size_64_packets",        offsetof(struct hn_stats, size_bins[1]) },
+       { "size_65_127_packets",    offsetof(struct hn_stats, size_bins[2]) },
+       { "size_128_255_packets",   offsetof(struct hn_stats, size_bins[3]) },
+       { "size_256_511_packets",   offsetof(struct hn_stats, size_bins[4]) },
+       { "size_512_1023_packets",  offsetof(struct hn_stats, size_bins[5]) },
+       { "size_1024_1518_packets", offsetof(struct hn_stats, size_bins[6]) },
+       { "size_1519_max_packets",  offsetof(struct hn_stats, size_bins[7]) },
+};
+
+/*
+ * Obtain the ethdev port named after a VMBus device and bind it to that
+ * device.
+ *
+ * In the primary process a new port is allocated and, when
+ * private_data_size is non-zero, driver private data of that size is
+ * allocated with rte_zmalloc_socket() on the device's NUMA node.
+ * In any other process the existing port is attached instead.
+ *
+ * Returns the ethdev, or NULL when dev is NULL or any step fails.
+ */
+static struct rte_eth_dev *
+eth_dev_vmbus_allocate(struct rte_vmbus_device *dev, size_t private_data_size)
+{
+       struct rte_eth_dev *port;
+       const char *devname;
+
+       if (dev == NULL)
+               return NULL;
+
+       devname = dev->device.name;
+
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+               /* Secondary: the primary already created the port */
+               port = rte_eth_dev_attach_secondary(devname);
+               if (port == NULL) {
+                       PMD_DRV_LOG(NOTICE, "can not attach secondary");
+                       return NULL;
+               }
+       } else {
+               port = rte_eth_dev_allocate(devname);
+               if (port == NULL) {
+                       PMD_DRV_LOG(NOTICE, "can not allocate rte ethdev");
+                       return NULL;
+               }
+
+               if (private_data_size != 0) {
+                       void *priv;
+
+                       priv = rte_zmalloc_socket(devname, private_data_size,
+                                                 RTE_CACHE_LINE_SIZE,
+                                                 dev->device.numa_node);
+                       port->data->dev_private = priv;
+                       if (priv == NULL) {
+                               PMD_DRV_LOG(NOTICE, "can not allocate driver data");
+                               rte_eth_dev_release_port(port);
+                               return NULL;
+                       }
+               }
+       }
+
+       /* Bind the port to the underlying bus device */
+       port->device = &dev->device;
+       port->intr_handle = &dev->intr_handle;
+
+       return port;
+}
+
+/*
+ * Undo eth_dev_vmbus_allocate(): free the driver private data (primary
+ * process only), release the ethdev port and unbind it from the VMBus
+ * device.
+ */
+static void
+eth_dev_vmbus_release(struct rte_eth_dev *eth_dev)
+{
+       /*
+        * Free the private data BEFORE releasing the port. The contents of
+        * eth_dev->data cannot be relied upon once the port has been
+        * released, so reading dev_private afterwards risks passing a
+        * cleared pointer to rte_free() and leaking the allocation.
+        * Clearing the pointer here also makes a later free a no-op.
+        */
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+               rte_free(eth_dev->data->dev_private);
+
+       eth_dev->data->dev_private = NULL;
+
+       /* free ether device */
+       rte_eth_dev_release_port(eth_dev);
+
+       /*
+        * Secondary process will check the name to attach.
+        * Clear this field to avoid attaching a released port.
+        */
+       eth_dev->data->name[0] = '\0';
+
+       eth_dev->device = NULL;
+       eth_dev->intr_handle = NULL;
+}
+
+/* Update link status.
+ * Note: the DPDK definition of "wait_to_complete"
+ *   means block this call until link is up.
+ *   which is not worth supporting.
+ *
+ * Queries the link state and speed through RNDIS and publishes the result
+ * with rte_eth_linkstatus_set() when the up/down state has changed.
+ *
+ * Returns the error from hn_rndis_get_linkstatus() if that query fails,
+ * 0 when the link state is unchanged, otherwise the return value of
+ * rte_eth_linkstatus_set().
+ */
+static int
+hn_dev_link_update(struct rte_eth_dev *dev,
+                  __rte_unused int wait_to_complete)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       struct rte_eth_link link, old;
+       int error;
+
+       old = dev->data->dev_link;
+
+       error = hn_rndis_get_linkstatus(hv);
+       if (error)
+               return error;
+
+       /* Best effort: a failed speed query is not treated as an error */
+       hn_rndis_get_linkspeed(hv);
+
+       link = (struct rte_eth_link) {
+               .link_duplex = ETH_LINK_FULL_DUPLEX,
+               /*
+                * link_autoneg takes ETH_LINK_FIXED / ETH_LINK_AUTONEG.
+                * ETH_LINK_SPEED_FIXED is a link_speeds bitmap flag whose
+                * value would report autonegotiation as enabled.
+                */
+               .link_autoneg = ETH_LINK_FIXED,
+               /* NOTE(review): presumably converts RNDIS 100 bps units to Mbps - confirm */
+               .link_speed = hv->link_speed / 10000,
+       };
+
+       if (hv->link_status == NDIS_MEDIA_STATE_CONNECTED)
+               link.link_status = ETH_LINK_UP;
+       else
+               link.link_status = ETH_LINK_DOWN;
+
+       /* Only publish (and log) actual up/down transitions */
+       if (old.link_status == link.link_status)
+               return 0;
+
+       PMD_INIT_LOG(DEBUG, "Port %d is %s", dev->data->port_id,
+                    (link.link_status == ETH_LINK_UP) ? "up" : "down");
+
+       return rte_eth_linkstatus_set(dev, &link);
+}
+
+/*
+ * Report device capabilities: fixed limits defined by the driver, queue
+ * counts taken from hv->max_queues, and offload capabilities filled in by
+ * hn_rndis_get_offload().
+ */
+static void hn_dev_info_get(struct rte_eth_dev *dev,
+                           struct rte_eth_dev_info *dev_info)
+{
+       struct hn_data *hv = dev->data->dev_private;
+
+       /* Queue limits negotiated at init time */
+       dev_info->max_tx_queues = hv->max_queues;
+       dev_info->max_rx_queues = hv->max_queues;
+
+       /* RSS capabilities */
+       dev_info->flow_type_rss_offloads =
+               ETH_RSS_IPV4 | ETH_RSS_IPV6 | ETH_RSS_TCP | ETH_RSS_UDP;
+       dev_info->hash_key_size = NDIS_HASH_KEYSIZE_TOEPLITZ;
+
+       /* Static limits */
+       dev_info->max_mac_addrs = 1;
+       dev_info->max_rx_pktlen = HN_MAX_XFER_LEN;
+       dev_info->min_rx_bufsize = HN_MIN_RX_BUF_SIZE;
+       dev_info->speed_capa = ETH_LINK_SPEED_10G;
+
+       /* Must stay last: matches the original ordering of the query */
+       hn_rndis_get_offload(hv, dev_info);
+}
+
+/* Switch the RNDIS receive filter to promiscuous mode. */
+static void
+hn_dev_promiscuous_enable(struct rte_eth_dev *dev)
+{
+       struct hn_data *priv = dev->data->dev_private;
+       const uint32_t rxfilter = NDIS_PACKET_TYPE_PROMISCUOUS;
+
+       hn_rndis_set_rxfilter(priv, rxfilter);
+}
+
+/*
+ * Leave promiscuous mode: accept directed and broadcast frames, plus all
+ * multicast frames when the port's all_multicast flag is set.
+ */
+static void
+hn_dev_promiscuous_disable(struct rte_eth_dev *dev)
+{
+       struct hn_data *priv = dev->data->dev_private;
+       const uint32_t mcast = dev->data->all_multicast ?
+               NDIS_PACKET_TYPE_ALL_MULTICAST : 0;
+
+       hn_rndis_set_rxfilter(priv,
+                             NDIS_PACKET_TYPE_DIRECTED |
+                             NDIS_PACKET_TYPE_BROADCAST |
+                             mcast);
+}
+
+/* Set the RNDIS receive filter to directed + broadcast + all multicast. */
+static void
+hn_dev_allmulticast_enable(struct rte_eth_dev *dev)
+{
+       struct hn_data *priv = dev->data->dev_private;
+       const uint32_t rxfilter = NDIS_PACKET_TYPE_DIRECTED |
+                                 NDIS_PACKET_TYPE_ALL_MULTICAST |
+                                 NDIS_PACKET_TYPE_BROADCAST;
+
+       hn_rndis_set_rxfilter(priv, rxfilter);
+}
+
+/* Set the RNDIS receive filter to directed + broadcast only. */
+static void
+hn_dev_allmulticast_disable(struct rte_eth_dev *dev)
+{
+       struct hn_data *priv = dev->data->dev_private;
+       const uint32_t rxfilter = NDIS_PACKET_TYPE_DIRECTED |
+                                 NDIS_PACKET_TYPE_BROADCAST;
+
+       hn_rndis_set_rxfilter(priv, rxfilter);
+}
+
+/*
+ * Setup shared rx/tx queue data.
+ *
+ * Requests 'subchan' sub channels through hn_nvs_alloc_subchans(), which
+ * may update the count, then opens that many and records each one in
+ * hv->channels[] at the index reported by rte_vmbus_sub_channel_index().
+ *
+ * Returns 0 on success, the error from the allocation or open call, or
+ * -EIO when a sub channel reports an index outside 1..hv->max_queues.
+ * Sub channels opened before a failure are not closed here.
+ */
+static int hn_subchan_configure(struct hn_data *hv,
+                               uint32_t subchan)
+{
+       struct vmbus_channel *primary = hn_primary_chan(hv);
+       int err;
+       unsigned int retry = 0;
+
+       PMD_DRV_LOG(DEBUG,
+                   "open %u subchannels", subchan);
+
+       /* Send create sub channels command */
+       err = hn_nvs_alloc_subchans(hv, &subchan);
+       if (err)
+               return  err;
+
+       while (subchan > 0) {
+               struct vmbus_channel *new_sc;
+               uint16_t chn_index;
+
+               err = rte_vmbus_subchan_open(primary, &new_sc);
+               /* Retry -ENOENT up to 999 times, 10 ms apart, per sub channel */
+               if (err == -ENOENT && ++retry < 1000) {
+                       /* This can happen if not ready yet */
+                       rte_delay_ms(10);
+                       continue;
+               }
+
+               if (err) {
+                       PMD_DRV_LOG(ERR,
+                                   "open subchannel failed: %d", err);
+                       return err;
+               }
+
+               /* Opened one: the retry budget restarts for the next one */
+               retry = 0;
+               chn_index = rte_vmbus_sub_channel_index(new_sc);
+               /* Index 0 is the primary channel, so it is never valid here */
+               if (chn_index == 0 || chn_index > hv->max_queues) {
+                       PMD_DRV_LOG(ERR,
+                                   "Invalid subchannel offermsg channel %u",
+                                   chn_index);
+                       return -EIO;
+               }
+
+               PMD_DRV_LOG(DEBUG, "new sub channel %u", chn_index);
+               hv->channels[chn_index] = new_sc;
+               --subchan;
+       }
+
+       return err;
+}
+
+/*
+ * Apply the port configuration stored in dev->data->dev_conf.
+ *
+ * Rejects offloads outside HN_TX_OFFLOAD_CAPS / HN_RX_OFFLOAD_CAPS, pushes
+ * the requested offloads to the host through RNDIS and, when more than one
+ * queue is requested, opens the extra sub channels and configures RSS.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported offload, or the error
+ * returned by the failing configuration step.
+ */
+static int hn_dev_configure(struct rte_eth_dev *dev)
+{
+       const struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
+       const struct rte_eth_rxmode *rxmode = &dev_conf->rxmode;
+       const struct rte_eth_txmode *txmode = &dev_conf->txmode;
+
+       const struct rte_eth_rss_conf *rss_conf =
+               &dev_conf->rx_adv_conf.rss_conf;
+       struct hn_data *hv = dev->data->dev_private;
+       uint64_t unsupported;
+       int err, subchan;
+
+       PMD_INIT_FUNC_TRACE();
+
+       unsupported = txmode->offloads & ~HN_TX_OFFLOAD_CAPS;
+       if (unsupported) {
+               PMD_DRV_LOG(NOTICE,
+                           "unsupported TX offload: %#" PRIx64,
+                           unsupported);
+               return -EINVAL;
+       }
+
+       unsupported = rxmode->offloads & ~HN_RX_OFFLOAD_CAPS;
+       if (unsupported) {
+               /* Log only the offending bits, as the TX branch does */
+               PMD_DRV_LOG(NOTICE,
+                           "unsupported RX offload: %#" PRIx64,
+                           unsupported);
+               return -EINVAL;
+       }
+
+       err = hn_rndis_conf_offload(hv, txmode->offloads,
+                                   rxmode->offloads);
+       if (err) {
+               PMD_DRV_LOG(NOTICE,
+                           "offload configure failed");
+               return err;
+       }
+
+       /* One channel per queue pair; the primary channel serves queue 0 */
+       hv->num_queues = RTE_MAX(dev->data->nb_rx_queues,
+                                dev->data->nb_tx_queues);
+       subchan = hv->num_queues - 1;
+       if (subchan > 0) {
+               err = hn_subchan_configure(hv, subchan);
+               if (err) {
+                       PMD_DRV_LOG(NOTICE,
+                                   "subchannel configuration failed");
+                       return err;
+               }
+
+               err = hn_rndis_conf_rss(hv, rss_conf);
+               if (err) {
+                       PMD_DRV_LOG(NOTICE,
+                                   "rss configuration failed");
+                       return err;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Accumulate the per-queue software counters into 'stats'.
+ *
+ * Totals are added to the values already present in 'stats'; per-queue
+ * slots are filled only for the first RTE_ETHDEV_QUEUE_STAT_CNTRS queues.
+ * Queues that have not been set up (NULL) are skipped. Always returns 0.
+ */
+static int hn_dev_stats_get(struct rte_eth_dev *dev,
+                           struct rte_eth_stats *stats)
+{
+       unsigned int q;
+
+       for (q = 0; q < dev->data->nb_tx_queues; q++) {
+               const struct hn_tx_queue *txq = dev->data->tx_queues[q];
+
+               if (txq == NULL)
+                       continue;
+
+               stats->opackets += txq->stats.packets;
+               stats->obytes += txq->stats.bytes;
+               /* Allocation failures on transmit count as output errors */
+               stats->oerrors += txq->stats.errors + txq->stats.nomemory;
+
+               if (q >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
+                       continue;
+
+               stats->q_opackets[q] = txq->stats.packets;
+               stats->q_obytes[q] = txq->stats.bytes;
+       }
+
+       for (q = 0; q < dev->data->nb_rx_queues; q++) {
+               const struct hn_rx_queue *rxq = dev->data->rx_queues[q];
+
+               if (rxq == NULL)
+                       continue;
+
+               stats->ipackets += rxq->stats.packets;
+               stats->ibytes += rxq->stats.bytes;
+               stats->ierrors += rxq->stats.errors;
+               stats->imissed += rxq->ring_full;
+
+               if (q >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
+                       continue;
+
+               stats->q_ipackets[q] = rxq->stats.packets;
+               stats->q_ibytes[q] = rxq->stats.bytes;
+       }
+
+       stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
+       return 0;
+}
+
+/*
+ * Zero the software counters of every configured TX and RX queue,
+ * including the RX ring_full counter. Also used as the xstats reset hook.
+ */
+static void
+hn_dev_stats_reset(struct rte_eth_dev *dev)
+{
+       unsigned int q;
+
+       PMD_INIT_FUNC_TRACE();
+
+       for (q = 0; q < dev->data->nb_tx_queues; q++) {
+               struct hn_tx_queue *txq = dev->data->tx_queues[q];
+
+               if (txq != NULL)
+                       memset(&txq->stats, 0, sizeof(struct hn_stats));
+       }
+
+       for (q = 0; q < dev->data->nb_rx_queues; q++) {
+               struct hn_rx_queue *rxq = dev->data->rx_queues[q];
+
+               if (rxq != NULL) {
+                       memset(&rxq->stats, 0, sizeof(struct hn_stats));
+                       rxq->ring_full = 0;
+               }
+       }
+}
+
+/*
+ * Fill in the names of the per-queue extended statistics.
+ *
+ * With xstats_names == NULL, returns the number of entries computed from
+ * the configured TX and RX queue counts. Otherwise writes
+ * "tx_q<N>_<stat>" then "rx_q<N>_<stat>" names for every non-NULL queue
+ * and returns how many were written.
+ */
+static int
+hn_dev_xstats_get_names(struct rte_eth_dev *dev,
+                       struct rte_eth_xstat_name *xstats_names,
+                       __rte_unused unsigned int limit)
+{
+       unsigned int qid, s, written = 0;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (xstats_names == NULL)
+               return dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings)
+                       + dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings);
+
+       /* Note: limit checked in rte_eth_xstats_names() */
+       for (qid = 0; qid < dev->data->nb_tx_queues; qid++) {
+               if (dev->data->tx_queues[qid] == NULL)
+                       continue;
+
+               for (s = 0; s < RTE_DIM(hn_stat_strings); s++) {
+                       struct rte_eth_xstat_name *slot =
+                               &xstats_names[written++];
+
+                       snprintf(slot->name, RTE_ETH_XSTATS_NAME_SIZE,
+                                "tx_q%u_%s", qid, hn_stat_strings[s].name);
+               }
+       }
+
+       for (qid = 0; qid < dev->data->nb_rx_queues; qid++) {
+               if (dev->data->rx_queues[qid] == NULL)
+                       continue;
+
+               for (s = 0; s < RTE_DIM(hn_stat_strings); s++) {
+                       struct rte_eth_xstat_name *slot =
+                               &xstats_names[written++];
+
+                       snprintf(slot->name, RTE_ETH_XSTATS_NAME_SIZE,
+                                "rx_q%u_%s", qid, hn_stat_strings[s].name);
+               }
+       }
+
+       return written;
+}
+
+/*
+ * Copy the per-queue extended statistic values into 'xstats'.
+ *
+ * When 'n' is smaller than the number of entries implied by the configured
+ * queue counts, nothing is written and that required number is returned.
+ * Otherwise values are written for every non-NULL TX queue, then every
+ * non-NULL RX queue, in hn_stat_strings order; the count written is
+ * returned.
+ */
+static int
+hn_dev_xstats_get(struct rte_eth_dev *dev,
+                 struct rte_eth_xstat *xstats,
+                 unsigned int n)
+{
+       const unsigned int needed =
+               dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings)
+               + dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings);
+       unsigned int qid, s, written = 0;
+       const char *base;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (n < needed)
+               return needed;
+
+       for (qid = 0; qid < dev->data->nb_tx_queues; qid++) {
+               const struct hn_tx_queue *txq = dev->data->tx_queues[qid];
+
+               if (txq == NULL)
+                       continue;
+
+               /* Counters are located by byte offset inside struct hn_stats */
+               base = (const char *)&txq->stats;
+               for (s = 0; s < RTE_DIM(hn_stat_strings); s++) {
+                       const uint64_t *counter = (const uint64_t *)
+                               (base + hn_stat_strings[s].offset);
+
+                       xstats[written++].value = *counter;
+               }
+       }
+
+       for (qid = 0; qid < dev->data->nb_rx_queues; qid++) {
+               const struct hn_rx_queue *rxq = dev->data->rx_queues[qid];
+
+               if (rxq == NULL)
+                       continue;
+
+               base = (const char *)&rxq->stats;
+               for (s = 0; s < RTE_DIM(hn_stat_strings); s++) {
+                       const uint64_t *counter = (const uint64_t *)
+                               (base + hn_stat_strings[s].offset);
+
+                       xstats[written++].value = *counter;
+               }
+       }
+
+       return written;
+}
+
+/*
+ * Start the port by opening the RNDIS receive filter for broadcast,
+ * all-multicast and directed frames.
+ *
+ * Returns -ENOTSUP when link status change interrupts are requested,
+ * otherwise the result of hn_rndis_set_rxfilter().
+ */
+static int
+hn_dev_start(struct rte_eth_dev *dev)
+{
+       struct hn_data *priv = dev->data->dev_private;
+       const uint32_t rxfilter = NDIS_PACKET_TYPE_BROADCAST |
+                                 NDIS_PACKET_TYPE_ALL_MULTICAST |
+                                 NDIS_PACKET_TYPE_DIRECTED;
+
+       PMD_INIT_FUNC_TRACE();
+
+       /* check if lsc interrupt feature is enabled */
+       if (dev->data->dev_conf.intr_conf.lsc != 0) {
+               PMD_DRV_LOG(ERR, "link status not supported yet");
+               return -ENOTSUP;
+       }
+
+       return hn_rndis_set_rxfilter(priv, rxfilter);
+}
+
+/* Stop the port by clearing the RNDIS receive filter (filter value 0). */
+static void
+hn_dev_stop(struct rte_eth_dev *dev)
+{
+       struct hn_data *priv;
+
+       PMD_INIT_FUNC_TRACE();
+
+       priv = dev->data->dev_private;
+       hn_rndis_set_rxfilter(priv, 0);
+}
+
+/* Close hook: only emits a debug log; no resources are released here. */
+static void
+hn_dev_close(struct rte_eth_dev *dev __rte_unused)
+{
+       PMD_INIT_LOG(DEBUG, "close");
+}
+
+/*
+ * ethdev operations implemented by this driver. The queue setup/release
+ * handlers are not defined in this file. Basic and extended statistics
+ * share one reset handler.
+ */
+static const struct eth_dev_ops hn_eth_dev_ops = {
+       .dev_configure          = hn_dev_configure,
+       .dev_start              = hn_dev_start,
+       .dev_stop               = hn_dev_stop,
+       .dev_close              = hn_dev_close,
+       .dev_infos_get          = hn_dev_info_get,
+       .promiscuous_enable     = hn_dev_promiscuous_enable,
+       .promiscuous_disable    = hn_dev_promiscuous_disable,
+       .allmulticast_enable    = hn_dev_allmulticast_enable,
+       .allmulticast_disable   = hn_dev_allmulticast_disable,
+       .tx_queue_setup         = hn_dev_tx_queue_setup,
+       .tx_queue_release       = hn_dev_tx_queue_release,
+       .rx_queue_setup         = hn_dev_rx_queue_setup,
+       .rx_queue_release       = hn_dev_rx_queue_release,
+       .link_update            = hn_dev_link_update,
+       .stats_get              = hn_dev_stats_get,
+       .xstats_get             = hn_dev_xstats_get,
+       .xstats_get_names       = hn_dev_xstats_get_names,
+       .stats_reset            = hn_dev_stats_reset,
+       .xstats_reset           = hn_dev_stats_reset,
+};
+
+/*
+ * Setup connection between PMD and kernel.
+ *
+ * Attaches NVS with the given MTU, then RNDIS. If RNDIS fails, NVS is
+ * detached again so that a failed call leaves nothing attached.
+ * Returns 0 on success or the error of the step that failed.
+ */
+static int
+hn_attach(struct hn_data *hv, unsigned int mtu)
+{
+       int rc;
+
+       /* Attach NVS */
+       rc = hn_nvs_attach(hv, mtu);
+       if (rc != 0)
+               return rc;
+
+       /* Attach RNDIS */
+       rc = hn_rndis_attach(hv);
+       if (rc != 0) {
+               hn_nvs_detach(hv);
+               return rc;
+       }
+
+       /*
+        * NOTE:
+        * Under certain conditions on certain versions of Hyper-V,
+        * the RNDIS rxfilter is _not_ zero on the hypervisor side
+        * after the successful RNDIS initialization.
+        */
+       hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_NONE);
+       return 0;
+}
+
+/*
+ * Tear down what hn_attach() set up: detach NVS, then RNDIS.
+ * NOTE(review): this is the same order as attach, not the reverse -
+ * confirm that is intended.
+ */
+static void
+hn_detach(struct hn_data *hv)
+{
+       hn_nvs_detach(hv);
+       hn_rndis_detach(hv);
+}
+
+/*
+ * Initialize a netvsc ethdev port.
+ *
+ * Installs the ops table and burst functions in every process. In the
+ * primary process it additionally opens the primary VMBus channel,
+ * allocates the primary RX queue, attaches NVS/RNDIS, creates the TX pool,
+ * reads the MAC address and derives hv->max_queues from the RSS
+ * capabilities and the VMBus channel limit.
+ *
+ * Returns 0 on success or a negative error. On failure everything acquired
+ * here (attachment, primary queue, primary channel) is released again.
+ */
+static int
+eth_hn_dev_init(struct rte_eth_dev *eth_dev)
+{
+       struct hn_data *hv = eth_dev->data->dev_private;
+       struct rte_device *device = eth_dev->device;
+       struct rte_vmbus_device *vmbus;
+       unsigned int rxr_cnt;
+       int err, max_chan;
+
+       PMD_INIT_FUNC_TRACE();
+
+       vmbus = container_of(device, struct rte_vmbus_device, device);
+       eth_dev->dev_ops = &hn_eth_dev_ops;
+       eth_dev->tx_pkt_burst = &hn_xmit_pkts;
+       eth_dev->rx_pkt_burst = &hn_recv_pkts;
+
+       /*
+        * for secondary processes, we don't initialize any further as primary
+        * has already done this work.
+        */
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return 0;
+
+       /* Since Hyper-V only supports one MAC address, just use local data */
+       eth_dev->data->mac_addrs = &hv->mac_addr;
+
+       hv->vmbus = vmbus;
+       hv->rxbuf_res = &vmbus->resource[HV_RECV_BUF_MAP];
+       hv->chim_res  = &vmbus->resource[HV_SEND_BUF_MAP];
+       hv->port_id = eth_dev->data->port_id;
+
+       /* Initialize primary channel input for control operations */
+       err = rte_vmbus_chan_open(vmbus, &hv->channels[0]);
+       if (err)
+               return err;
+
+       hv->primary = hn_rx_queue_alloc(hv, 0,
+                                       eth_dev->device->numa_node);
+
+       if (!hv->primary) {
+               /* Do not leak the channel opened just above */
+               err = -ENOMEM;
+               goto failed_attach;
+       }
+
+       /* hn_attach() unwinds itself on failure, so skip hn_detach() */
+       err = hn_attach(hv, ETHER_MTU);
+       if  (err)
+               goto failed_attach;
+
+       err = hn_tx_pool_init(eth_dev);
+       if (err)
+               goto failed;
+
+       err = hn_rndis_get_eaddr(hv, hv->mac_addr.addr_bytes);
+       if (err)
+               goto failed;
+
+       max_chan = rte_vmbus_max_channels(vmbus);
+       PMD_INIT_LOG(DEBUG, "VMBus max channels %d", max_chan);
+       if (max_chan <= 0) {
+               /*
+                * err is still 0 here; without an explicit code the probe
+                * would report success for a device that was torn down.
+                */
+               err = -ENODEV;
+               goto failed;
+       }
+
+       /* Fall back to a single queue when RSS capabilities are unknown */
+       if (hn_rndis_query_rsscaps(hv, &rxr_cnt) != 0)
+               rxr_cnt = 1;
+
+       hv->max_queues = RTE_MIN(rxr_cnt, (unsigned int)max_chan);
+
+       return 0;
+
+failed:
+       hn_detach(hv);
+failed_attach:
+       PMD_INIT_LOG(NOTICE, "device init failed");
+
+       /* Same release sequence as eth_hn_dev_uninit() */
+       rte_vmbus_chan_close(hv->channels[0]);
+       rte_free(hv->primary);
+       hv->primary = NULL;
+
+       /* mac_addrs points into the private data; do not leave it dangling */
+       eth_dev->data->mac_addrs = NULL;
+
+       return err;
+}
+
+/*
+ * Tear down a port set up by eth_hn_dev_init().
+ *
+ * Does nothing in non-primary processes. In the primary process it stops
+ * and closes the port, clears the ops and burst function pointers,
+ * detaches NVS/RNDIS, closes the primary channel and frees the primary RX
+ * queue. Always returns 0.
+ */
+static int
+eth_hn_dev_uninit(struct rte_eth_dev *eth_dev)
+{
+       struct hn_data *hv = eth_dev->data->dev_private;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return 0;
+
+       hn_dev_stop(eth_dev);
+       hn_dev_close(eth_dev);
+
+       eth_dev->dev_ops = NULL;
+       eth_dev->tx_pkt_burst = NULL;
+       eth_dev->rx_pkt_burst = NULL;
+
+       hn_detach(hv);
+       /* Close the channel before freeing the queue that references it */
+       rte_vmbus_chan_close(hv->primary->chan);
+       rte_free(hv->primary);
+
+       /* mac_addrs points into hv (see eth_hn_dev_init), so just unlink it */
+       eth_dev->data->mac_addrs = NULL;
+
+       return 0;
+}
+
+/*
+ * VMBus probe callback: allocate (or attach) the ethdev for this device
+ * and initialize it.
+ *
+ * Returns 0 on success, -ENOMEM when no ethdev could be obtained, or the
+ * error from eth_hn_dev_init(), in which case the ethdev is released.
+ */
+static int eth_hn_probe(struct rte_vmbus_driver *drv __rte_unused,
+                       struct rte_vmbus_device *dev)
+{
+       struct rte_eth_dev *port;
+       int rc;
+
+       PMD_INIT_FUNC_TRACE();
+
+       port = eth_dev_vmbus_allocate(dev, sizeof(struct hn_data));
+       if (port == NULL)
+               return -ENOMEM;
+
+       rc = eth_hn_dev_init(port);
+       if (rc != 0) {
+               eth_dev_vmbus_release(port);
+               return rc;
+       }
+
+       rte_eth_dev_probing_finish(port);
+       return rc;
+}
+
+/*
+ * VMBus remove callback: look up the ethdev by device name, uninitialize
+ * it and release the port.
+ *
+ * Returns 0 on success, -ENODEV when no ethdev exists for the device, or
+ * the error from eth_hn_dev_uninit(), in which case the port is kept.
+ */
+static int eth_hn_remove(struct rte_vmbus_device *dev)
+{
+       struct rte_eth_dev *port;
+       int rc;
+
+       PMD_INIT_FUNC_TRACE();
+
+       port = rte_eth_dev_allocated(dev->device.name);
+       if (port == NULL)
+               return -ENODEV;
+
+       rc = eth_hn_dev_uninit(port);
+       if (rc == 0)
+               eth_dev_vmbus_release(port);
+
+       return rc;
+}
+
+/*
+ * Network device GUID.
+ * Device class ids this driver matches; the all-zero entry ends the table.
+ */
+static const rte_uuid_t hn_net_ids[] = {
+       /*  f8615163-df3e-46c5-913f-f2d2f965ed0e */
+       RTE_UUID_INIT(0xf8615163, 0xdf3e, 0x46c5, 0x913f, 0xf2d2f965ed0eULL),
+       { 0 }
+};
+
+/* VMBus driver descriptor: id table plus probe/remove entry points. */
+static struct rte_vmbus_driver rte_netvsc_pmd = {
+       .id_table = hn_net_ids,
+       .probe = eth_hn_probe,
+       .remove = eth_hn_remove,
+};
+
+/* Register the driver as "net_netvsc" and declare its kernel module dependency */
+RTE_PMD_REGISTER_VMBUS(net_netvsc, rte_netvsc_pmd);
+RTE_PMD_REGISTER_KMOD_DEP(net_netvsc, "* uio_hv_generic");
+
+/*
+ * Constructor: register the driver's two log types and, for each one that
+ * registered successfully, set its level to NOTICE.
+ */
+RTE_INIT(hn_init_log);
+static void
+hn_init_log(void)
+{
+       int logtype;
+
+       logtype = rte_log_register("pmd.net.netvsc.init");
+       hn_logtype_init = logtype;
+       if (logtype >= 0)
+               rte_log_set_level(logtype, RTE_LOG_NOTICE);
+
+       logtype = rte_log_register("pmd.net.netvsc.driver");
+       hn_logtype_driver = logtype;
+       if (logtype >= 0)
+               rte_log_set_level(logtype, RTE_LOG_NOTICE);
+}
diff --git a/drivers/net/netvsc/hn_logs.h b/drivers/net/netvsc/hn_logs.h
new file mode 100644 (file)
index 0000000..cddadef
--- /dev/null
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+
+#ifndef _HN_LOGS_H_
+#define _HN_LOGS_H_
+
+#include <rte_log.h>
+
+/* Log type ids; defined in hn_ethdev.c */
+extern int hn_logtype_init;
+extern int hn_logtype_driver;
+
+/* Init-path log: prefixes the calling function name and appends a newline */
+#define PMD_INIT_LOG(level, fmt, args...) \
+       rte_log(RTE_LOG_ ## level, hn_logtype_init, "%s(): " fmt "\n",\
+               __func__, ## args)
+/* Debug-level function entry trace */
+#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
+
+/* RX path log: expands to nothing unless RTE_LIBRTE_NETVSC_DEBUG_RX is defined */
+#ifdef RTE_LIBRTE_NETVSC_DEBUG_RX
+#define PMD_RX_LOG(level, fmt, args...) \
+       rte_log(RTE_LOG_ ## level, hn_logtype_driver, \
+               "%s() rx: " fmt "\n", __func__, ## args)
+#else
+#define PMD_RX_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+/* TX path log: expands to nothing unless RTE_LIBRTE_NETVSC_DEBUG_TX is defined */
+#ifdef RTE_LIBRTE_NETVSC_DEBUG_TX
+#define PMD_TX_LOG(level, fmt, args...) \
+       rte_log(RTE_LOG_ ## level, hn_logtype_driver, \
+               "%s() tx: " fmt "\n", __func__, ## args)
+#else
+#define PMD_TX_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+/* General driver log: prefixes the calling function name and appends a newline */
+#define PMD_DRV_LOG(level, fmt, args...) \
+       rte_log(RTE_LOG_ ## level, hn_logtype_driver, "%s(): " fmt "\n", \
+               __func__, ## args)
+
+#endif /* _HN_LOGS_H_ */
diff --git a/drivers/net/netvsc/hn_nvs.c b/drivers/net/netvsc/hn_nvs.c
new file mode 100644 (file)
index 0000000..77d3b83
--- /dev/null
@@ -0,0 +1,546 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * All rights reserved.
+ */
+
+/*
+ * Network Virtualization Service.
+ */
+
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_string_fns.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ether.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_cycles.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_dev.h>
+#include <rte_bus_vmbus.h>
+
+#include "hn_logs.h"
+#include "hn_var.h"
+#include "hn_nvs.h"
+
+/*
+ * NVS protocol versions this driver can speak.  hn_nvs_init() offers
+ * them to the host in array order and keeps the first one accepted, so
+ * the order of the entries decides which version is preferred.
+ */
+static const uint32_t hn_nvs_version[] = {
+       NVS_VERSION_61,
+       NVS_VERSION_6,
+       NVS_VERSION_5,
+       NVS_VERSION_4,
+       NVS_VERSION_2,
+       NVS_VERSION_1
+};
+
+/*
+ * Post a request on the primary channel as an in-band packet with
+ * VMBUS_CHANPKT_FLAG_NONE.  Nothing is read back from the channel here.
+ *
+ * Returns the result of rte_vmbus_chan_send().
+ */
+static int hn_nvs_req_send(struct hn_data *hv,
+                          void *req, uint32_t reqlen)
+{
+       struct vmbus_channel *chan = hn_primary_chan(hv);
+
+       return rte_vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND,
+                                  req, reqlen, 0,
+                                  VMBUS_CHANPKT_FLAG_NONE, NULL);
+}
+
+/*
+ * Send an NVS request on the primary channel and wait for its response.
+ *
+ * The request is sent in-band with VMBUS_CHANPKT_FLAG_RC.  The channel is
+ * then polled, sleeping HN_CHAN_INTERVAL_US whenever the receive returns
+ * -EAGAIN, until a packet arrives or the receive fails; there is no upper
+ * bound on the wait.
+ *
+ * hv:       device private data
+ * req:      request message, reqlen bytes
+ * resp:     caller buffer that receives the first resplen bytes of the reply
+ * type:     NVS message type the reply must carry
+ *
+ * Returns 0 on success, the send/receive error on transport failure, or
+ * -EINVAL when the reply is too short or has an unexpected type.
+ */
+static int
+hn_nvs_execute(struct hn_data *hv,
+              void *req, uint32_t reqlen,
+              void *resp, uint32_t resplen,
+              uint32_t type)
+{
+       struct vmbus_channel *chan = hn_primary_chan(hv);
+       char buffer[NVS_RESPSIZE_MAX];
+       const struct hn_nvs_hdr *hdr;
+       uint32_t len;
+       int ret;
+
+       /* Send request to ring buffer */
+       ret = rte_vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND,
+                                 req, reqlen, 0,
+                                 VMBUS_CHANPKT_FLAG_RC, NULL);
+
+       if (ret) {
+               PMD_DRV_LOG(ERR, "send request failed: %d", ret);
+               return ret;
+       }
+
+ retry:
+       len = sizeof(buffer);
+       ret = rte_vmbus_chan_recv(chan, buffer, &len, NULL);
+       if (ret == -EAGAIN) {
+               rte_delay_us(HN_CHAN_INTERVAL_US);
+               goto retry;
+       }
+
+       if (ret < 0) {
+               PMD_DRV_LOG(ERR, "recv response failed: %d", ret);
+               return ret;
+       }
+
+       /*
+        * The buffer is not initialized, so the header may only be
+        * inspected once the host is known to have supplied one.
+        */
+       if (len < sizeof(*hdr)) {
+               PMD_DRV_LOG(ERR, "NVS resp too short: %u", len);
+               return -EINVAL;
+       }
+
+       hdr = (const struct hn_nvs_hdr *)buffer;
+       if (hdr->type != type) {
+               PMD_DRV_LOG(ERR, "unexpected NVS resp %#x, expect %#x",
+                           hdr->type, type);
+               return -EINVAL;
+       }
+
+       if (len < resplen) {
+               PMD_DRV_LOG(ERR,
+                           "invalid NVS resp len %u (expect %u)",
+                           len, resplen);
+               return -EINVAL;
+       }
+
+       memcpy(resp, buffer, resplen);
+
+       /* All pass! */
+       return 0;
+}
+
+/*
+ * Offer exactly one NVS protocol version to the host.
+ *
+ * Returns 0 when the host answers with NVS_STATUS_OK, -EINVAL when it
+ * answers with any other status, or the error from hn_nvs_execute().
+ */
+static int
+hn_nvs_doinit(struct hn_data *hv, uint32_t nvs_ver)
+{
+       struct hn_nvs_init_resp reply;
+       struct hn_nvs_init request = {
+               .type = NVS_TYPE_INIT,
+               .ver_min = nvs_ver,
+               .ver_max = nvs_ver,
+       };
+       int ret;
+
+       ret = hn_nvs_execute(hv, &request, sizeof(request),
+                            &reply, sizeof(reply),
+                            NVS_TYPE_INIT_RESP);
+       if (ret != 0)
+               return ret;
+
+       if (reply.status == NVS_STATUS_OK)
+               return 0;
+
+       /* A refusal is not fatal: the caller goes on to other versions */
+       PMD_INIT_LOG(DEBUG, "nvs init failed for ver 0x%x",
+                    nvs_ver);
+       return -EINVAL;
+}
+
+/*
+ * Tell NVS which receive buffer to use and record its geometry.
+ *
+ * On success hv->rxbuf_section_cnt holds the slot count reported by the
+ * host and hv->rxbuf_info points to a zeroed array with one entry per
+ * slot.
+ *
+ * Returns 0 on success, the error from hn_nvs_execute(), -EIO when the
+ * host reports failure or does not describe exactly one section, or
+ * -ENOMEM when the per-slot array cannot be allocated.
+ */
+static int
+hn_nvs_conn_rxbuf(struct hn_data *hv)
+{
+       struct hn_nvs_rxbuf_conn conn;
+       struct hn_nvs_rxbuf_connresp resp;
+       uint32_t status;
+       int error;
+
+       /* Kernel has already setup RXBUF on primary channel. */
+
+       /*
+        * Connect RXBUF to NVS.
+        *
+        * Zero the whole message first so the reserved bytes do not
+        * carry stale stack contents to the host.
+        */
+       memset(&conn, 0, sizeof(conn));
+       conn.type = NVS_TYPE_RXBUF_CONN;
+       conn.gpadl = hv->rxbuf_res->phys_addr;
+       conn.sig = NVS_RXBUF_SIG;
+       PMD_DRV_LOG(DEBUG, "connect rxbuff va=%p gpad=%#" PRIx64,
+                   hv->rxbuf_res->addr,
+                   hv->rxbuf_res->phys_addr);
+
+       error = hn_nvs_execute(hv, &conn, sizeof(conn),
+                              &resp, sizeof(resp),
+                              NVS_TYPE_RXBUF_CONNRESP);
+       if (error) {
+               PMD_DRV_LOG(ERR,
+                           "exec nvs rxbuf conn failed: %d",
+                           error);
+               return error;
+       }
+
+       status = resp.status;
+       if (status != NVS_STATUS_OK) {
+               PMD_DRV_LOG(ERR,
+                           "nvs rxbuf conn failed: %x", status);
+               return -EIO;
+       }
+       /* The response structure only has room for a single section */
+       if (resp.nsect != 1) {
+               PMD_DRV_LOG(ERR,
+                           "nvs rxbuf response num sections %u != 1",
+                           resp.nsect);
+               return -EIO;
+       }
+
+       PMD_DRV_LOG(INFO,
+                   "receive buffer size %u count %u",
+                   resp.nvs_sect[0].slotsz,
+                   resp.nvs_sect[0].slotcnt);
+       hv->rxbuf_section_cnt = resp.nvs_sect[0].slotcnt;
+
+       hv->rxbuf_info = rte_calloc("HN_RXBUF_INFO", hv->rxbuf_section_cnt,
+                                   sizeof(*hv->rxbuf_info), RTE_CACHE_LINE_SIZE);
+       if (!hv->rxbuf_info) {
+               PMD_DRV_LOG(ERR,
+                           "could not allocate rxbuf info");
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/*
+ * Ask NVS to stop using the receive buffer and release the per-slot
+ * bookkeeping array.
+ *
+ * A send failure is logged but otherwise ignored; the array is freed and
+ * the function waits 200 ms either way.
+ */
+static void
+hn_nvs_disconn_rxbuf(struct hn_data *hv)
+{
+       struct hn_nvs_rxbuf_disconn disconn;
+       int error;
+
+       /*
+        * Disconnect RXBUF from NVS.
+        */
+       memset(&disconn, 0, sizeof(disconn));
+       disconn.type = NVS_TYPE_RXBUF_DISCONN;
+       disconn.sig = NVS_RXBUF_SIG;
+
+       /* NOTE: No response. */
+       error = hn_nvs_req_send(hv, &disconn, sizeof(disconn));
+       if (error) {
+               PMD_DRV_LOG(ERR,
+                           "send nvs rxbuf disconn failed: %d",
+                           error);
+       }
+
+       /*
+        * Clear the pointer after freeing: this function is reachable
+        * from both the attach error path and detach, and a stale
+        * pointer would be freed a second time.
+        */
+       rte_free(hv->rxbuf_info);
+       hv->rxbuf_info = NULL;
+
+       /*
+        * Linger long enough for NVS to disconnect RXBUF.
+        */
+       rte_delay_ms(200);
+}
+
+/*
+ * Ask NVS to stop using the chimney sending buffer.
+ *
+ * Does nothing when hv->chim_cnt is zero.  Otherwise the request is sent
+ * (a failure is only logged), hv->chim_cnt is reset to zero and the
+ * function waits 200 ms.
+ */
+static void
+hn_nvs_disconn_chim(struct hn_data *hv)
+{
+       struct hn_nvs_chim_disconn request;
+       int ret;
+
+       /* No chimney sections recorded: nothing to disconnect */
+       if (hv->chim_cnt == 0)
+               return;
+
+       memset(&request, 0, sizeof(request));
+       request.type = NVS_TYPE_CHIM_DISCONN;
+       request.sig = NVS_CHIM_SIG;
+
+       /* The host sends no reply to this message */
+       ret = hn_nvs_req_send(hv, &request, sizeof(request));
+       if (ret != 0)
+               PMD_DRV_LOG(ERR,
+                           "send nvs chim disconn failed: %d", ret);
+
+       hv->chim_cnt = 0;
+
+       /* Give NVS time to let go of the chimney sending buffer */
+       rte_delay_ms(200);
+}
+
+/*
+ * Hand the chimney sending buffer to NVS and derive the section layout.
+ *
+ * On success with a usable section size, hv->chim_szmax is the section
+ * size reported by the host and hv->chim_cnt the number of whole
+ * sections that fit in the buffer.  A section size of zero, or one that
+ * is not a multiple of sizeof(uint32_t), is logged and leaves both
+ * fields untouched while still returning 0.
+ *
+ * Returns 0 on success, the error from hn_nvs_execute(), or -EIO when
+ * the host reports a status other than NVS_STATUS_OK.
+ */
+static int
+hn_nvs_conn_chim(struct hn_data *hv)
+{
+       const unsigned long buflen = hv->chim_res->len;
+       struct hn_nvs_chim_connresp reply;
+       struct hn_nvs_chim_conn request = {
+               .type = NVS_TYPE_CHIM_CONN,
+               .gpadl = hv->chim_res->phys_addr,
+               .sig = NVS_CHIM_SIG,
+       };
+       uint32_t section;
+       int ret;
+
+       PMD_DRV_LOG(DEBUG, "connect send buf va=%p gpad=%#" PRIx64,
+                   hv->chim_res->addr,
+                   hv->chim_res->phys_addr);
+
+       ret = hn_nvs_execute(hv, &request, sizeof(request),
+                            &reply, sizeof(reply),
+                            NVS_TYPE_CHIM_CONNRESP);
+       if (ret != 0) {
+               PMD_DRV_LOG(ERR, "exec nvs chim conn failed");
+       } else if (reply.status != NVS_STATUS_OK) {
+               PMD_DRV_LOG(ERR, "nvs chim conn failed: %x",
+                           reply.status);
+               ret = -EIO;
+       }
+
+       /* Both failure modes share the same teardown */
+       if (ret != 0) {
+               hn_nvs_disconn_chim(hv);
+               return ret;
+       }
+
+       section = reply.sectsz;
+       if (section == 0 || (section & (sizeof(uint32_t) - 1)) != 0) {
+               /* Chimney sending is simply not used in this case */
+               PMD_DRV_LOG(NOTICE,
+                           "invalid chimney sending buffer section size: %u",
+                           section);
+               return 0;
+       }
+
+       hv->chim_szmax = section;
+       hv->chim_cnt = buflen / section;
+
+       PMD_DRV_LOG(INFO, "send buffer %lu section size:%u, count:%u",
+                   buflen, hv->chim_szmax, hv->chim_cnt);
+
+       if (buflen % hv->chim_szmax != 0)
+               PMD_DRV_LOG(NOTICE,
+                           "chimney sending sections are not properly aligned");
+
+       return 0;
+}
+
+/*
+ * Send the NDIS configuration message: the MTU (plus ETHER_HDR_LEN) and
+ * the VLAN capability bit.
+ *
+ * Returns the result of hn_nvs_req_send(); a failure is also logged.
+ */
+static int
+hn_nvs_conf_ndis(struct hn_data *hv, unsigned int mtu)
+{
+       struct hn_nvs_ndis_conf request = {
+               .type = NVS_TYPE_NDIS_CONF,
+               .mtu = mtu + ETHER_HDR_LEN,
+               /*
+                * TODO enable SRIOV: for hv->nvs_ver >= NVS_VERSION_5
+                * also set NVS_NDIS_CONF_SRIOV.
+                */
+               .caps = NVS_NDIS_CONF_VLAN,
+       };
+       int ret;
+
+       /* The host sends no reply to this message */
+       ret = hn_nvs_req_send(hv, &request, sizeof(request));
+       if (ret != 0)
+               PMD_DRV_LOG(ERR,
+                           "send nvs ndis conf failed: %d", ret);
+
+       return ret;
+}
+
+/*
+ * Announce the NDIS version stored in hv->ndis_ver to the host.
+ *
+ * Returns the result of hn_nvs_req_send(); a failure is also logged.
+ */
+static int
+hn_nvs_init_ndis(struct hn_data *hv)
+{
+       struct hn_nvs_ndis_init request = {
+               .type = NVS_TYPE_NDIS_INIT,
+               .ndis_major = NDIS_VERSION_MAJOR(hv->ndis_ver),
+               .ndis_minor = NDIS_VERSION_MINOR(hv->ndis_ver),
+       };
+       int ret;
+
+       /* The host sends no reply to this message */
+       ret = hn_nvs_req_send(hv, &request, sizeof(request));
+       if (ret != 0)
+               PMD_DRV_LOG(ERR,
+                           "send nvs ndis init failed: %d", ret);
+
+       return ret;
+}
+
+/*
+ * Negotiate the NVS protocol version.
+ *
+ * Walks hn_nvs_version[] in order and stops at the first version the
+ * host accepts, storing it in hv->nvs_ver.  hv->ndis_ver is set to
+ * NDIS_VERSION_6_1 for NVS versions up to and including NVS_VERSION_4
+ * and to NDIS_VERSION_6_30 otherwise.
+ *
+ * Returns 0 on success or -ENXIO when every version was refused.
+ */
+static int
+hn_nvs_init(struct hn_data *hv)
+{
+       unsigned int idx = 0;
+
+       while (idx < RTE_DIM(hn_nvs_version)) {
+               const uint32_t candidate = hn_nvs_version[idx++];
+               int ret = hn_nvs_doinit(hv, candidate);
+
+               if (ret != 0) {
+                       PMD_INIT_LOG(DEBUG, "version %#x error %d",
+                                    candidate, ret);
+                       continue;
+               }
+
+               hv->nvs_ver = candidate;
+
+               /* The NDIS version follows from the NVS version */
+               hv->ndis_ver = (hv->nvs_ver <= NVS_VERSION_4) ?
+                       NDIS_VERSION_6_1 : NDIS_VERSION_6_30;
+
+               PMD_INIT_LOG(DEBUG,
+                            "NVS version %#x, NDIS version %u.%u",
+                            hv->nvs_ver, NDIS_VERSION_MAJOR(hv->ndis_ver),
+                            NDIS_VERSION_MINOR(hv->ndis_ver));
+               return 0;
+       }
+
+       PMD_DRV_LOG(ERR,
+                   "no NVS compatible version available");
+       return -ENXIO;
+}
+
+/*
+ * Bring up the NVS layer: negotiate the protocol version, configure NDIS
+ * (only for NVS_VERSION_2 and later), initialize NDIS, then connect the
+ * receive buffer and the chimney sending buffer.
+ *
+ * The sequence stops at the first failing step and returns its error.
+ * If connecting the chimney buffer fails, the receive buffer is
+ * disconnected again before returning.  Returns 0 on success.
+ */
+int
+hn_nvs_attach(struct hn_data *hv, unsigned int mtu)
+{
+       int ret;
+
+       /* Version negotiation comes first: later steps depend on it */
+       ret = hn_nvs_init(hv);
+
+       /* NDIS has to be configured before it is initialized */
+       if (ret == 0 && hv->nvs_ver >= NVS_VERSION_2)
+               ret = hn_nvs_conf_ndis(hv, mtu);
+
+       if (ret == 0)
+               ret = hn_nvs_init_ndis(hv);
+
+       if (ret == 0)
+               ret = hn_nvs_conn_rxbuf(hv);
+
+       if (ret != 0)
+               return ret;
+
+       /* RXBUF is connected from here on and must be undone on failure */
+       ret = hn_nvs_conn_chim(hv);
+       if (ret != 0)
+               hn_nvs_disconn_rxbuf(hv);
+
+       return ret;
+}
+
+/*
+ * Tear down the NVS layer by disconnecting the receive buffer and then
+ * the chimney sending buffer.
+ */
+void
+hn_nvs_detach(struct hn_data *hv)
+{
+       PMD_INIT_FUNC_TRACE();
+
+       /* NOTE: there are no requests to stop the NVS. */
+       hn_nvs_disconn_rxbuf(hv);
+       hn_nvs_disconn_chim(hv);
+}
+
+/*
+ * Acknowledge a consumed RXBUF so the hypervisor can recycle it.
+ *
+ * The ack is sent as a completion packet carrying transaction id tid.
+ * A send that returns -EAGAIN is retried after a 1 ms pause, for at most
+ * ten attempts in total; any other error, or running out of attempts,
+ * is logged and the RXBUF is leaked.
+ */
+void
+hn_nvs_ack_rxbuf(struct vmbus_channel *chan, uint64_t tid)
+{
+       struct hn_nvs_rndis_ack ack = {
+               .type = NVS_TYPE_RNDIS_ACK,
+               .status = NVS_STATUS_OK,
+       };
+       unsigned int attempts = 0;
+       int ret;
+
+       PMD_RX_LOG(DEBUG, "ack RX id %" PRIu64, tid);
+
+       for (;;) {
+               ret = rte_vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
+                                         &ack, sizeof(ack), tid,
+                                         VMBUS_CHANPKT_FLAG_NONE, NULL);
+               if (ret == 0)
+                       return;
+
+               /* Only a full ring is worth retrying */
+               if (ret != -EAGAIN)
+                       break;
+
+               /*
+                * NOTE:
+                * This should _not_ happen in real world, since the
+                * consumption of the TX bufring from the TX path is
+                * controlled.
+                */
+               PMD_RX_LOG(NOTICE, "RXBUF ack retry");
+               if (++attempts >= 10)
+                       break;
+
+               rte_delay_ms(1);
+       }
+
+       /* RXBUF leaks! */
+       PMD_DRV_LOG(ERR, "RXBUF ack failed");
+}
+
+/*
+ * Ask the host to allocate sub-channels.
+ *
+ * nsubch: in, the number of sub-channels requested; out, the number the
+ *         host reports as allocated (only written on success).
+ *
+ * Returns 0 on success, the error from hn_nvs_execute(), or -EIO when
+ * the host reports a status other than NVS_STATUS_OK.
+ */
+int
+hn_nvs_alloc_subchans(struct hn_data *hv, uint32_t *nsubch)
+{
+       struct hn_nvs_subch_resp reply;
+       struct hn_nvs_subch_req request = {
+               .type = NVS_TYPE_SUBCH_REQ,
+               .op = NVS_SUBCH_OP_ALLOC,
+               .nsubch = *nsubch,
+       };
+       int ret;
+
+       ret = hn_nvs_execute(hv, &request, sizeof(request),
+                            &reply, sizeof(reply),
+                            NVS_TYPE_SUBCH_RESP);
+       if (ret != 0)
+               return ret;
+
+       if (reply.status != NVS_STATUS_OK) {
+               PMD_INIT_LOG(ERR,
+                            "nvs subch alloc failed: %#x",
+                            reply.status);
+               return -EIO;
+       }
+
+       /* Getting more than was asked for is reported but accepted */
+       if (reply.nsubch > *nsubch)
+               PMD_INIT_LOG(NOTICE,
+                            "%u subchans are allocated, requested %u",
+                            reply.nsubch, *nsubch);
+
+       *nsubch = reply.nsubch;
+
+       return 0;
+}
+
+/*
+ * Tell the host which data path is active.
+ *
+ * path: value placed in the message's active_path field (the header
+ *       defines NVS_DATAPATH_SYNTHETIC and NVS_DATAPATH_VF).
+ *
+ * The function cannot report failure to its caller, so a failed send is
+ * logged instead of being dropped silently.
+ */
+void
+hn_nvs_set_datapath(struct hn_data *hv, uint32_t path)
+{
+       struct hn_nvs_datapath dp;
+       int error;
+
+       memset(&dp, 0, sizeof(dp));
+       dp.type = NVS_TYPE_SET_DATAPATH;
+       dp.active_path = path;
+
+       /* NOTE: No response. */
+       error = hn_nvs_req_send(hv, &dp, sizeof(dp));
+       if (error)
+               PMD_DRV_LOG(ERR,
+                           "send set datapath failed: %d", error);
+}
diff --git a/drivers/net/netvsc/hn_nvs.h b/drivers/net/netvsc/hn_nvs.h
new file mode 100644 (file)
index 0000000..984a9c1
--- /dev/null
@@ -0,0 +1,229 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018 Microsoft Corp.
+ * All rights reserved.
+ */
+
+/*
+ * The indirection table message is the largest message
+ * received from host, and that is 112 bytes.
+ */
+#define NVS_RESPSIZE_MAX       256
+
+/*
+ * NDIS protocol version numbers
+ */
+#define NDIS_VERSION_6_1               0x00060001
+#define NDIS_VERSION_6_20              0x00060014
+#define NDIS_VERSION_6_30              0x0006001e
+#define NDIS_VERSION_MAJOR(ver)        (((ver) & 0xffff0000) >> 16)
+#define NDIS_VERSION_MINOR(ver)        ((ver) & 0xffff)
+
+/*
+ * NVS versions.
+ */
+#define NVS_VERSION_1          0x00002
+#define NVS_VERSION_2          0x30002
+#define NVS_VERSION_4          0x40000
+#define NVS_VERSION_5          0x50000
+#define NVS_VERSION_6          0x60000
+#define NVS_VERSION_61         0x60001
+
+#define NVS_RXBUF_SIG          0xcafe
+#define NVS_CHIM_SIG                   0xface
+
+#define NVS_CHIM_IDX_INVALID           0xffffffff
+
+#define NVS_RNDIS_MTYPE_DATA           0
+#define NVS_RNDIS_MTYPE_CTRL           1
+
+/*
+ * NVS message transaction status codes.
+ */
+#define NVS_STATUS_OK          1
+#define NVS_STATUS_FAILED              2
+
+/*
+ * NVS request/response message types.
+ */
+#define NVS_TYPE_INIT          1
+#define NVS_TYPE_INIT_RESP     2
+
+#define NVS_TYPE_NDIS_INIT     100
+#define NVS_TYPE_RXBUF_CONN    101
+#define NVS_TYPE_RXBUF_CONNRESP        102
+#define NVS_TYPE_RXBUF_DISCONN 103
+#define NVS_TYPE_CHIM_CONN     104
+#define NVS_TYPE_CHIM_CONNRESP 105
+#define NVS_TYPE_CHIM_DISCONN  106
+#define NVS_TYPE_RNDIS         107
+#define NVS_TYPE_RNDIS_ACK     108
+
+#define NVS_TYPE_NDIS_CONF     125
+#define NVS_TYPE_VFASSOC_NOTE  128     /* notification */
+#define NVS_TYPE_SET_DATAPATH  129
+#define NVS_TYPE_SUBCH_REQ     133
+#define NVS_TYPE_SUBCH_RESP    133     /* same as SUBCH_REQ */
+#define NVS_TYPE_TXTBL_NOTE    134     /* notification */
+
+
+/* NVS message common header */
+struct hn_nvs_hdr {
+       uint32_t        type;
+} __rte_packed;
+
+struct hn_nvs_init {
+       uint32_t        type;   /* NVS_TYPE_INIT */
+       uint32_t        ver_min;
+       uint32_t        ver_max;
+       uint8_t         rsvd[28];
+} __rte_packed;
+
+struct hn_nvs_init_resp {
+       uint32_t        type;   /* NVS_TYPE_INIT_RESP */
+       uint32_t        ver;    /* deprecated */
+       uint32_t        rsvd;
+       uint32_t        status; /* NVS_STATUS_ */
+} __rte_packed;
+
+/* No response */
+struct hn_nvs_ndis_conf {
+       uint32_t        type;   /* NVS_TYPE_NDIS_CONF */
+       uint32_t        mtu;
+       uint32_t        rsvd;
+       uint64_t        caps;   /* NVS_NDIS_CONF_ */
+       uint8_t         rsvd1[20];
+} __rte_packed;
+
+#define NVS_NDIS_CONF_SRIOV            0x0004
+#define NVS_NDIS_CONF_VLAN             0x0008
+
+/* No response */
+struct hn_nvs_ndis_init {
+       uint32_t        type;   /* NVS_TYPE_NDIS_INIT */
+       uint32_t        ndis_major;     /* NDIS_VERSION_MAJOR_ */
+       uint32_t        ndis_minor;     /* NDIS_VERSION_MINOR_ */
+       uint8_t         rsvd[28];
+} __rte_packed;
+
+#define NVS_DATAPATH_SYNTHETIC 0
+#define NVS_DATAPATH_VF                1
+
+/* No response */
+struct hn_nvs_datapath {
+       uint32_t        type;   /* NVS_TYPE_SET_DATAPATH */
+       uint32_t        active_path;/* NVS_DATAPATH_* */
+       uint8_t         rsvd[32];
+} __rte_packed;
+
+struct hn_nvs_rxbuf_conn {
+       uint32_t        type;   /* NVS_TYPE_RXBUF_CONN */
+       uint32_t        gpadl;  /* RXBUF vmbus GPADL */
+       uint16_t        sig;    /* NVS_RXBUF_SIG */
+       uint8_t         rsvd[30];
+} __rte_packed;
+
+struct hn_nvs_rxbuf_sect {
+       uint32_t        start;
+       uint32_t        slotsz;
+       uint32_t        slotcnt;
+       uint32_t        end;
+} __rte_packed;
+
+struct hn_nvs_rxbuf_connresp {
+       uint32_t        type;   /* NVS_TYPE_RXBUF_CONNRESP */
+       uint32_t        status; /* NVS_STATUS_ */
+       uint32_t        nsect;  /* # of elem in nvs_sect */
+       struct hn_nvs_rxbuf_sect nvs_sect[1];
+} __rte_packed;
+
+/* No response */
+struct hn_nvs_rxbuf_disconn {
+       uint32_t        type;   /* NVS_TYPE_RXBUF_DISCONN */
+       uint16_t        sig;    /* NVS_RXBUF_SIG */
+       uint8_t         rsvd[34];
+} __rte_packed;
+
+/* Request to connect the chimney sending buffer; packed, 40 bytes. */
+struct hn_nvs_chim_conn {
+       uint32_t        type;   /* NVS_TYPE_CHIM_CONN */
+       uint32_t        gpadl;  /* chimney buf vmbus GPADL */
+       uint16_t        sig;    /* NVS_CHIM_SIG */
+       uint8_t         rsvd[30];
+} __rte_packed;
+
+struct hn_nvs_chim_connresp {
+       uint32_t        type;   /* NVS_TYPE_CHIM_CONNRESP */
+       uint32_t        status; /* NVS_STATUS_ */
+       uint32_t        sectsz; /* section size */
+} __rte_packed;
+
+/* No response */
+struct hn_nvs_chim_disconn {
+       uint32_t        type;   /* NVS_TYPE_CHIM_DISCONN */
+       uint16_t        sig;    /* NVS_CHIM_SIG */
+       uint8_t         rsvd[34];
+} __rte_packed;
+
+#define NVS_SUBCH_OP_ALLOC             1
+
+struct hn_nvs_subch_req {
+       uint32_t        type;   /* NVS_TYPE_SUBCH_REQ */
+       uint32_t        op;     /* NVS_SUBCH_OP_ */
+       uint32_t        nsubch;
+       uint8_t         rsvd[28];
+} __rte_packed;
+
+struct hn_nvs_subch_resp {
+       uint32_t        type;   /* NVS_TYPE_SUBCH_RESP */
+       uint32_t        status; /* NVS_STATUS_ */
+       uint32_t        nsubch;
+       uint8_t         rsvd[28];
+} __rte_packed;
+
+struct hn_nvs_rndis {
+       uint32_t        type;   /* NVS_TYPE_RNDIS */
+       uint32_t        rndis_mtype;/* NVS_RNDIS_MTYPE_ */
+       /*
+        * Chimney sending buffer index and size.
+        *
+        * NOTE:
+        * If nvs_chim_idx is set to NVS_CHIM_IDX_INVALID
+        * and nvs_chim_sz is set to 0, then chimney sending
+        * buffer is _not_ used by this RNDIS message.
+        */
+       uint32_t        chim_idx;
+       uint32_t        chim_sz;
+       uint8_t         rsvd[24];
+} __rte_packed;
+
+struct hn_nvs_rndis_ack {
+       uint32_t        type;   /* NVS_TYPE_RNDIS_ACK */
+       uint32_t        status; /* NVS_STATUS_ */
+       uint8_t         rsvd[32];
+} __rte_packed;
+
+
+int    hn_nvs_attach(struct hn_data *hv, unsigned int mtu);
+void   hn_nvs_detach(struct hn_data *hv);
+void   hn_nvs_ack_rxbuf(struct vmbus_channel *chan, uint64_t tid);
+int    hn_nvs_alloc_subchans(struct hn_data *hv, uint32_t *nsubch);
+void   hn_nvs_set_datapath(struct hn_data *hv, uint32_t path);
+
+/*
+ * Send an NVS message as an in-band packet on the given channel.
+ *
+ * flags, nvs_msg, nvs_msglen and need_sig are passed straight through to
+ * rte_vmbus_chan_send(); sndc is cast to uint64_t first (presumably it
+ * serves as the transaction id -- confirm against the vmbus API).
+ *
+ * Returns the result of rte_vmbus_chan_send().
+ */
+static inline int
+hn_nvs_send(struct vmbus_channel *chan, uint16_t flags,
+           void *nvs_msg, int nvs_msglen, uintptr_t sndc,
+           bool *need_sig)
+{
+       return rte_vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND,
+                                  nvs_msg, nvs_msglen, (uint64_t)sndc,
+                                  flags, need_sig);
+}
+
+/*
+ * Send an NVS message together with a scatter/gather list of sglen
+ * entries on the given channel.
+ *
+ * All arguments are passed straight through to
+ * rte_vmbus_chan_send_sglist(); sndc is cast to uint64_t first
+ * (presumably it serves as the transaction id -- confirm against the
+ * vmbus API).
+ *
+ * Returns the result of rte_vmbus_chan_send_sglist().
+ */
+static inline int
+hn_nvs_send_sglist(struct vmbus_channel *chan,
+                  struct vmbus_gpa sg[], unsigned int sglen,
+                  void *nvs_msg, int nvs_msglen,
+                  uintptr_t sndc, bool *need_sig)
+{
+       return rte_vmbus_chan_send_sglist(chan, sg, sglen, nvs_msg, nvs_msglen,
+                                         (uint64_t)sndc, need_sig);
+}
diff --git a/drivers/net/netvsc/hn_rndis.c b/drivers/net/netvsc/hn_rndis.c
new file mode 100644 (file)
index 0000000..bde3396
--- /dev/null
@@ -0,0 +1,1099 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2009-2018 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * All rights reserved.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_string_fns.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ether.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_cycles.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_dev.h>
+#include <rte_bus_vmbus.h>
+
+#include "hn_logs.h"
+#include "hn_var.h"
+#include "hn_nvs.h"
+#include "hn_rndis.h"
+#include "ndis.h"
+
+#define HN_RNDIS_XFER_SIZE             0x4000
+
+#define HN_NDIS_TXCSUM_CAP_IP4         \
+       (NDIS_TXCSUM_CAP_IP4 | NDIS_TXCSUM_CAP_IP4OPT)
+#define HN_NDIS_TXCSUM_CAP_TCP4                \
+       (NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT)
+#define HN_NDIS_TXCSUM_CAP_TCP6                \
+       (NDIS_TXCSUM_CAP_TCP6 | NDIS_TXCSUM_CAP_TCP6OPT | \
+        NDIS_TXCSUM_CAP_IP6EXT)
+#define HN_NDIS_TXCSUM_CAP_UDP6                \
+       (NDIS_TXCSUM_CAP_UDP6 | NDIS_TXCSUM_CAP_IP6EXT)
+#define HN_NDIS_LSOV2_CAP_IP6          \
+       (NDIS_LSOV2_CAP_IP6EXT | NDIS_LSOV2_CAP_TCP6OPT)
+
+/* Get unique request id */
+/*
+ * Produce the next request id by atomically incrementing
+ * hv->rndis_req_id.  Zero is never returned: if the increment yields
+ * zero the counter is bumped again.
+ */
+static inline uint32_t
+hn_rndis_rid(struct hn_data *hv)
+{
+       for (;;) {
+               uint32_t rid = rte_atomic32_add_return(&hv->rndis_req_id, 1);
+
+               if (rid != 0)
+                       return rid;
+       }
+}
+
+/*
+ * Allocate size bytes through rte_zmalloc_socket(), requesting PAGE_SIZE
+ * alignment and the NUMA node of the underlying vmbus device.
+ *
+ * Returns the pointer from rte_zmalloc_socket(), which callers must
+ * check for NULL.
+ */
+static void *hn_rndis_alloc(struct hn_data *hv, size_t size)
+{
+       return rte_zmalloc_socket("RNDIS", size, PAGE_SIZE,
+                                hv->vmbus->device.numa_node);
+}
+
+#ifdef RTE_LIBRTE_NETVSC_DEBUG_DUMP
+/*
+ * Debug helper: decode the RNDIS message at buf and print its header
+ * fields at DEBUG level on the driver log type.
+ *
+ * The message kind is taken from the common header's type field.  For
+ * data packets every per-packet-info (PPI) element is printed as well.
+ * Unknown message types are printed with just their type and length.
+ *
+ * buf: start of an RNDIS message; the function trusts the lengths and
+ *      offsets inside it except where noted below.
+ */
+void hn_rndis_dump(const void *buf)
+{
+       const union {
+               struct rndis_msghdr hdr;
+               struct rndis_packet_msg pkt;
+               struct rndis_init_req init_request;
+               struct rndis_init_comp init_complete;
+               struct rndis_halt_req halt;
+               struct rndis_query_req query_request;
+               struct rndis_query_comp query_complete;
+               struct rndis_set_req set_request;
+               struct rndis_set_comp set_complete;
+               struct rndis_reset_req reset_request;
+               struct rndis_reset_comp reset_complete;
+               struct rndis_keepalive_req keepalive_request;
+               struct rndis_keepalive_comp keepalive_complete;
+               struct rndis_status_msg indicate_status;
+       } *rndis_msg = buf;
+
+       switch (rndis_msg->hdr.type) {
+       case RNDIS_PACKET_MSG: {
+               const struct rndis_pktinfo *ppi;
+               unsigned int ppi_len;
+
+               rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+                           "RNDIS_MSG_PACKET (len %u, data %u:%u, # oob %u %u:%u, pkt %u:%u)\n",
+                           rndis_msg->pkt.len,
+                           rndis_msg->pkt.dataoffset,
+                           rndis_msg->pkt.datalen,
+                           rndis_msg->pkt.oobdataelements,
+                           rndis_msg->pkt.oobdataoffset,
+                           rndis_msg->pkt.oobdatalen,
+                           rndis_msg->pkt.pktinfooffset,
+                           rndis_msg->pkt.pktinfolen);
+
+               ppi = (const struct rndis_pktinfo *)
+                       ((const char *)buf
+                        + RNDIS_PACKET_MSG_OFFSET_ABS(rndis_msg->pkt.pktinfooffset));
+
+               ppi_len = rndis_msg->pkt.pktinfolen;
+               while (ppi_len > 0) {
+                       const void *ppi_data;
+
+                       ppi_data = ppi->data;
+
+                       rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+                               "    PPI (size %u, type %u, offs %u data %#x)\n",
+                               ppi->size, ppi->type, ppi->offset,
+                               *(const uint32_t *)ppi_data);
+                       /*
+                        * Stop on a zero-sized or oversized element.
+                        * ppi_len is unsigned, so subtracting more than
+                        * what is left would wrap around and keep the
+                        * loop walking past the packet info area.
+                        */
+                       if (ppi->size == 0 || ppi->size > ppi_len)
+                               break;
+                       ppi_len -= ppi->size;
+                       ppi = (const struct rndis_pktinfo *)
+                               ((const char *)ppi + ppi->size);
+               }
+               break;
+       }
+       case RNDIS_INITIALIZE_MSG:
+               rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+                           "RNDIS_MSG_INIT (len %u id %#x, ver %u.%u max xfer %u)\n",
+                           rndis_msg->init_request.len,
+                           rndis_msg->init_request.rid,
+                           rndis_msg->init_request.ver_major,
+                           rndis_msg->init_request.ver_minor,
+                           rndis_msg->init_request.max_xfersz);
+               break;
+
+       case RNDIS_INITIALIZE_CMPLT:
+               rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+                           "RNDIS_MSG_INIT_C (len %u, id %#x, status 0x%x, vers %u.%u, "
+                           "flags %d, max xfer %u, max pkts %u, aligned %u)\n",
+                           rndis_msg->init_complete.len,
+                           rndis_msg->init_complete.rid,
+                           rndis_msg->init_complete.status,
+                           rndis_msg->init_complete.ver_major,
+                           rndis_msg->init_complete.ver_minor,
+                           rndis_msg->init_complete.devflags,
+                           rndis_msg->init_complete.pktmaxsz,
+                           rndis_msg->init_complete.pktmaxcnt,
+                           rndis_msg->init_complete.align);
+               break;
+
+       case RNDIS_HALT_MSG:
+               rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+                           "RNDIS_HALT (len %u id %#x)\n",
+                           rndis_msg->halt.len, rndis_msg->halt.rid);
+               break;
+
+       case RNDIS_QUERY_MSG:
+               rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+                           "RNDIS_QUERY (len %u, id %#x, oid %#x, info %u:%u)\n",
+                           rndis_msg->query_request.len,
+                           rndis_msg->query_request.rid,
+                           rndis_msg->query_request.oid,
+                           rndis_msg->query_request.infobuflen,
+                           rndis_msg->query_request.infobufoffset);
+               break;
+
+       case RNDIS_QUERY_CMPLT:
+               rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+                           "RNDIS_MSG_QUERY_C (len %u, id %#x, status 0x%x, buf %u:%u)\n",
+                           rndis_msg->query_complete.len,
+                           rndis_msg->query_complete.rid,
+                           rndis_msg->query_complete.status,
+                           rndis_msg->query_complete.infobuflen,
+                           rndis_msg->query_complete.infobufoffset);
+               break;
+
+       case RNDIS_SET_MSG:
+               rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+                           "RNDIS_SET (len %u, id %#x, oid %#x, info %u:%u)\n",
+                           rndis_msg->set_request.len,
+                           rndis_msg->set_request.rid,
+                           rndis_msg->set_request.oid,
+                           rndis_msg->set_request.infobuflen,
+                           rndis_msg->set_request.infobufoffset);
+               break;
+
+       case RNDIS_SET_CMPLT:
+               rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+                           "RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n",
+                           rndis_msg->set_complete.len,
+                           rndis_msg->set_complete.rid,
+                           rndis_msg->set_complete.status);
+               break;
+
+       case RNDIS_INDICATE_STATUS_MSG:
+               rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+                           "RNDIS_MSG_INDICATE (len %u, status %#x, buf len %u, buf offset %u)\n",
+                           rndis_msg->indicate_status.len,
+                           rndis_msg->indicate_status.status,
+                           rndis_msg->indicate_status.stbuflen,
+                           rndis_msg->indicate_status.stbufoffset);
+               break;
+
+       case RNDIS_RESET_MSG:
+               rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+                           "RNDIS_RESET (len %u, id %#x)\n",
+                           rndis_msg->reset_request.len,
+                           rndis_msg->reset_request.rid);
+               break;
+
+       case RNDIS_RESET_CMPLT:
+               rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+                           "RNDIS_RESET_C (len %u, status %#x address %#x)\n",
+                           rndis_msg->reset_complete.len,
+                           rndis_msg->reset_complete.status,
+                           rndis_msg->reset_complete.adrreset);
+               break;
+
+       case RNDIS_KEEPALIVE_MSG:
+               rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+                           "RNDIS_KEEPALIVE (len %u, id %#x)\n",
+                           rndis_msg->keepalive_request.len,
+                           rndis_msg->keepalive_request.rid);
+               break;
+
+       case RNDIS_KEEPALIVE_CMPLT:
+               /* The third value printed is the status field */
+               rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+                           "RNDIS_KEEPALIVE_C (len %u, id %#x status %#x)\n",
+                           rndis_msg->keepalive_complete.len,
+                           rndis_msg->keepalive_complete.rid,
+                           rndis_msg->keepalive_complete.status);
+               break;
+
+       default:
+               rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+                           "RNDIS type %#x len %u\n",
+                           rndis_msg->hdr.type,
+                           rndis_msg->hdr.len);
+               break;
+       }
+}
+#endif
+
+/*
+ * Send one RNDIS control message to the host inside an NVS RNDIS packet.
+ *
+ * The request is described to the host by a single scatter/gather entry,
+ * so @req must translate to an IOVA and lie entirely within one page.
+ *
+ * Returns -EINVAL when the buffer cannot be described that way; otherwise
+ * the result of hn_nvs_send_sglist() is passed through.
+ */
+static int hn_nvs_send_rndis_ctrl(struct vmbus_channel *chan,
+                                 const void *req, uint32_t reqlen)
+{
+       struct hn_nvs_rndis nvs_rndis = {
+               .type = NVS_TYPE_RNDIS,
+               .rndis_mtype = NVS_RNDIS_MTYPE_CTRL,
+               .chim_idx = NVS_CHIM_IDX_INVALID,
+               .chim_sz = 0
+       };
+       struct vmbus_gpa sg;
+       rte_iova_t addr;
+
+       addr = rte_malloc_virt2iova(req);
+       if (unlikely(addr == RTE_BAD_IOVA)) {
+               PMD_DRV_LOG(ERR, "RNDIS send request can not get iova");
+               return -EINVAL;
+       }
+
+       if (unlikely(reqlen > PAGE_SIZE)) {
+               PMD_DRV_LOG(ERR, "RNDIS request %u greater than page size",
+                           reqlen);
+               return -EINVAL;
+       }
+
+       /* Split the IOVA into page frame number and offset within the page */
+       sg.page = addr / PAGE_SIZE;
+       sg.ofs  = addr & PAGE_MASK;
+       sg.len  = reqlen;
+
+       /* A single sg entry can not describe data spanning two pages */
+       if (sg.ofs + reqlen >  PAGE_SIZE) {
+               PMD_DRV_LOG(ERR, "RNDIS request crosses page boundary");
+               return -EINVAL;
+       }
+
+       hn_rndis_dump(req);
+
+       return hn_nvs_send_sglist(chan, &sg, 1,
+                                 &nvs_rndis, sizeof(nvs_rndis), 0U, NULL);
+}
+
+/*
+ * Handle an RNDIS status indication message.
+ *
+ * No indication is acted upon at the moment: the recognised ones are
+ * silently dropped and anything else is reported in the log.
+ */
+void hn_rndis_link_status(struct hn_data *hv __rte_unused, const void *msg)
+{
+       const struct rndis_status_msg *ind = msg;
+
+       hn_rndis_dump(msg);
+
+       PMD_DRV_LOG(DEBUG, "link status %#x", ind->status);
+
+       switch (ind->status) {
+       case RNDIS_STATUS_MEDIA_CONNECT:
+       case RNDIS_STATUS_MEDIA_DISCONNECT:
+               /* TODO handle as LSC interrupt  */
+               return;
+
+       case RNDIS_STATUS_LINK_SPEED_CHANGE:
+       case RNDIS_STATUS_NETWORK_CHANGE:
+       case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
+               /* ignore not in DPDK API */
+               return;
+
+       default:
+               break;
+       }
+
+       PMD_DRV_LOG(NOTICE, "unknown RNDIS indication: %#x",
+                   ind->status);
+}
+
+/*
+ * Callback from hn_process_events when response is visible.
+ *
+ * Copies the completion into hv->rndis_resp (truncated to the size of
+ * that buffer if necessary) and then clears hv->rndis_pending, provided
+ * it still holds the request id carried by the response.
+ * Messages too short to hold a header, or shorter than the length they
+ * announce, are dropped.
+ */
+void hn_rndis_receive_response(struct hn_data *hv,
+                              const void *data, uint32_t len)
+{
+       const struct rndis_init_comp *hdr = data;
+
+       /*
+        * hdr->len and hdr->rid are read below, so the leading three
+        * 32-bit words (type, len, rid) must be present.
+        * Note: sizeof(3 * sizeof(uint32_t)) would be sizeof(size_t).
+        */
+       if (len < 3 * sizeof(uint32_t)) {
+               PMD_DRV_LOG(ERR,
+                           "missing RNDIS header %u", len);
+               return;
+       }
+
+       if (len < hdr->len) {
+               PMD_DRV_LOG(ERR,
+                           "truncated RNDIS response %u", len);
+               return;
+       }
+
+       /* Only dump once the message is known to be complete */
+       hn_rndis_dump(data);
+
+       if  (len > sizeof(hv->rndis_resp)) {
+               PMD_DRV_LOG(NOTICE,
+                           "RNDIS response exceeds buffer");
+               len = sizeof(hv->rndis_resp);
+       }
+
+       if (hdr->rid == 0) {
+               PMD_DRV_LOG(NOTICE,
+                           "RNDIS response id zero!");
+       }
+
+       memcpy(hv->rndis_resp, data, len);
+
+       /* make sure response copied before update */
+       rte_smp_wmb();
+
+       /* Release the waiter only if this is the response it expects */
+       if (rte_atomic32_cmpset(&hv->rndis_pending, hdr->rid, 0) == 0) {
+               PMD_DRV_LOG(ERR,
+                           "received id %#x pending id %#x",
+                           hdr->rid, (uint32_t)hv->rndis_pending);
+       }
+}
+
+/*
+ * Do request/response transaction.
+ *
+ * Sends @req on the primary channel.  When @comp is not NULL the request
+ * id is recorded in hv->rndis_pending and the primary channel is polled
+ * until hn_rndis_receive_response() changes it; the first @comp_len
+ * bytes of hv->rndis_resp are then copied to @comp.
+ *
+ * Returns 0 on success, -EIO if @comp_len exceeds the response buffer,
+ * -EBUSY if another request is already pending, or the error from the
+ * send.
+ */
+static int hn_rndis_exec1(struct hn_data *hv,
+                         const void *req, uint32_t reqlen,
+                         void *comp, uint32_t comp_len)
+{
+       const struct rndis_halt_req *hdr = req;
+       uint32_t rid = hdr->rid;
+       struct vmbus_channel *chan = hn_primary_chan(hv);
+       int error;
+
+       if (comp_len > sizeof(hv->rndis_resp)) {
+               PMD_DRV_LOG(ERR,
+                           "Expected completion size %u exceeds buffer %zu",
+                           comp_len, sizeof(hv->rndis_resp));
+               return -EIO;
+       }
+
+       /* Claim the single response slot; 0 means no request pending */
+       if (comp != NULL &&
+           rte_atomic32_cmpset(&hv->rndis_pending, 0, rid) == 0) {
+               PMD_DRV_LOG(ERR,
+                           "Request already pending");
+               return -EBUSY;
+       }
+
+       error = hn_nvs_send_rndis_ctrl(chan, req, reqlen);
+       if (error) {
+               PMD_DRV_LOG(ERR, "RNDIS ctrl send failed: %d", error);
+
+               /*
+                * Nothing was sent, so no response will ever clear the
+                * pending id.  Release it here, otherwise every later
+                * request would fail with -EBUSY.
+                */
+               if (comp != NULL)
+                       rte_atomic32_cmpset(&hv->rndis_pending, rid, 0);
+
+               return error;
+       }
+
+       if (comp) {
+               /* Poll primary channel until response received */
+               while (hv->rndis_pending == rid)
+                       hn_process_events(hv, 0);
+
+               memcpy(comp, hv->rndis_resp, comp_len);
+       }
+
+       return 0;
+}
+
+/*
+ * Run one RNDIS transaction and sanity check the completion header.
+ *
+ * @comp is zeroed, filled by hn_rndis_exec1() and then verified to be
+ * of message type @comp_type and to carry request id @rid.
+ *
+ * Returns 0 when the completion matches, the (negative) error from
+ * hn_rndis_exec1(), -ENXIO on unexpected type or -EINVAL on id mismatch.
+ */
+static int hn_rndis_execute(struct hn_data *hv, uint32_t rid,
+                           const void *req, uint32_t reqlen,
+                           void *comp, uint32_t comp_len, uint32_t comp_type)
+{
+       const struct rndis_comp_hdr *resp;
+       int rc;
+
+       memset(comp, 0, comp_len);
+
+       rc = hn_rndis_exec1(hv, req, reqlen, comp, comp_len);
+       if (rc < 0)
+               return rc;
+
+       /* Validate the header of the completion just received */
+       resp = comp;
+       rc = 0;
+       if (unlikely(resp->type != comp_type)) {
+               PMD_DRV_LOG(ERR,
+                           "unexpected RNDIS response complete %#x expect %#x",
+                           resp->type, comp_type);
+               rc = -ENXIO;
+       } else if (unlikely(resp->rid != rid)) {
+               PMD_DRV_LOG(ERR,
+                           "RNDIS comp rid mismatch %#x, expect %#x",
+                           resp->rid, rid);
+               rc = -EINVAL;
+       }
+
+       return rc;
+}
+
+/*
+ * Issue an RNDIS query for @oid and copy the answer to @odata.
+ *
+ * @idata/@idlen: optional input buffer appended to the request
+ *                (may be NULL when @idlen is 0).
+ * @odata/@odlen: output buffer; at most @odlen bytes are written and
+ *                fewer if the host returns less.  Nothing is written
+ *                when the host reports no data.
+ *
+ * Returns 0 on success (including the "no data" case), -ENOMEM on
+ * allocation failure, -EINVAL on a failed status or a malformed
+ * completion, or the error from hn_rndis_execute().
+ */
+static int
+hn_rndis_query(struct hn_data *hv, uint32_t oid,
+              const void *idata, uint32_t idlen,
+              void *odata, uint32_t odlen)
+{
+       struct rndis_query_req *req;
+       struct rndis_query_comp *comp;
+       uint32_t reqlen, comp_len;
+       int error = -EIO;
+       unsigned int ofs;
+       uint32_t rid;
+
+       reqlen = sizeof(*req) + idlen;
+       req = hn_rndis_alloc(hv, reqlen);
+       if (req == NULL)
+               return -ENOMEM;
+
+       comp_len = sizeof(*comp) + odlen;
+       comp = rte_zmalloc("QUERY", comp_len, PAGE_SIZE);
+       if (!comp) {
+               error = -ENOMEM;
+               goto done;
+       }
+       comp->status = RNDIS_STATUS_PENDING;
+
+       rid = hn_rndis_rid(hv);
+
+       req->type = RNDIS_QUERY_MSG;
+       req->len = reqlen;
+       req->rid = rid;
+       req->oid = oid;
+       req->infobufoffset = RNDIS_QUERY_REQ_INFOBUFOFFSET;
+       req->infobuflen = idlen;
+
+       /*
+        * Input data immediately follows RNDIS query.
+        * Callers pass NULL when there is none; memcpy() must not be
+        * given a NULL source even for a zero length.
+        */
+       if (idlen != 0)
+               memcpy(req + 1, idata, idlen);
+
+       error = hn_rndis_execute(hv, rid, req, reqlen,
+                                comp, comp_len, RNDIS_QUERY_CMPLT);
+
+       if (error)
+               goto done;
+
+       if (comp->status != RNDIS_STATUS_SUCCESS) {
+               PMD_DRV_LOG(ERR, "RNDIS query 0x%08x failed: status 0x%08x",
+                           oid, comp->status);
+               error = -EINVAL;
+               goto done;
+       }
+
+       if (comp->infobuflen == 0 || comp->infobufoffset == 0) {
+               /* No output data! */
+               PMD_DRV_LOG(ERR, "RNDIS query 0x%08x, no data", oid);
+               error = 0;
+               goto done;
+       }
+
+       /*
+        * Check output data length and offset.
+        * Both come from the host, so compare against the space left
+        * after ofs instead of adding them, which could wrap around.
+        */
+       /* ofs is the offset from the beginning of comp. */
+       ofs = RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(comp->infobufoffset);
+       if (ofs < sizeof(*comp) || ofs > comp_len ||
+           comp->infobuflen > comp_len - ofs) {
+               PMD_DRV_LOG(ERR, "RNDIS query invalid comp ib off/len, %u/%u",
+                           comp->infobufoffset, comp->infobuflen);
+               error = -EINVAL;
+               goto done;
+       }
+
+       /* Save output data. */
+       if (comp->infobuflen < odlen)
+               odlen = comp->infobuflen;
+
+       /* ofs is the offset from the beginning of comp. */
+       memcpy(odata, (const char *)comp + ofs, odlen);
+
+       error = 0;
+done:
+       rte_free(comp);
+       rte_free(req);
+       return error;
+}
+
+/*
+ * Send an RNDIS halt message.
+ *
+ * The outcome of the send itself is not checked; the only failure
+ * reported is -ENOMEM when the request can not be allocated.
+ */
+static int
+hn_rndis_halt(struct hn_data *hv)
+{
+       struct rndis_halt_req *msg = hn_rndis_alloc(hv, sizeof(*msg));
+
+       if (!msg)
+               return -ENOMEM;
+
+       msg->type = RNDIS_HALT_MSG;
+       msg->len = sizeof(*msg);
+       msg->rid = hn_rndis_rid(hv);
+
+       /* No RNDIS completion; rely on NVS message send completion */
+       hn_rndis_exec1(hv, msg, sizeof(*msg), NULL, 0);
+       rte_free(msg);
+
+       PMD_INIT_LOG(DEBUG, "RNDIS halt done");
+
+       return 0;
+}
+
+/*
+ * Query the hardware offload capabilities into @caps.
+ *
+ * The revision and size placed in the request header depend on
+ * hv->ndis_ver.  The reply header is given a preliminary check
+ * (object type, revision, size) before it is accepted.
+ *
+ * Returns 0 on success, the error from hn_rndis_query(), or -EINVAL
+ * when the reply header is not plausible.
+ */
+static int
+hn_rndis_query_hwcaps(struct hn_data *hv, struct ndis_offload *caps)
+{
+       const uint32_t caps_len = NDIS_OFFLOAD_SIZE;
+       struct ndis_offload query;
+       uint32_t query_len;
+       int rc;
+
+       memset(caps, 0, sizeof(*caps));
+       memset(&query, 0, sizeof(query));
+
+       if (hv->ndis_ver >= NDIS_VERSION_6_30) {
+               query_len = NDIS_OFFLOAD_SIZE;
+               query.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_3;
+       } else if (hv->ndis_ver >= NDIS_VERSION_6_1) {
+               query_len = NDIS_OFFLOAD_SIZE_6_1;
+               query.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_2;
+       } else {
+               query_len = NDIS_OFFLOAD_SIZE_6_0;
+               query.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_1;
+       }
+       query.ndis_hdr.ndis_type = NDIS_OBJTYPE_OFFLOAD;
+       query.ndis_hdr.ndis_size = query_len;
+
+       rc = hn_rndis_query(hv, OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES,
+                           &query, query_len, caps, caps_len);
+       if (rc != 0)
+               return rc;
+
+       /* Preliminary verification. */
+       if (caps->ndis_hdr.ndis_type != NDIS_OBJTYPE_OFFLOAD) {
+               PMD_DRV_LOG(NOTICE, "invalid NDIS objtype 0x%02x",
+                           caps->ndis_hdr.ndis_type);
+               return -EINVAL;
+       }
+
+       if (caps->ndis_hdr.ndis_rev < NDIS_OFFLOAD_REV_1) {
+               PMD_DRV_LOG(NOTICE, "invalid NDIS objrev 0x%02x",
+                           caps->ndis_hdr.ndis_rev);
+               return -EINVAL;
+       }
+
+       /* Object may be neither larger than the buffer ... */
+       if (caps->ndis_hdr.ndis_size > caps_len) {
+               PMD_DRV_LOG(NOTICE, "invalid NDIS objsize %u, data size %u",
+                           caps->ndis_hdr.ndis_size, caps_len);
+               return -EINVAL;
+       }
+
+       /* ... nor smaller than the oldest known layout */
+       if (caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE_6_0) {
+               PMD_DRV_LOG(NOTICE, "invalid NDIS objsize %u",
+                           caps->ndis_hdr.ndis_size);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Query the RSS capabilities of the host.
+ *
+ * On success *rxr_cnt0 receives the usable number of RX rings (limited
+ * to the indirection table size) and hv->rss_offloads is set from the
+ * reported hash capabilities.  *rxr_cnt0 is 0 on any failure.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when the NDIS version is too old or
+ * the indirection table is too large, -EINVAL on an implausible reply,
+ * or the error from hn_rndis_query().
+ */
+int
+hn_rndis_query_rsscaps(struct hn_data *hv,
+                      unsigned int *rxr_cnt0)
+{
+       struct ndis_rss_caps in, caps;
+       unsigned int indsz, rxr_cnt;
+       uint32_t caps_len;
+       int error;
+
+       *rxr_cnt0 = 0;
+
+       if (hv->ndis_ver < NDIS_VERSION_6_20) {
+               PMD_DRV_LOG(DEBUG, "RSS not supported on this host");
+               return -EOPNOTSUPP;
+       }
+
+       memset(&in, 0, sizeof(in));
+       in.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS;
+       in.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2;
+       in.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE;
+
+       /*
+        * hn_rndis_query() may succeed while writing only part of the
+        * output buffer (or nothing at all), so start from zeroes
+        * rather than from stack garbage.
+        */
+       memset(&caps, 0, sizeof(caps));
+
+       caps_len = NDIS_RSS_CAPS_SIZE;
+       error = hn_rndis_query(hv, OID_GEN_RECEIVE_SCALE_CAPABILITIES,
+                              &in, NDIS_RSS_CAPS_SIZE,
+                              &caps, caps_len);
+       if (error)
+               return error;
+
+       PMD_INIT_LOG(DEBUG, "RX rings %u indirect %u caps %#x",
+                    caps.ndis_nrxr, caps.ndis_nind, caps.ndis_caps);
+       /*
+        * Preliminary verification.
+        */
+       if (caps.ndis_hdr.ndis_type != NDIS_OBJTYPE_RSS_CAPS) {
+               PMD_DRV_LOG(ERR, "invalid NDIS objtype 0x%02x",
+                           caps.ndis_hdr.ndis_type);
+               return -EINVAL;
+       }
+       if (caps.ndis_hdr.ndis_rev < NDIS_RSS_CAPS_REV_1) {
+               PMD_DRV_LOG(ERR, "invalid NDIS objrev 0x%02x",
+                           caps.ndis_hdr.ndis_rev);
+               return -EINVAL;
+       }
+       if (caps.ndis_hdr.ndis_size > caps_len) {
+               PMD_DRV_LOG(ERR,
+                           "invalid NDIS objsize %u, data size %u",
+                           caps.ndis_hdr.ndis_size, caps_len);
+               return -EINVAL;
+       } else if (caps.ndis_hdr.ndis_size < NDIS_RSS_CAPS_SIZE_6_0) {
+               PMD_DRV_LOG(ERR, "invalid NDIS objsize %u",
+                           caps.ndis_hdr.ndis_size);
+               return -EINVAL;
+       }
+
+       /*
+        * Save information for later RSS configuration.
+        */
+       if (caps.ndis_nrxr == 0) {
+               PMD_DRV_LOG(ERR, "0 RX rings!?");
+               return -EINVAL;
+       }
+       rxr_cnt = caps.ndis_nrxr;
+
+       /* Only a full size, revision 2 (or later) reply carries ndis_nind */
+       if (caps.ndis_hdr.ndis_size == NDIS_RSS_CAPS_SIZE &&
+           caps.ndis_hdr.ndis_rev >= NDIS_RSS_CAPS_REV_2) {
+               if (caps.ndis_nind > NDIS_HASH_INDCNT) {
+                       PMD_DRV_LOG(ERR,
+                                   "too many RSS indirect table entries %u",
+                                   caps.ndis_nind);
+                       return -EOPNOTSUPP;
+               }
+               /* Reported, but not treated as fatal */
+               if (!rte_is_power_of_2(caps.ndis_nind)) {
+                       PMD_DRV_LOG(ERR,
+                                   "RSS indirect table size is not power-of-2 %u",
+                                   caps.ndis_nind);
+               }
+
+               indsz = caps.ndis_nind;
+       } else {
+               indsz = NDIS_HASH_INDCNT;
+       }
+
+       if (indsz < rxr_cnt) {
+               PMD_DRV_LOG(NOTICE,
+                           "# of RX rings (%u) > RSS indirect table size %u",
+                           rxr_cnt, indsz);
+               rxr_cnt = indsz;
+       }
+
+       hv->rss_offloads = 0;
+       if (caps.ndis_caps & NDIS_RSS_CAP_IPV4)
+               hv->rss_offloads |= ETH_RSS_IPV4
+                       | ETH_RSS_NONFRAG_IPV4_TCP
+                       | ETH_RSS_NONFRAG_IPV4_UDP;
+       if (caps.ndis_caps & NDIS_RSS_CAP_IPV6)
+               hv->rss_offloads |= ETH_RSS_IPV6
+                       | ETH_RSS_NONFRAG_IPV6_TCP;
+       if (caps.ndis_caps & NDIS_RSS_CAP_IPV6_EX)
+               hv->rss_offloads |= ETH_RSS_IPV6_EX
+                       | ETH_RSS_IPV6_TCP_EX;
+
+       /* Commit! */
+       *rxr_cnt0 = rxr_cnt;
+
+       return 0;
+}
+
+/*
+ * Issue an RNDIS set request for @oid with @dlen bytes of @data.
+ *
+ * Returns 0 on success, -ENOMEM if the request can not be allocated,
+ * or -EIO when the transaction fails or the host reports a status
+ * other than success.
+ */
+static int
+hn_rndis_set(struct hn_data *hv, uint32_t oid, const void *data, uint32_t dlen)
+{
+       struct rndis_set_req *req;
+       struct rndis_set_comp comp;
+       uint32_t reqlen, comp_len;
+       uint32_t rid;
+       int error;
+
+       reqlen = sizeof(*req) + dlen;
+       req = rte_zmalloc("RNDIS_SET", reqlen, PAGE_SIZE);
+       if (!req)
+               return -ENOMEM;
+
+       rid = hn_rndis_rid(hv);
+       req->type = RNDIS_SET_MSG;
+       req->len = reqlen;
+       req->rid = rid;
+       req->oid = oid;
+       req->infobuflen = dlen;
+       req->infobufoffset = RNDIS_SET_REQ_INFOBUFOFFSET;
+
+       /* Data immediately follows RNDIS set. */
+       memcpy(req + 1, data, dlen);
+
+       comp_len = sizeof(comp);
+       error = hn_rndis_execute(hv, rid, req, reqlen,
+                                &comp, comp_len,
+                                RNDIS_SET_CMPLT);
+       if (error) {
+               PMD_DRV_LOG(ERR, "exec RNDIS set %#" PRIx32 " failed",
+                           oid);
+               /* negative errno, like every other error path here */
+               error = -EIO;
+               goto done;
+       }
+
+       if (comp.status != RNDIS_STATUS_SUCCESS) {
+               PMD_DRV_LOG(ERR,
+                           "RNDIS set %#" PRIx32 " failed: status %#" PRIx32,
+                           oid, comp.status);
+               error = -EIO;
+               goto done;
+       }
+
+done:
+       rte_free(req);
+       return error;
+}
+
+/*
+ * Configure checksum and TSO offloads on the host.
+ *
+ * Each offload requested in @tx_offloads / @rx_offloads is checked
+ * against the capabilities reported by the host; TCP, UDP and TSO
+ * require both the IPv4 and the IPv6 capability.  If any requested
+ * offload is missing nothing is configured.
+ *
+ * Returns 0 on success, -EINVAL if a requested offload is unsupported,
+ * or the error from the capability query / set request.
+ */
+int hn_rndis_conf_offload(struct hn_data *hv,
+                         uint64_t tx_offloads, uint64_t rx_offloads)
+{
+       struct ndis_offload_params params;
+       struct ndis_offload hwcaps;
+       int error;
+
+       error = hn_rndis_query_hwcaps(hv, &hwcaps);
+       if (error) {
+               PMD_DRV_LOG(ERR, "hwcaps query failed: %d", error);
+               return error;
+       }
+
+       /* NOTE: 0 means "no change" */
+       memset(&params, 0, sizeof(params));
+
+       params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT;
+       if (hv->ndis_ver < NDIS_VERSION_6_30) {
+               params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2;
+               params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE_6_1;
+       } else {
+               params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3;
+               params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE;
+       }
+
+       /* TCP checksum, transmit then receive */
+       if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM) {
+               if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_TCP4)
+                       params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TX;
+               else
+                       goto unsupported;
+
+               if (hwcaps.ndis_csum.ndis_ip6_txcsum & NDIS_TXCSUM_CAP_TCP6)
+                       params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TX;
+               else
+                       goto unsupported;
+       }
+
+       if (rx_offloads & DEV_RX_OFFLOAD_TCP_CKSUM) {
+               if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4)
+                   == NDIS_RXCSUM_CAP_TCP4)
+                       params.ndis_tcp4csum |= NDIS_OFFLOAD_PARAM_RX;
+               else
+                       goto unsupported;
+
+               if ((hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6)
+                   == NDIS_RXCSUM_CAP_TCP6)
+                       params.ndis_tcp6csum |= NDIS_OFFLOAD_PARAM_RX;
+               else
+                       goto unsupported;
+       }
+
+       /* UDP checksum, transmit then receive */
+       if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) {
+               if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4)
+                       params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TX;
+               else
+                       goto unsupported;
+
+               if ((hwcaps.ndis_csum.ndis_ip6_txcsum & NDIS_TXCSUM_CAP_UDP6)
+                   == NDIS_TXCSUM_CAP_UDP6)
+                       params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TX;
+               else
+                       goto unsupported;
+       }
+
+       /* rx_offloads holds DEV_RX_* flags; test the RX UDP flag here */
+       if (rx_offloads & DEV_RX_OFFLOAD_UDP_CKSUM) {
+               if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4)
+                       params.ndis_udp4csum |= NDIS_OFFLOAD_PARAM_RX;
+               else
+                       goto unsupported;
+
+               if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6)
+                       params.ndis_udp6csum |= NDIS_OFFLOAD_PARAM_RX;
+               else
+                       goto unsupported;
+       }
+
+       /* IPv4 header checksum */
+       if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) {
+               if ((hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_IP4)
+                   == NDIS_TXCSUM_CAP_IP4)
+                       params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TX;
+               else
+                       goto unsupported;
+       }
+       if (rx_offloads & DEV_RX_OFFLOAD_IPV4_CKSUM) {
+               if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4)
+                       params.ndis_ip4csum |= NDIS_OFFLOAD_PARAM_RX;
+               else
+                       goto unsupported;
+       }
+
+       /* TCP segmentation offload (LSOv2) */
+       if (tx_offloads & DEV_TX_OFFLOAD_TCP_TSO) {
+               if (hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023)
+                       params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON;
+               else
+                       goto unsupported;
+
+               if ((hwcaps.ndis_lsov2.ndis_ip6_opts & HN_NDIS_LSOV2_CAP_IP6)
+                   == HN_NDIS_LSOV2_CAP_IP6)
+                       params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON;
+               else
+                       goto unsupported;
+       }
+
+       error = hn_rndis_set(hv, OID_TCP_OFFLOAD_PARAMETERS, &params,
+                            params.ndis_hdr.ndis_size);
+       if (error) {
+               PMD_DRV_LOG(ERR, "offload config failed");
+               return error;
+       }
+
+       return 0;
+unsupported:
+       PMD_DRV_LOG(NOTICE,
+                   "offload tx:%" PRIx64 " rx:%" PRIx64 " not supported by this version",
+                   tx_offloads, rx_offloads);
+       return -EINVAL;
+}
+
+/*
+ * Fill in the offload capabilities of @dev_info.
+ *
+ * Multi-segment TX, VLAN insert/strip and CRC strip are always
+ * advertised; checksum and TSO offloads are added according to the
+ * capabilities reported by the host.  TCP, UDP and TSO are advertised
+ * only when both the IPv4 and the IPv6 capability are present.
+ *
+ * Returns 0 on success or the error from the capability query, in
+ * which case @dev_info is left untouched.
+ */
+int hn_rndis_get_offload(struct hn_data *hv,
+                        struct rte_eth_dev_info *dev_info)
+{
+       struct ndis_offload hwcaps;
+       int rc;
+
+       memset(&hwcaps, 0, sizeof(hwcaps));
+
+       rc = hn_rndis_query_hwcaps(hv, &hwcaps);
+       if (rc != 0) {
+               PMD_DRV_LOG(ERR, "hwcaps query failed: %d", rc);
+               return rc;
+       }
+
+       /* ---- transmit side ---- */
+       dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
+                                   DEV_TX_OFFLOAD_VLAN_INSERT;
+
+       if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_IP4)
+           == HN_NDIS_TXCSUM_CAP_IP4) {
+               dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_IPV4_CKSUM;
+       }
+
+       if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_TCP4)
+           == HN_NDIS_TXCSUM_CAP_TCP4) {
+               if ((hwcaps.ndis_csum.ndis_ip6_txcsum &
+                    HN_NDIS_TXCSUM_CAP_TCP6) == HN_NDIS_TXCSUM_CAP_TCP6)
+                       dev_info->tx_offload_capa |=
+                               DEV_TX_OFFLOAD_TCP_CKSUM;
+       }
+
+       if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) {
+               if (hwcaps.ndis_csum.ndis_ip6_txcsum & NDIS_TXCSUM_CAP_UDP6)
+                       dev_info->tx_offload_capa |=
+                               DEV_TX_OFFLOAD_UDP_CKSUM;
+       }
+
+       if (hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023) {
+               if ((hwcaps.ndis_lsov2.ndis_ip6_opts & HN_NDIS_LSOV2_CAP_IP6)
+                   == HN_NDIS_LSOV2_CAP_IP6)
+                       dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
+       }
+
+       /* ---- receive side ---- */
+       dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP |
+                                   DEV_RX_OFFLOAD_CRC_STRIP;
+
+       if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4)
+               dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_IPV4_CKSUM;
+
+       if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) {
+               if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6)
+                       dev_info->rx_offload_capa |=
+                               DEV_RX_OFFLOAD_TCP_CKSUM;
+       }
+
+       if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) {
+               if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6)
+                       dev_info->rx_offload_capa |=
+                               DEV_RX_OFFLOAD_UDP_CKSUM;
+       }
+
+       return 0;
+}
+
+/*
+ * Program the RNDIS packet filter with @filter.
+ * The outcome is logged; the result of hn_rndis_set() is returned as is.
+ */
+int
+hn_rndis_set_rxfilter(struct hn_data *hv, uint32_t filter)
+{
+       int rc = hn_rndis_set(hv, OID_GEN_CURRENT_PACKET_FILTER,
+                             &filter, sizeof(filter));
+
+       if (rc == 0) {
+               PMD_DRV_LOG(DEBUG, "set RX filter %#" PRIx32 " done", filter);
+               return rc;
+       }
+
+       PMD_DRV_LOG(ERR, "set RX filter %#" PRIx32 " failed: %d",
+                   filter, rc);
+       return rc;
+}
+
+/* The default RSS key.
+ * This value is the same as MLX5 so that flows will be
+ * received on the same path for both the VF and the synthetic NIC.
+ */
+static const uint8_t rss_default_key[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
+       0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
+       0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
+       0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
+       0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
+       0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a,
+};
+
+/*
+ * Push the RSS configuration (Toeplitz hash) to the host.
+ *
+ * The hash types are derived from rss_conf->rss_hf, the key is taken
+ * from rss_conf->rss_key (or the built-in default when that is NULL)
+ * and the indirection table spreads entries round-robin over
+ * hv->num_queues queues.
+ *
+ * Returns the result of the RNDIS set request.
+ */
+int hn_rndis_conf_rss(struct hn_data *hv,
+                     const struct rte_eth_rss_conf *rss_conf)
+{
+       struct ndis_rssprm_toeplitz rssp;
+       struct ndis_rss_params *prm = &rssp.rss_params;
+       const uint8_t *key;
+       uint32_t hash_types = NDIS_HASH_FUNCTION_TOEPLITZ;
+       unsigned int slot;
+       int rc;
+
+       PMD_INIT_FUNC_TRACE();
+
+       key = rss_conf->rss_key ? rss_conf->rss_key : rss_default_key;
+
+       memset(&rssp, 0, sizeof(rssp));
+
+       /* Object header */
+       prm->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS;
+       prm->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2;
+       prm->ndis_hdr.ndis_size = sizeof(*prm);
+       prm->ndis_flags = 0;
+
+       /* Translate the ethdev hash selection into NDIS hash types */
+       if (rss_conf->rss_hf & ETH_RSS_IPV4)
+               hash_types |= NDIS_HASH_IPV4;
+       if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
+               hash_types |= NDIS_HASH_TCP_IPV4;
+       if (rss_conf->rss_hf & ETH_RSS_IPV6)
+               hash_types |= NDIS_HASH_IPV6;
+       if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
+               hash_types |= NDIS_HASH_TCP_IPV6;
+       prm->ndis_hash = hash_types;
+
+       /* Location and size of the key within the parameter block */
+       prm->ndis_keysize = NDIS_HASH_KEYSIZE_TOEPLITZ;
+       prm->ndis_keyoffset = offsetof(struct ndis_rssprm_toeplitz, rss_key[0]);
+       memcpy(&rssp.rss_key, key, NDIS_HASH_KEYSIZE_TOEPLITZ);
+
+       /* Location, size and contents of the indirection table */
+       prm->ndis_indsize = sizeof(rssp.rss_ind[0]) * NDIS_HASH_INDCNT;
+       prm->ndis_indoffset = offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]);
+       for (slot = 0; slot < NDIS_HASH_INDCNT; slot++)
+               rssp.rss_ind[slot] = slot % hv->num_queues;
+
+       rc = hn_rndis_set(hv, OID_GEN_RECEIVE_SCALE_PARAMETERS,
+                         &rssp, sizeof(rssp));
+       if (rc != 0) {
+               PMD_DRV_LOG(ERR,
+                           "RSS config num queues=%u failed: %d",
+                           hv->num_queues, rc);
+       }
+
+       return rc;
+}
+
+/*
+ * Send the RNDIS initialize message and record the aggregation limits
+ * (size, packet count, alignment) from the completion in @hv.
+ *
+ * Returns 0 on success, -ENXIO if the request can not be allocated,
+ * -EIO if the host reports a status other than success, -EINVAL if
+ * the reported alignment is not usable, or the error from
+ * hn_rndis_execute().
+ */
+static int hn_rndis_init(struct hn_data *hv)
+{
+       struct rndis_init_req *req;
+       struct rndis_init_comp comp;
+       uint32_t comp_len, rid;
+       int error;
+
+       req = hn_rndis_alloc(hv, sizeof(*req));
+       if (!req) {
+               PMD_DRV_LOG(ERR, "no memory for RNDIS init");
+               return -ENXIO;
+       }
+
+       rid = hn_rndis_rid(hv);
+       req->type = RNDIS_INITIALIZE_MSG;
+       req->len = sizeof(*req);
+       req->rid = rid;
+       req->ver_major = RNDIS_VERSION_MAJOR;
+       req->ver_minor = RNDIS_VERSION_MINOR;
+       req->max_xfersz = HN_RNDIS_XFER_SIZE;
+
+       comp_len = RNDIS_INIT_COMP_SIZE_MIN;
+       error = hn_rndis_execute(hv, rid, req, sizeof(*req),
+                                &comp, comp_len,
+                                RNDIS_INITIALIZE_CMPLT);
+       if (error)
+               goto done;
+
+       if (comp.status != RNDIS_STATUS_SUCCESS) {
+               PMD_DRV_LOG(ERR, "RNDIS init failed: status 0x%08x",
+                           comp.status);
+               error = -EIO;
+               goto done;
+       }
+
+       /*
+        * The alignment arrives as a shift count chosen by the host.
+        * Shifting a 32-bit value by 32 or more is undefined, so reject
+        * such a reply instead of computing garbage.
+        */
+       if (comp.align >= sizeof(uint32_t) * 8) {
+               PMD_DRV_LOG(ERR, "RNDIS init: invalid aggpkt align shift %u",
+                           comp.align);
+               error = -EINVAL;
+               goto done;
+       }
+
+       hv->rndis_agg_size = comp.pktmaxsz;
+       hv->rndis_agg_pkts = comp.pktmaxcnt;
+       hv->rndis_agg_align = 1U << comp.align;
+
+       if (hv->rndis_agg_align < sizeof(uint32_t)) {
+               /*
+                * The RNDIS packet message encap assumes that the RNDIS
+                * packet message is at least 4 bytes aligned.  Fix up the
+                * alignment here, if the remote side sets the alignment
+                * too low.
+                */
+               PMD_DRV_LOG(NOTICE,
+                           "fixup RNDIS aggpkt align: %u -> %zu",
+                           hv->rndis_agg_align, sizeof(uint32_t));
+               hv->rndis_agg_align = sizeof(uint32_t);
+       }
+
+       PMD_INIT_LOG(INFO,
+                    "RNDIS ver %u.%u, aggpkt size %u, aggpkt cnt %u, aggpkt align %u",
+                    comp.ver_major, comp.ver_minor,
+                    hv->rndis_agg_size, hv->rndis_agg_pkts,
+                    hv->rndis_agg_align);
+       error = 0;
+done:
+       rte_free(req);
+       return error;
+}
+
+/*
+ * Query the permanent MAC address into @eaddr, which must have room
+ * for ETHER_ADDR_LEN bytes, and log it.
+ *
+ * Returns 0 on success or the error from hn_rndis_query().
+ */
+int
+hn_rndis_get_eaddr(struct hn_data *hv, uint8_t *eaddr)
+{
+       const uint32_t len = ETHER_ADDR_LEN;
+       int rc;
+
+       rc = hn_rndis_query(hv, OID_802_3_PERMANENT_ADDRESS, NULL, 0,
+                           eaddr, len);
+       if (rc == 0) {
+               PMD_DRV_LOG(INFO, "MAC address %02x:%02x:%02x:%02x:%02x:%02x",
+                           eaddr[0], eaddr[1], eaddr[2],
+                           eaddr[3], eaddr[4], eaddr[5]);
+       }
+
+       return rc;
+}
+
+/*
+ * Query the media connect status into hv->link_status.
+ * Returns the result of hn_rndis_query().
+ */
+int
+hn_rndis_get_linkstatus(struct hn_data *hv)
+{
+       void *out = &hv->link_status;
+
+       return hn_rndis_query(hv, OID_GEN_MEDIA_CONNECT_STATUS, NULL, 0,
+                             out, sizeof(uint32_t));
+}
+
+/*
+ * Query the link speed into hv->link_speed.
+ * Returns the result of hn_rndis_query().
+ */
+int
+hn_rndis_get_linkspeed(struct hn_data *hv)
+{
+       void *out = &hv->link_speed;
+
+       return hn_rndis_query(hv, OID_GEN_LINK_SPEED, NULL, 0,
+                             out, sizeof(uint32_t));
+}
+
+/*
+ * Attach RNDIS: performs the RNDIS initialize handshake.
+ * Returns the result of hn_rndis_init().
+ */
+int
+hn_rndis_attach(struct hn_data *hv)
+{
+       int rc;
+
+       rc = hn_rndis_init(hv);
+
+       return rc;
+}
+
+/*
+ * Detach RNDIS: sends the halt message.
+ * The result of the halt is deliberately not reported.
+ */
+void
+hn_rndis_detach(struct hn_data *hv)
+{
+       (void)hn_rndis_halt(hv);
+}
diff --git a/drivers/net/netvsc/hn_rndis.h b/drivers/net/netvsc/hn_rndis.h
new file mode 100644 (file)
index 0000000..89e2e6b
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+
+#include "rndis.h"
+
+/* Only pointers to the device state are used here */
+struct hn_data;
+
+/*
+ * Receive path hooks: a completion for a pending control request,
+ * and a status indication from the host.
+ */
+void hn_rndis_receive_response(struct hn_data *hv,
+                             const void *data, uint32_t len);
+void   hn_rndis_link_status(struct hn_data *hv, const void *data);
+
+/* attach performs the RNDIS initialize exchange, detach sends halt */
+int    hn_rndis_attach(struct hn_data *hv);
+void   hn_rndis_detach(struct hn_data *hv);
+
+/*
+ * Queries and settings; int results are 0 on success.
+ * get_linkstatus/get_linkspeed store their answer in hv.
+ */
+int    hn_rndis_get_eaddr(struct hn_data *hv, uint8_t *eaddr);
+int    hn_rndis_get_linkstatus(struct hn_data *hv);
+int    hn_rndis_get_linkspeed(struct hn_data *hv);
+int    hn_rndis_set_rxfilter(struct hn_data *hv, uint32_t filter);
+/* NOTE(review): not defined next to the others - presumably RX path */
+void   hn_rndis_rx_ctrl(struct hn_data *hv, const void *data,
+                        int dlen);
+int    hn_rndis_get_offload(struct hn_data *hv,
+                            struct rte_eth_dev_info *dev_info);
+int    hn_rndis_conf_offload(struct hn_data *hv,
+                             uint64_t tx_offloads,
+                             uint64_t rx_offloads);
+int    hn_rndis_query_rsscaps(struct hn_data *hv,
+                              unsigned int *rxr_cnt0);
+int    hn_rndis_conf_rss(struct hn_data *hv,
+                         const struct rte_eth_rss_conf *rss_conf);
+
+/*
+ * Message dump for debugging; expands to nothing unless
+ * RTE_LIBRTE_NETVSC_DEBUG_DUMP is defined.
+ */
+#ifdef RTE_LIBRTE_NETVSC_DEBUG_DUMP
+void hn_rndis_dump(const void *buf);
+#else
+#define hn_rndis_dump(buf)
+#endif
diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
new file mode 100644 (file)
index 0000000..6d2f41c
--- /dev/null
@@ -0,0 +1,1329 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016-2018 Microsoft Corporation
+ * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <strings.h>
+
+#include <rte_ethdev.h>
+#include <rte_memcpy.h>
+#include <rte_string_fns.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ether.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_dev.h>
+#include <rte_bus_vmbus.h>
+#include <rte_spinlock.h>
+
+#include "hn_logs.h"
+#include "hn_var.h"
+#include "hn_rndis.h"
+#include "hn_nvs.h"
+#include "ndis.h"
+
+/* Size of a channel packet header followed by one NVS RNDIS message */
+#define HN_NVS_SEND_MSG_SIZE \
+       (sizeof(struct vmbus_chanpkt_hdr) + sizeof(struct hn_nvs_rndis))
+
+#define HN_TXD_CACHE_SIZE      32 /* per cpu tx_descriptor pool cache */
+/*
+ * NOTE(review): presumably the largest packet that is copied into the
+ * chimney buffer on transmit; the user is not in this part of the
+ * file - confirm.
+ */
+#define HN_TXCOPY_THRESHOLD    512
+
+/* Received frames shorter than this are always copied into the mbuf */
+#define HN_RXCOPY_THRESHOLD    256
+/* Initial size (bytes) of the per-queue event buffer; grown on demand */
+#define HN_RXQ_EVENT_DEFAULT   1024
+
+/*
+ * Per-packet meta data extracted from the RNDIS per-packet-info
+ * area by hn_rndis_rxinfo(). Each field holds the first 32 bits
+ * of the corresponding info element, or its *_INVALID value
+ * when the element was not present.
+ */
+struct hn_rxinfo {
+       uint32_t        vlan_info;
+       uint32_t        csum_info;
+       uint32_t        hash_info;
+       uint32_t        hash_value;
+};
+
+/* Bits used while parsing to track which elements have been seen */
+#define HN_RXINFO_VLAN                 0x0001
+#define HN_RXINFO_CSUM                 0x0002
+#define HN_RXINFO_HASHINF              0x0004
+#define HN_RXINFO_HASHVAL              0x0008
+#define HN_RXINFO_ALL                  \
+       (HN_RXINFO_VLAN |               \
+        HN_RXINFO_CSUM |               \
+        HN_RXINFO_HASHINF |            \
+        HN_RXINFO_HASHVAL)
+
+/* Field values meaning "no such information for this packet" */
+#define HN_NDIS_VLAN_INFO_INVALID      0xffffffff
+#define HN_NDIS_RXCSUM_INFO_INVALID    0
+#define HN_NDIS_HASH_INFO_INVALID      0
+
+/*
+ * Per-transmit book keeping.
+ * A slot in transmit ring (chim_index) is reserved for each transmit.
+ *
+ * There are two types of transmit:
+ *   - buffered transmit where chimney buffer is used and RNDIS header
+ *     is in the buffer. mbuf == NULL for this case.
+ *
+ *   - direct transmit where RNDIS header is in rndis_pkt and the
+ *     mbuf is freed after transmit.
+ *
+ * Descriptors come from per-port pool which is used
+ * to limit number of outstanding requests per device.
+ */
+struct hn_txdesc {
+       /* mbuf released when the send completes (may be NULL) */
+       struct rte_mbuf *m;
+
+       /* Tx queue that obtained this descriptor */
+       uint16_t        queue_id;
+       /* index of the object in the pool; selects the chimney buffer slot */
+       uint16_t        chim_index;
+       /* bytes of RNDIS messages placed in the chimney buffer */
+       uint32_t        chim_size;
+       /* sum of pkt_len of the packets carried, for byte statistics */
+       uint32_t        data_size;
+       /* number of packets carried, for packet statistics */
+       uint32_t        packets;
+
+       /* HN_RNDIS_PKT_LEN bytes allocated once when the pool is populated */
+       struct rndis_packet_msg *rndis_pkt;
+};
+
+/*
+ * RNDIS packet message header plus room for the hash value, VLAN,
+ * LSO2 and Tx checksum per-packet-info elements.
+ * This is the size allocated for each descriptor's rndis_pkt.
+ */
+#define HN_RNDIS_PKT_LEN                               \
+       (sizeof(struct rndis_packet_msg) +              \
+        RNDIS_PKTINFO_SIZE(NDIS_HASH_VALUE_SIZE) +     \
+        RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +      \
+        RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +      \
+        RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
+
+/* Minimum space required for a packet */
+#define HN_PKTSIZE_MIN(align) \
+       RTE_ALIGN(ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align)
+
+/* Upper bound for the Tx free threshold chosen when none is configured */
+#define DEFAULT_TX_FREE_THRESH 32U
+
+/*
+ * Account one packet in the size histogram and in the
+ * broadcast/multicast counters of @stats.
+ *
+ * Histogram bins by pkt_len:
+ *   0: < 64      1: == 64      2: 65-127     3: 128-255
+ *   4: 256-511   5: 512-1023   6: 1024-1518  7: >= 1519
+ */
+static void
+hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m)
+{
+       const uint32_t len = m->pkt_len;
+       const struct ether_addr *dst;
+       uint32_t bin;
+
+       if (len < 64)
+               bin = 0;
+       else if (len == 64)
+               bin = 1;
+       else if (len < 1024)
+               /* 65..1023: derive the bin from the highest set bit */
+               bin = (sizeof(len) * 8) - __builtin_clz(len) - 5;
+       else if (len < 1519)
+               bin = 6;
+       else
+               bin = 7;
+       stats->size_bins[bin]++;
+
+       /* first address in the frame data (the Ethernet destination) */
+       dst = rte_pktmbuf_mtod(m, const struct ether_addr *);
+       if (!is_multicast_ether_addr(dst))
+               return;
+
+       if (is_broadcast_ether_addr(dst))
+               stats->broadcast++;
+       else
+               stats->multicast++;
+}
+
+/* Sum of the per-packet-info offset and length stored in @pkt. */
+static inline unsigned int hn_rndis_pktlen(const struct rndis_packet_msg *pkt)
+{
+       unsigned int end = pkt->pktinfooffset;
+
+       end += pkt->pktinfolen;
+       return end;
+}
+
+/*
+ * Convert an offset counted from the start of the RNDIS packet message
+ * into one counted from its dataoffset field.
+ */
+static inline uint32_t
+hn_rndis_pktmsg_offset(uint32_t ofs)
+{
+       const uint32_t base = offsetof(struct rndis_packet_msg, dataoffset);
+
+       return ofs - base;
+}
+
+/*
+ * Mempool object constructor for Tx descriptors.
+ * @opaque is the rte_eth_dev, @obj the descriptor and @idx its index
+ * in the pool, which becomes the descriptor's chimney buffer slot.
+ * Terminates the application if the RNDIS header can not be allocated.
+ */
+static void hn_txd_init(struct rte_mempool *mp __rte_unused,
+                       void *opaque, void *obj, unsigned int idx)
+{
+       struct rte_eth_dev *dev = opaque;
+       struct hn_txdesc *txd = obj;
+
+       memset(txd, 0, sizeof(*txd));
+       txd->chim_index = idx;
+
+       txd->rndis_pkt = rte_malloc_socket("RNDIS_TX", HN_RNDIS_PKT_LEN,
+                                          rte_align32pow2(HN_RNDIS_PKT_LEN),
+                                          dev->device->numa_node);
+       if (txd->rndis_pkt == NULL)
+               rte_exit(EXIT_FAILURE, "can not allocate RNDIS header");
+}
+
+/*
+ * Create the per-port pool of Tx descriptors.
+ *
+ * Unlike Linux and FreeBSD, this driver relies on a mempool both to
+ * cap the number of outstanding transmits and to reserve buffers.
+ * The pool holds hv->chim_cnt descriptors.
+ *
+ * Returns 0 on success or -rte_errno if the pool can not be created.
+ */
+int
+hn_tx_pool_init(struct rte_eth_dev *dev)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       char name[RTE_MEMPOOL_NAMESIZE];
+       struct rte_mempool *pool;
+
+       snprintf(name, sizeof(name),
+                "hn_txd_%u", dev->data->port_id);
+
+       PMD_INIT_LOG(DEBUG, "create a TX send pool %s n=%u size=%zu socket=%d",
+                    name, hv->chim_cnt, sizeof(struct hn_txdesc),
+                    dev->device->numa_node);
+
+       pool = rte_mempool_create(name, hv->chim_cnt, sizeof(struct hn_txdesc),
+                                 HN_TXD_CACHE_SIZE, 0,
+                                 NULL, NULL,
+                                 hn_txd_init, dev,
+                                 dev->device->numa_node, 0);
+       if (pool != NULL) {
+               hv->tx_pool = pool;
+               return 0;
+       }
+
+       PMD_DRV_LOG(ERR,
+                   "mempool %s create failed: %d", name, rte_errno);
+       return -rte_errno;
+}
+
+/*
+ * Forget any aggregation in progress on @txq and restore the
+ * size/packet budgets to their per-queue maximum.
+ */
+static void hn_reset_txagg(struct hn_tx_queue *txq)
+{
+       txq->agg_txd = NULL;
+       txq->agg_prevpkt = NULL;
+
+       txq->agg_pktleft = txq->agg_pktmax;
+       txq->agg_szleft = txq->agg_szmax;
+}
+
+/*
+ * Allocate transmit queue @queue_idx and publish it in
+ * dev->data->tx_queues. @nb_desc is not used.
+ *
+ * The free threshold comes from @tx_conf; when that is zero it
+ * defaults to a quarter of the chimney buffer count, at most
+ * DEFAULT_TX_FREE_THRESH. Either way it is limited to chim_cnt - 3.
+ *
+ * Returns 0, or -ENOMEM when the queue can not be allocated.
+ */
+int
+hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
+                     uint16_t queue_idx, uint16_t nb_desc __rte_unused,
+                     unsigned int socket_id,
+                     const struct rte_eth_txconf *tx_conf)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       struct hn_tx_queue *txq;
+       uint32_t thresh, limit;
+
+       PMD_INIT_FUNC_TRACE();
+
+       txq = rte_zmalloc_socket("HN_TXQ", sizeof(*txq), RTE_CACHE_LINE_SIZE,
+                                socket_id);
+       if (txq == NULL)
+               return -ENOMEM;
+
+       txq->hv = hv;
+       txq->chan = hv->channels[queue_idx];
+       txq->port_id = dev->data->port_id;
+       txq->queue_id = queue_idx;
+
+       thresh = tx_conf->tx_free_thresh;
+       if (thresh == 0)
+               thresh = RTE_MIN(hv->chim_cnt / 4,
+                                DEFAULT_TX_FREE_THRESH);
+
+       limit = hv->chim_cnt - 3;
+       if (thresh >= limit)
+               thresh = limit;
+       txq->free_thresh = thresh;
+
+       /* aggregation limits negotiated for the device */
+       txq->agg_szmax  = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size);
+       txq->agg_pktmax = hv->rndis_agg_pkts;
+       txq->agg_align  = hv->rndis_agg_align;
+
+       hn_reset_txagg(txq);
+
+       dev->data->tx_queues[queue_idx] = txq;
+       return 0;
+}
+
+/*
+ * Free a transmit queue; @arg may be NULL.
+ * A descriptor still being aggregated is returned to the pool
+ * without being sent.
+ */
+void
+hn_dev_tx_queue_release(void *arg)
+{
+       struct hn_tx_queue *txq = arg;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (txq == NULL)
+               return;
+
+       /* pending aggregated data is simply dropped */
+       if (txq->agg_txd != NULL)
+               rte_mempool_put(txq->hv->tx_pool, txq->agg_txd);
+
+       rte_free(txq);
+}
+
+/*
+ * Completion of one data transmit.
+ * @xactid carries back the address of the hn_txdesc that was sent;
+ * control packets use 0 and have nothing to release.
+ * Updates the queue statistics, then frees the mbuf and returns the
+ * descriptor to the pool.
+ */
+static void
+hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
+                     unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
+{
+       struct hn_txdesc *txd = (struct hn_txdesc *)xactid;
+       struct hn_tx_queue *txq;
+
+       /* Control packets are sent with xactid == 0 */
+       if (txd == NULL)
+               return;
+
+       txq = dev->data->tx_queues[queue_id];
+       if (unlikely(ack->status != NVS_STATUS_OK)) {
+               PMD_TX_LOG(NOTICE, "port %u:%u complete tx %u failed status %u",
+                          txq->port_id, txq->queue_id, txd->chim_index, ack->status);
+               ++txq->stats.errors;
+       } else {
+               PMD_TX_LOG(DEBUG, "port %u:%u complete tx %u packets %u bytes %u",
+                          txq->port_id, txq->queue_id, txd->chim_index,
+                          txd->packets, txd->data_size);
+               txq->stats.packets += txd->packets;
+               txq->stats.bytes += txd->data_size;
+       }
+
+       /* released whether or not the send succeeded */
+       rte_pktmbuf_free(txd->m);
+       rte_mempool_put(txq->hv->tx_pool, txd);
+}
+
+/*
+ * Handle a completion packet from the channel.
+ * Only RNDIS acks are expected; anything else is logged and ignored.
+ */
+static void
+hn_nvs_handle_comp(struct rte_eth_dev *dev, uint16_t queue_id,
+                  const struct vmbus_chanpkt_hdr *pkt,
+                  const void *data)
+{
+       const struct hn_nvs_hdr *hdr = data;
+
+       if (hdr->type == NVS_TYPE_RNDIS_ACK) {
+               hn_nvs_send_completed(dev, queue_id, pkt->xactid, data);
+               return;
+       }
+
+       PMD_TX_LOG(NOTICE,
+                  "unexpected send completion type %u",
+                  hdr->type);
+}
+
+/*
+ * Parse per-packet info (meta data).
+ *
+ * @info_data: start of the per-packet-info area
+ * @info_dlen: length of that area in bytes
+ * @info:      receives VLAN, checksum and hash information; fields
+ *             for elements that are not present are left untouched
+ *
+ * Walks the list of variable sized elements until the area is
+ * exhausted or all four known element types have been seen.
+ * Elements of unknown type are skipped.
+ *
+ * Returns 0 on success, -EINVAL if any element is malformed.
+ */
+static int
+hn_rndis_rxinfo(const void *info_data, unsigned int info_dlen,
+               struct hn_rxinfo *info)
+{
+       const struct rndis_pktinfo *pi = info_data;
+       uint32_t mask = 0;
+
+       while (info_dlen != 0) {
+               const void *data;
+               uint32_t dlen;
+
+               if (unlikely(info_dlen < sizeof(*pi)))
+                       return -EINVAL;
+
+               /*
+                * An element can not be smaller than its own header.
+                * Without this check an element of unknown type with
+                * size 0 would never advance and loop forever.
+                */
+               if (unlikely(pi->size < sizeof(*pi)))
+                       return -EINVAL;
+
+               if (unlikely(info_dlen < pi->size))
+                       return -EINVAL;
+               info_dlen -= pi->size;
+
+               if (unlikely(pi->size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
+                       return -EINVAL;
+               if (unlikely(pi->size < pi->offset))
+                       return -EINVAL;
+
+               dlen = pi->size - pi->offset;
+               data = pi->data;
+
+               switch (pi->type) {
+               case NDIS_PKTINFO_TYPE_VLAN:
+                       if (unlikely(dlen < NDIS_VLAN_INFO_SIZE))
+                               return -EINVAL;
+                       info->vlan_info = *((const uint32_t *)data);
+                       mask |= HN_RXINFO_VLAN;
+                       break;
+
+               case NDIS_PKTINFO_TYPE_CSUM:
+                       if (unlikely(dlen < NDIS_RXCSUM_INFO_SIZE))
+                               return -EINVAL;
+                       info->csum_info = *((const uint32_t *)data);
+                       mask |= HN_RXINFO_CSUM;
+                       break;
+
+               case NDIS_PKTINFO_TYPE_HASHVAL:
+                       if (unlikely(dlen < NDIS_HASH_VALUE_SIZE))
+                               return -EINVAL;
+                       info->hash_value = *((const uint32_t *)data);
+                       mask |= HN_RXINFO_HASHVAL;
+                       break;
+
+               case NDIS_PKTINFO_TYPE_HASHINF:
+                       if (unlikely(dlen < NDIS_HASH_INFO_SIZE))
+                               return -EINVAL;
+                       info->hash_info = *((const uint32_t *)data);
+                       mask |= HN_RXINFO_HASHINF;
+                       break;
+
+               default:
+                       /* not interesting; step over it */
+                       goto next;
+               }
+
+               if (mask == HN_RXINFO_ALL)
+                       break; /* All found; done */
+next:
+               pi = (const struct rndis_pktinfo *)
+                   ((const uint8_t *)pi + pi->size);
+       }
+
+       /*
+        * Final fixup.
+        * - If there is no hash value, invalidate the hash info.
+        */
+       if (!(mask & HN_RXINFO_HASHVAL))
+               info->hash_info = HN_NDIS_HASH_INFO_INVALID;
+       return 0;
+}
+
+/*
+ * Drop one reference on a receive buffer section.
+ * When the last reference goes away the section is acked on its
+ * channel, so that the hypervisor can recycle this RXBUF, and the
+ * count of outstanding sections is decremented.
+ */
+static void hn_rx_buf_release(struct hn_rx_bufinfo *rxb)
+{
+       struct hn_data *hv = rxb->hv;
+
+       /* somebody still holds a reference */
+       if (rte_mbuf_ext_refcnt_update(&rxb->shinfo, -1) != 0)
+               return;
+
+       hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
+       --hv->rxbuf_outstanding;
+}
+
+/*
+ * Free callback installed in the shared info of external mbufs;
+ * @opaque is the hn_rx_bufinfo the mbuf data belongs to.
+ */
+static void hn_rx_buf_free_cb(void *buf __rte_unused, void *opaque)
+{
+       struct hn_rx_bufinfo *rxb = opaque;
+
+       hn_rx_buf_release(rxb);
+}
+
+/*
+ * Prepare the bookkeeping entry for the receive buffer section
+ * named by the transaction id of @pkt.
+ * The entry starts with a reference count of one, which the caller
+ * is expected to drop with hn_rx_buf_release().
+ */
+static struct hn_rx_bufinfo *hn_rx_buf_init(const struct hn_rx_queue *rxq,
+                                           const struct vmbus_chanpkt_rxbuf *pkt)
+{
+       struct hn_data *hv = rxq->hv;
+       struct hn_rx_bufinfo *rxb = &hv->rxbuf_info[pkt->hdr.xactid];
+
+       rxb->hv = hv;
+       rxb->chan = rxq->chan;
+       rxb->xactid = pkt->hdr.xactid;
+
+       /* lets mbufs attached to this section find their way back here */
+       rxb->shinfo.fcb_opaque = rxb;
+       rxb->shinfo.free_cb = hn_rx_buf_free_cb;
+       rte_mbuf_ext_refcnt_set(&rxb->shinfo, 1);
+
+       return rxb;
+}
+
+/*
+ * Turn one received Ethernet frame into an mbuf and queue it on the
+ * staging ring of @rxq.
+ *
+ * @rxb:      bookkeeping of the receive buffer section holding the frame
+ * @data:     start of the RNDIS message inside the receive area
+ * @headroom: offset of the frame from @data
+ * @dlen:     length of the frame
+ * @info:     per-packet meta data used to set offload flags
+ *
+ * The frame is either copied into the mbuf or, when it is large and
+ * less than half of the receive sections are outstanding, attached as
+ * an external buffer. The packet is dropped when no mbuf is available,
+ * when it does not fit the mbuf, or when the ring is full.
+ */
+static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
+                    uint8_t *data, unsigned int headroom, unsigned int dlen,
+                    const struct hn_rxinfo *info)
+{
+       struct hn_data *hv = rxq->hv;
+       struct rte_mbuf *m;
+
+       m = rte_pktmbuf_alloc(rxq->mb_pool);
+       if (unlikely(!m)) {
+               struct rte_eth_dev *dev =
+                       &rte_eth_devices[rxq->port_id];
+
+               dev->data->rx_mbuf_alloc_failed++;
+               return;
+       }
+
+       /*
+        * For large packets, avoid copy if possible but need to keep
+        * some space available in receive area for later packets.
+        */
+       if (dlen >= HN_RXCOPY_THRESHOLD &&
+           hv->rxbuf_outstanding < hv->rxbuf_section_cnt / 2) {
+               struct rte_mbuf_ext_shared_info *shinfo;
+               const void *rxbuf;
+               rte_iova_t iova;
+
+               /*
+                * Build an external mbuf that points to receive area.
+                * Use refcount to handle multiple packets in same
+                * receive buffer section.
+                */
+               rxbuf = hv->rxbuf_res->addr;
+               iova = rte_mem_virt2iova(rxbuf) + RTE_PTR_DIFF(data, rxbuf);
+               shinfo = &rxb->shinfo;
+
+               /* first extra reference on this section: it is now outstanding */
+               if (rte_mbuf_ext_refcnt_update(shinfo, 1) == 1)
+                       ++hv->rxbuf_outstanding;
+
+               rte_pktmbuf_attach_extbuf(m, data, iova,
+                                         dlen + headroom, shinfo);
+               m->data_off = headroom;
+       } else {
+               /* Mbuf's in pool must be large enough to hold small packets */
+               if (unlikely(rte_pktmbuf_tailroom(m) < dlen)) {
+                       rte_pktmbuf_free_seg(m);
+                       ++rxq->stats.errors;
+                       return;
+               }
+               rte_memcpy(rte_pktmbuf_mtod(m, void *),
+                          data + headroom, dlen);
+       }
+
+       m->port = rxq->port_id;
+       m->pkt_len = dlen;
+       m->data_len = dlen;
+
+       if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
+               m->vlan_tci = info->vlan_info;
+               m->ol_flags |= PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN;
+       }
+
+       if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
+               if (info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK)
+                       m->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
+
+               if (info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK
+                                      | NDIS_RXCSUM_INFO_TCPCS_OK))
+                       m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
+       }
+
+       if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
+               m->ol_flags |= PKT_RX_RSS_HASH;
+               m->hash.rss = info->hash_value;
+       }
+
+       PMD_RX_LOG(DEBUG, "port %u:%u RX id %" PRIu64 " size %u ol_flags %#" PRIx64,
+                  rxq->port_id, rxq->queue_id, rxb->xactid,
+                  m->pkt_len, m->ol_flags);
+
+       /* counted as received even if the enqueue below fails */
+       ++rxq->stats.packets;
+       rxq->stats.bytes += m->pkt_len;
+       hn_update_packet_stats(&rxq->stats, m);
+
+       if (unlikely(rte_ring_sp_enqueue(rxq->rx_ring, m) != 0)) {
+               ++rxq->ring_full;
+               rte_pktmbuf_free(m);
+       }
+}
+
+/*
+ * Validate one RNDIS data message and hand the Ethernet frame it
+ * carries to hn_rxpkt().
+ *
+ * @rxq:  receive queue the message arrived on
+ * @rxb:  bookkeeping of the receive buffer section holding @data
+ * @data: start of the RNDIS packet message
+ * @dlen: number of bytes available at @data
+ *
+ * A malformed message is dropped and counted in rxq->stats.errors.
+ */
+static void hn_rndis_rx_data(struct hn_rx_queue *rxq,
+                            struct hn_rx_bufinfo *rxb,
+                            void *data, uint32_t dlen)
+{
+       unsigned int data_off, data_len, pktinfo_off, pktinfo_len;
+       const struct rndis_packet_msg *pkt = data;
+       struct hn_rxinfo info = {
+               .vlan_info = HN_NDIS_VLAN_INFO_INVALID,
+               .csum_info = HN_NDIS_RXCSUM_INFO_INVALID,
+               .hash_info = HN_NDIS_HASH_INFO_INVALID,
+       };
+       int err;
+
+       hn_rndis_dump(pkt);
+
+       if (unlikely(dlen < sizeof(*pkt)))
+               goto error;
+
+       if (unlikely(dlen < pkt->len))
+               goto error; /* truncated RNDIS from host */
+
+       if (unlikely(pkt->len < pkt->datalen
+                    + pkt->oobdatalen + pkt->pktinfolen))
+               goto error;
+
+       if (unlikely(pkt->datalen == 0))
+               goto error;
+
+       /* Check offsets. */
+       if (unlikely(pkt->dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN))
+               goto error;
+
+       if (likely(pkt->pktinfooffset > 0) &&
+           unlikely(pkt->pktinfooffset < RNDIS_PACKET_MSG_OFFSET_MIN ||
+                    (pkt->pktinfooffset & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)))
+               goto error;
+
+       data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
+       data_len = pkt->datalen;
+       pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->pktinfooffset);
+       pktinfo_len = pkt->pktinfolen;
+
+       if (likely(pktinfo_len > 0)) {
+               /*
+                * The per-packet-info area must lie inside the message
+                * before it is parsed. Compare by subtraction so that a
+                * huge offset can not wrap the sum around.
+                */
+               if (unlikely(pktinfo_off > pkt->len ||
+                            pktinfo_len > pkt->len - pktinfo_off))
+                       goto error;
+
+               err = hn_rndis_rxinfo((const uint8_t *)pkt + pktinfo_off,
+                                     pktinfo_len, &info);
+               if (err)
+                       goto error;
+       }
+
+       /* Same wrap-safe coverage check for the frame itself */
+       if (unlikely(data_off > pkt->len ||
+                    data_len > pkt->len - data_off))
+               goto error;
+
+       if (unlikely(data_len < ETHER_HDR_LEN))
+               goto error;
+
+       hn_rxpkt(rxq, rxb, data, data_off, data_len, &info);
+       return;
+error:
+       ++rxq->stats.errors;
+}
+
+/*
+ * Dispatch one RNDIS message found in the receive area.
+ *
+ * @buf/@len describe the message; the type is read from its header.
+ * Data messages are processed only while the port is started,
+ * status indications and completions are handed to the RNDIS
+ * control code, anything else is logged.
+ *
+ * NOTE(review): hdr->type is read without first checking that @len
+ * covers the message header - confirm callers guarantee this.
+ */
+static void
+hn_rndis_receive(const struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
+                struct hn_rx_bufinfo *rxb, void *buf, uint32_t len)
+{
+       const struct rndis_msghdr *hdr = buf;
+
+       switch (hdr->type) {
+       case RNDIS_PACKET_MSG:
+               if (dev->data->dev_started)
+                       hn_rndis_rx_data(rxq, rxb, buf, len);
+               break;
+
+       case RNDIS_INDICATE_STATUS_MSG:
+               hn_rndis_link_status(rxq->hv, buf);
+               break;
+
+       case RNDIS_INITIALIZE_CMPLT:
+       case RNDIS_QUERY_CMPLT:
+       case RNDIS_SET_CMPLT:
+               hn_rndis_receive_response(rxq->hv, buf, len);
+               break;
+
+       default:
+               PMD_DRV_LOG(NOTICE,
+                           "unexpected RNDIS message (type %#x len %u)",
+                           hdr->type, len);
+               break;
+       }
+}
+
+/*
+ * Handle a RXBUF channel packet: a list of (offset, length) ranges in
+ * the shared receive area, each holding one RNDIS message.
+ *
+ * @hdr: channel packet header (start of a vmbus_chanpkt_rxbuf)
+ * @buf: payload following the header, expected to be an NVS header
+ *
+ * The packet is validated, each range is passed to hn_rndis_receive()
+ * and finally the initial reference on the section is dropped, which
+ * acks it to the host unless mbufs still point into it.
+ * Ranges that fall outside the receive area or are empty are skipped.
+ */
+static void
+hn_nvs_handle_rxbuf(struct rte_eth_dev *dev,
+                   struct hn_data *hv,
+                   struct hn_rx_queue *rxq,
+                   const struct vmbus_chanpkt_hdr *hdr,
+                   const void *buf)
+{
+       const struct vmbus_chanpkt_rxbuf *pkt;
+       const struct hn_nvs_hdr *nvs_hdr = buf;
+       uint32_t rxbuf_sz = hv->rxbuf_res->len;
+       char *rxbuf = hv->rxbuf_res->addr;
+       unsigned int i, hlen, count;
+       struct hn_rx_bufinfo *rxb;
+
+       /* At minimum we need type header */
+       if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*nvs_hdr))) {
+               PMD_RX_LOG(ERR, "invalid receive nvs RNDIS");
+               return;
+       }
+
+       /* Make sure that this is a RNDIS message. */
+       if (unlikely(nvs_hdr->type != NVS_TYPE_RNDIS)) {
+               PMD_RX_LOG(ERR, "nvs type %u, not RNDIS",
+                          nvs_hdr->type);
+               return;
+       }
+
+       hlen = vmbus_chanpkt_getlen(hdr->hlen);
+       if (unlikely(hlen < sizeof(*pkt))) {
+               PMD_RX_LOG(ERR, "invalid rxbuf chanpkt");
+               return;
+       }
+
+       pkt = container_of(hdr, const struct vmbus_chanpkt_rxbuf, hdr);
+       if (unlikely(pkt->rxbuf_id != NVS_RXBUF_SIG)) {
+               PMD_RX_LOG(ERR, "invalid rxbuf_id 0x%08x",
+                          pkt->rxbuf_id);
+               return;
+       }
+
+       /* the header must be long enough to hold all advertised ranges */
+       count = pkt->rxbuf_cnt;
+       if (unlikely(hlen < offsetof(struct vmbus_chanpkt_rxbuf,
+                                    rxbuf[count]))) {
+               PMD_RX_LOG(ERR, "invalid rxbuf_cnt %u", count);
+               return;
+       }
+
+       /*
+        * NOTE(review): xactid indexes hv->rxbuf_info. If that array has
+        * exactly rxbuf_section_cnt entries this test should be ">=" -
+        * confirm against the allocation.
+        */
+       if (pkt->hdr.xactid > hv->rxbuf_section_cnt) {
+               PMD_RX_LOG(ERR, "invalid rxbuf section id %" PRIx64,
+                          pkt->hdr.xactid);
+               return;
+       }
+
+       /* Setup receive buffer info to allow for callback */
+       rxb = hn_rx_buf_init(rxq, pkt);
+
+       /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
+       for (i = 0; i < count; ++i) {
+               unsigned int ofs, len;
+
+               ofs = pkt->rxbuf[i].ofs;
+               len = pkt->rxbuf[i].len;
+
+               /*
+                * Compare by subtraction: "ofs + len" could wrap around
+                * and make an out of range message look valid.
+                */
+               if (unlikely(ofs > rxbuf_sz || len > rxbuf_sz - ofs)) {
+                       PMD_RX_LOG(ERR,
+                                  "%uth RNDIS msg overflow ofs %u, len %u",
+                                  i, ofs, len);
+                       continue;
+               }
+
+               if (unlikely(len == 0)) {
+                       PMD_RX_LOG(ERR, "%uth RNDIS msg len %u", i, len);
+                       continue;
+               }
+
+               hn_rndis_receive(dev, rxq, rxb,
+                                rxbuf + ofs, len);
+       }
+
+       /* Send ACK now if external mbuf not used */
+       hn_rx_buf_release(rxb);
+}
+
+/*
+ * Allocate a receive queue together with its event buffer
+ * (HN_RXQ_EVENT_DEFAULT bytes) on @socket_id and bind it to channel
+ * @queue_id of @hv. The staging ring is not created here.
+ *
+ * Returns the queue, or NULL when either allocation fails.
+ */
+struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
+                                     uint16_t queue_id,
+                                     unsigned int socket_id)
+{
+       struct hn_rx_queue *rxq;
+
+       rxq = rte_zmalloc_socket("HN_RXQ", sizeof(*rxq),
+                                RTE_CACHE_LINE_SIZE, socket_id);
+       if (rxq == NULL)
+               return NULL;
+
+       rxq->hv = hv;
+       rxq->chan = hv->channels[queue_id];
+       rte_spinlock_init(&rxq->ring_lock);
+       rxq->port_id = hv->port_id;
+       rxq->queue_id = queue_id;
+
+       rxq->event_sz = HN_RXQ_EVENT_DEFAULT;
+       rxq->event_buf = rte_malloc_socket("RX_EVENTS",
+                                          rxq->event_sz,
+                                          RTE_CACHE_LINE_SIZE,
+                                          socket_id);
+       if (rxq->event_buf == NULL) {
+               rte_free(rxq);
+               return NULL;
+       }
+
+       return rxq;
+}
+
+/*
+ * Set up receive queue @queue_idx.
+ *
+ * Queue 0 reuses the primary queue owned by the device (hv->primary);
+ * other queues are allocated here. In both cases a staging ring of
+ * @nb_desc entries (rounded up to a power of two, and limited to the
+ * number of receive buffer sections) is created and @mp is recorded
+ * as the queue's mbuf pool.
+ *
+ * Returns 0 on success or a negative errno value. On failure
+ * everything allocated by this call is released; the primary queue
+ * itself is never freed because it was not allocated here and
+ * hn_dev_rx_queue_release() does not free it either.
+ */
+int
+hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
+                     uint16_t queue_idx, uint16_t nb_desc,
+                     unsigned int socket_id,
+                     const struct rte_eth_rxconf *rx_conf __rte_unused,
+                     struct rte_mempool *mp)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       uint32_t qmax = hv->rxbuf_section_cnt;
+       char ring_name[RTE_RING_NAMESIZE];
+       struct hn_rx_queue *rxq;
+       unsigned int count;
+       size_t size;
+       int err = -ENOMEM;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (nb_desc == 0 || nb_desc > qmax)
+               nb_desc = qmax;
+
+       if (queue_idx == 0) {
+               rxq = hv->primary;
+       } else {
+               rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);
+               if (!rxq)
+                       return -ENOMEM;
+       }
+
+       rxq->mb_pool = mp;
+
+       count = rte_align32pow2(nb_desc);
+       size = sizeof(struct rte_ring) + count * sizeof(void *);
+       rxq->rx_ring = rte_malloc_socket("RX_RING", size,
+                                        RTE_CACHE_LINE_SIZE,
+                                        socket_id);
+       if (!rxq->rx_ring)
+               goto fail;
+
+       /*
+        * Staging ring from receive event logic to rx_pkts.
+        * rx_pkts assumes caller is handling multi-thread issue.
+        * event logic has locking.
+        */
+       snprintf(ring_name, sizeof(ring_name),
+                "hn_rx_%u_%u", dev->data->port_id, queue_idx);
+       err = rte_ring_init(rxq->rx_ring, ring_name,
+                           count, 0);
+       if (err)
+               goto fail;
+
+       dev->data->rx_queues[queue_idx] = rxq;
+       return 0;
+
+fail:
+       rte_free(rxq->rx_ring);
+       if (rxq == hv->primary) {
+               /* keep the primary queue, just without ring and pool */
+               rxq->rx_ring = NULL;
+               rxq->mb_pool = NULL;
+       } else {
+               rte_free(rxq->event_buf);
+               rte_free(rxq);
+       }
+       /* -ENOMEM, or whatever rte_ring_init() reported */
+       return err;
+}
+
+/*
+ * Release a receive queue; @arg may be NULL.
+ * The staging ring is always freed. The queue structure and its event
+ * buffer are freed too, except for the device's primary queue.
+ */
+void
+hn_dev_rx_queue_release(void *arg)
+{
+       struct hn_rx_queue *rxq = arg;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (rxq == NULL)
+               return;
+
+       rte_free(rxq->rx_ring);
+       rxq->rx_ring = NULL;
+       rxq->mb_pool = NULL;
+
+       /* the primary queue is kept */
+       if (rxq == rxq->hv->primary)
+               return;
+
+       rte_free(rxq->event_buf);
+       rte_free(rxq);
+}
+
+/*
+ * Handle an inband channel packet: nothing is done with it beyond
+ * logging the NVS type, or an error when the packet is too short to
+ * contain an NVS header.
+ */
+static void
+hn_nvs_handle_notify(const struct vmbus_chanpkt_hdr *pkthdr,
+                    const void *data)
+{
+       const struct hn_nvs_hdr *hdr = data;
+
+       if (likely(vmbus_chanpkt_datalen(pkthdr) >= sizeof(*hdr))) {
+               PMD_DRV_LOG(INFO,
+                           "got notify, nvs type %u", hdr->type);
+               return;
+       }
+
+       PMD_DRV_LOG(ERR, "invalid nvs notify");
+}
+
+/*
+ * Process pending events on the channel.
+ * Called from both Rx queue poll and Tx cleanup
+ *
+ * Drains the channel of queue @queue_id: each packet is read into the
+ * queue's event buffer and dispatched by type (send completion,
+ * receive buffer, inband notification). Returns without doing
+ * anything when the channel is empty or when another thread already
+ * holds the queue's lock.
+ */
+void hn_process_events(struct hn_data *hv, uint16_t queue_id)
+{
+       struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id];
+       struct hn_rx_queue *rxq;
+       int ret = 0;
+
+       /* queue 0 always uses the primary queue owned by the device */
+       rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id];
+
+       /* If no pending data then nothing to do */
+       if (rte_vmbus_chan_rx_empty(rxq->chan))
+               return;
+
+       /*
+        * Since channel is shared between Rx and TX queue need to have a lock
+        * since DPDK does not force same CPU to be used for Rx/Tx.
+        */
+       if (unlikely(!rte_spinlock_trylock(&rxq->ring_lock)))
+               return;
+
+       for (;;) {
+               const struct vmbus_chanpkt_hdr *pkt;
+               uint32_t len = rxq->event_sz;
+               const void *data;
+
+               ret = rte_vmbus_chan_recv_raw(rxq->chan, rxq->event_buf, &len);
+               if (ret == -EAGAIN)
+                       break;  /* ring is empty */
+
+               if (ret == -ENOBUFS) {
+                       /* expanded buffer needed */
+                       /* NOTE(review): presumably len now holds the size needed - confirm */
+                       len = rte_align32pow2(len);
+                       PMD_DRV_LOG(DEBUG, "expand event buf to %u", len);
+
+                       rxq->event_buf = rte_realloc(rxq->event_buf,
+                                                    len, RTE_CACHE_LINE_SIZE);
+                       if (rxq->event_buf) {
+                               rxq->event_sz = len;
+                               continue;
+                       }
+
+                       /* no way to make progress without the buffer */
+                       rte_exit(EXIT_FAILURE, "can not expand event buf!\n");
+                       break;
+               }
+
+               if (ret != 0) {
+                       PMD_DRV_LOG(ERR, "vmbus ring buffer error: %d", ret);
+                       break;
+               }
+
+               /* payload starts after the (variable length) packet header */
+               pkt = (const struct vmbus_chanpkt_hdr *)rxq->event_buf;
+               data = (char *)rxq->event_buf + vmbus_chanpkt_getlen(pkt->hlen);
+
+               switch (pkt->type) {
+               case VMBUS_CHANPKT_TYPE_COMP:
+                       hn_nvs_handle_comp(dev, queue_id, pkt, data);
+                       break;
+
+               case VMBUS_CHANPKT_TYPE_RXBUF:
+                       hn_nvs_handle_rxbuf(dev, hv, rxq, pkt, data);
+                       break;
+
+               case VMBUS_CHANPKT_TYPE_INBAND:
+                       hn_nvs_handle_notify(pkt, data);
+                       break;
+
+               default:
+                       PMD_DRV_LOG(ERR, "unknown chan pkt %u", pkt->type);
+                       break;
+               }
+       }
+       rte_spinlock_unlock(&rxq->ring_lock);
+
+       /* the loop normally ends with -EAGAIN; anything else is a failure */
+       if (unlikely(ret != -EAGAIN))
+               PMD_DRV_LOG(ERR, "channel receive failed: %d", ret);
+}
+
+/*
+ * Add packet @m to the descriptor currently being aggregated on @txq.
+ *
+ * @pkt is the RNDIS packet message already built for this packet;
+ * the data of every segment of @m is copied behind it, starting at
+ * the absolute form of pkt->dataoffset. The descriptor's size and
+ * packet counters and the queue statistics are updated.
+ */
+static void hn_append_to_chim(struct hn_tx_queue *txq,
+                             struct rndis_packet_msg *pkt,
+                             const struct rte_mbuf *m)
+{
+       struct hn_txdesc *txd = txq->agg_txd;
+       const struct rte_mbuf *seg;
+       unsigned int ofs;
+
+       hn_rndis_dump(pkt);
+
+       ofs = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
+
+       txd->chim_size += pkt->len;
+       txd->data_size += m->pkt_len;
+       ++txd->packets;
+       hn_update_packet_stats(&txq->stats, m);
+
+       seg = m;
+       while (seg != NULL) {
+               uint16_t seg_len = rte_pktmbuf_data_len(seg);
+
+               rte_memcpy((uint8_t *)pkt + ofs,
+                          rte_pktmbuf_mtod(seg, const char *), seg_len);
+               ofs += seg_len;
+               seg = seg->next;
+       }
+}
+
+/*
+ * Send pending aggregated data in chimney buffer (if any).
+ *
+ * Returns 0 when there was nothing to send or the send was queued,
+ * in which case the aggregation state of @txq is reset. Otherwise the
+ * error from hn_nvs_send() is returned (for instance because the
+ * channel ring buffer was full) and the pending descriptor is kept.
+ */
+static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig)
+{
+       struct hn_txdesc *txd = txq->agg_txd;
+       struct hn_nvs_rndis rndis;
+       int ret;
+
+       if (txd == NULL)
+               return 0;
+
+       rndis = (struct hn_nvs_rndis) {
+               .type = NVS_TYPE_RNDIS,
+               .rndis_mtype = NVS_RNDIS_MTYPE_DATA,
+               .chim_idx = txd->chim_index,
+               .chim_sz = txd->chim_size,
+       };
+
+       PMD_TX_LOG(DEBUG, "port %u:%u tx %u size %u",
+                  txq->port_id, txq->queue_id, txd->chim_index, txd->chim_size);
+
+       /* the descriptor address travels as transaction id */
+       ret = hn_nvs_send(txq->chan, VMBUS_CHANPKT_FLAG_RC,
+                         &rndis, sizeof(rndis), (uintptr_t)txd, need_sig);
+       if (unlikely(ret != 0)) {
+               PMD_TX_LOG(NOTICE, "port %u:%u send failed: %d",
+                          txq->port_id, txq->queue_id, ret);
+               return ret;
+       }
+
+       hn_reset_txagg(txq);
+       return ret;
+}
+
+/*
+ * Take a Tx descriptor from the device pool and reset its per-send
+ * fields. chim_index and rndis_pkt, set when the pool was populated,
+ * are left alone.
+ *
+ * Returns NULL and counts the event in txq->stats.nomemory when the
+ * pool is empty.
+ */
+static struct hn_txdesc *hn_new_txd(struct hn_data *hv,
+                                   struct hn_tx_queue *txq)
+{
+       struct hn_txdesc *txd;
+       void *obj;
+
+       if (rte_mempool_get(hv->tx_pool, &obj) != 0) {
+               ++txq->stats.nomemory;
+               PMD_TX_LOG(DEBUG, "tx pool exhausted!");
+               return NULL;
+       }
+
+       txd = obj;
+       txd->queue_id = txq->queue_id;
+       txd->m = NULL;
+       txd->chim_size = 0;
+       txd->data_size = 0;
+       txd->packets = 0;
+
+       return txd;
+}
+
+/*
+ * Reserve room in a chimney buffer for one more RNDIS packet of
+ * @pktsize bytes.
+ *
+ * If an aggregation is in progress the new packet is placed after the
+ * previous one, which is first padded to the queue's alignment.
+ * Otherwise a new descriptor is taken from the pool and the packet
+ * starts at the beginning of that descriptor's chimney buffer slot.
+ *
+ * Returns where the packet is to be built, or NULL when no descriptor
+ * is available. The remaining size and packet budgets of @txq are
+ * updated.
+ *
+ * NOTE(review): nothing here checks that @pktsize fits in the
+ * remaining space; presumably the caller does - confirm.
+ */
+static void *
+hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
+{
+       struct hn_txdesc *agg_txd = txq->agg_txd;
+       struct rndis_packet_msg *pkt;
+       void *chim;
+
+       if (agg_txd) {
+               unsigned int padding, olen;
+
+               /*
+                * Update the previous RNDIS packet's total length,
+                * it can be increased due to the mandatory alignment
+                * padding for this RNDIS packet.  And update the
+                * aggregating txdesc's chimney sending buffer size
+                * accordingly.
+                *
+                * Zero-out the padding, as required by the RNDIS spec.
+                */
+               pkt = txq->agg_prevpkt;
+               olen = pkt->len;
+               padding = RTE_ALIGN(olen, txq->agg_align) - olen;
+               if (padding > 0) {
+                       agg_txd->chim_size += padding;
+                       pkt->len += padding;
+                       memset((uint8_t *)pkt + olen, 0, padding);
+               }
+
+               /* new packet starts right after the (padded) previous one */
+               chim = (uint8_t *)pkt + pkt->len;
+
+               txq->agg_pktleft--;
+               txq->agg_szleft -= pktsize;
+               if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) {
+                       /*
+                        * Probably can't aggregate more packets,
+                        * flush this aggregating txdesc proactively.
+                        */
+                       txq->agg_pktleft = 0;
+               }
+       } else {
+               agg_txd = hn_new_txd(hv, txq);
+               if (!agg_txd)
+                       return NULL;
+
+               /* each descriptor owns one chim_szmax sized slot */
+               chim = (uint8_t *)hv->chim_res->addr
+                       + agg_txd->chim_index * hv->chim_szmax;
+
+               txq->agg_txd = agg_txd;
+               txq->agg_pktleft = txq->agg_pktmax - 1;
+               txq->agg_szleft = txq->agg_szmax - pktsize;
+       }
+       /* remembered so that the next call can pad this packet */
+       txq->agg_prevpkt = chim;
+
+       return chim;
+}
+
+/*
+ * Append one per-packet-info record to the RNDIS packet message and
+ * return a pointer to its data area for the caller to fill in.
+ *
+ * The record is placed hn_rndis_pktlen(pkt) bytes from the start of
+ * the message, i.e. existing records never move, the area only grows.
+ *
+ * NOTE: in this phase pktinfooffset still counts from the beginning
+ * of rndis_packet_msg.
+ */
+static inline void *
+hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt,
+                       uint32_t pi_dlen, uint32_t pi_type)
+{
+       /* Must be computed before pktinfolen is bumped below */
+       uint8_t *tail = (uint8_t *)pkt + hn_rndis_pktlen(pkt);
+       struct rndis_pktinfo *info = (struct rndis_pktinfo *)tail;
+       const uint32_t rec_size = RNDIS_PKTINFO_SIZE(pi_dlen);
+
+       pkt->pktinfolen += rec_size;
+
+       info->type = pi_type;
+       info->size = rec_size;
+       info->offset = RNDIS_PKTINFO_OFFSET;
+
+       return info->data;
+}
+
+/*
+ * Put RNDIS header and packet info on packet.
+ *
+ * Fills in the rndis_packet_msg at pkt for mbuf m and appends the
+ * per-packet-info records behind the fixed header:
+ *  - hash value (always), carrying queue_id
+ *  - VLAN info when PKT_TX_VLAN_PKT is set
+ *  - LSO info when PKT_TX_TCP_SEG is set, otherwise checksum info
+ *    when any of the IP/TCP/UDP checksum request flags is set
+ *
+ * Afterwards the total message length and the data / pktinfo offsets
+ * are fixed up. Only the header is written here; the packet payload
+ * is not copied by this function.
+ */
+static void hn_encap(struct rndis_packet_msg *pkt,
+                    uint16_t queue_id,
+                    const struct rte_mbuf *m)
+{
+       unsigned int hlen = m->l2_len + m->l3_len;
+       uint32_t *pi_data;
+       uint32_t pkt_hlen;
+
+       pkt->type = RNDIS_PACKET_MSG;
+       pkt->len = m->pkt_len;
+       pkt->dataoffset = 0;
+       pkt->datalen = m->pkt_len;
+       pkt->oobdataoffset = 0;
+       pkt->oobdatalen = 0;
+       pkt->oobdataelements = 0;
+       pkt->pktinfooffset = sizeof(*pkt);
+       pkt->pktinfolen = 0;
+       pkt->vchandle = 0;
+       pkt->reserved = 0;
+
+       /*
+        * Set the hash value for this packet, to the queue_id to cause
+        * TX done event for this packet on the right channel.
+        */
+       pi_data = hn_rndis_pktinfo_append(pkt, NDIS_HASH_VALUE_SIZE,
+                                         NDIS_PKTINFO_TYPE_HASHVAL);
+       *pi_data = queue_id;
+
+       if (m->ol_flags & PKT_TX_VLAN_PKT) {
+               pi_data = hn_rndis_pktinfo_append(pkt, NDIS_VLAN_INFO_SIZE,
+                                                 NDIS_PKTINFO_TYPE_VLAN);
+               /*
+                * The mbuf carries the tag in 802.1Q TCI layout
+                * (priority in bits 15:13, CFI/DEI in bit 12, VLAN id
+                * in bits 11:0) whereas the NDIS VLAN info word keeps
+                * priority in bits 2:0, CFI in bit 3 and the id in
+                * bits 15:4 (see NDIS_VLAN_INFO_*_MASK), so the fields
+                * have to be repacked rather than copied verbatim.
+                */
+               *pi_data = NDIS_VLAN_INFO_MAKE(m->vlan_tci & 0x0fff,
+                                              (m->vlan_tci >> 13) & 0x7,
+                                              (m->vlan_tci >> 12) & 0x1);
+       }
+
+       if (m->ol_flags & PKT_TX_TCP_SEG) {
+               pi_data = hn_rndis_pktinfo_append(pkt, NDIS_LSO2_INFO_SIZE,
+                                                 NDIS_PKTINFO_TYPE_LSO);
+
+               /* hlen (L2 + L3 header length) is the TCP header offset */
+               if (m->ol_flags & PKT_TX_IPV6) {
+                       *pi_data = NDIS_LSO2_INFO_MAKEIPV6(hlen,
+                                                          m->tso_segsz);
+               } else {
+                       *pi_data = NDIS_LSO2_INFO_MAKEIPV4(hlen,
+                                                          m->tso_segsz);
+               }
+       } else if (m->ol_flags &
+                  (PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM)) {
+               pi_data = hn_rndis_pktinfo_append(pkt, NDIS_TXCSUM_INFO_SIZE,
+                                                 NDIS_PKTINFO_TYPE_CSUM);
+               *pi_data = 0;
+
+               if (m->ol_flags & PKT_TX_IPV6)
+                       *pi_data |= NDIS_TXCSUM_INFO_IPV6;
+               if (m->ol_flags & PKT_TX_IPV4) {
+                       *pi_data |= NDIS_TXCSUM_INFO_IPV4;
+
+                       if (m->ol_flags & PKT_TX_IP_CKSUM)
+                               *pi_data |= NDIS_TXCSUM_INFO_IPCS;
+               }
+
+               if (m->ol_flags & PKT_TX_TCP_CKSUM)
+                       *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(hlen);
+               else if (m->ol_flags & PKT_TX_UDP_CKSUM)
+                       *pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(hlen);
+       }
+
+       /* Header length = fixed header + all appended pktinfo records */
+       pkt_hlen = pkt->pktinfooffset + pkt->pktinfolen;
+       /* Fixup RNDIS packet message total length */
+       pkt->len += pkt_hlen;
+
+       /* Convert RNDIS packet message offsets */
+       pkt->dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
+       pkt->pktinfooffset = hn_rndis_pktmsg_offset(pkt->pktinfooffset);
+}
+
+/*
+ * How many scatter gather list elements are needed to send the mbuf
+ * chain m: one for the RNDIS header plus one per page touched by the
+ * data area of every segment.
+ */
+static unsigned int hn_get_slots(const struct rte_mbuf *m)
+{
+       const struct rte_mbuf *seg;
+       unsigned int count = 1; /* for RNDIS header */
+
+       for (seg = m; seg != NULL; seg = seg->next) {
+               unsigned int len = rte_pktmbuf_data_len(seg);
+               unsigned int start = rte_mbuf_data_iova(seg) & PAGE_MASK;
+
+               /* pages spanned by [start, start + len), rounded up */
+               count += (start + len + PAGE_SIZE - 1) / PAGE_SIZE;
+       }
+
+       return count;
+}
+
+/*
+ * Build scatter gather list from chained mbuf.
+ *
+ * Every segment of the chain is split at page boundaries and one
+ * vmbus_gpa element (page number, offset in page, length) is written
+ * per piece, starting at sg[0]. The caller must provide enough room;
+ * hn_get_slots() computes the worst case for the same chain.
+ *
+ * Returns the number of elements written.
+ */
+static unsigned int hn_fill_sg(struct vmbus_gpa *sg,
+                              const struct rte_mbuf *m)
+{
+       unsigned int segs = 0;
+
+       while (m) {
+               rte_iova_t addr = rte_mbuf_data_iova(m);
+               /*
+                * Keep the page number at full 64-bit width, the same
+                * way hn_xmit_sg() stores it for the header element;
+                * a 32-bit page number would silently truncate any
+                * address at or above 2^32 pages.
+                */
+               uint64_t page = addr / PAGE_SIZE;
+               unsigned int offset = addr & PAGE_MASK;
+               unsigned int len = rte_pktmbuf_data_len(m);
+
+               while (len > 0) {
+                       /* never cross a page boundary in one element */
+                       unsigned int bytes = RTE_MIN(len, PAGE_SIZE - offset);
+
+                       sg[segs].page = page;
+                       sg[segs].ofs = offset;
+                       sg[segs].len = bytes;
+                       segs++;
+
+                       /* following pieces start at the top of a page */
+                       ++page;
+                       offset = 0;
+                       len -= bytes;
+               }
+               m = m->next;
+       }
+
+       return segs;
+}
+
+/*
+ * Transmit directly from mbuf.
+ *
+ * Sends the RNDIS header held in txd->rndis_pkt followed by the data
+ * of mbuf chain m as one scatter gather list. If the descriptor also
+ * holds aggregated data (chim_size > 0) its chimney section is
+ * attached to the same message.
+ *
+ * Returns -EINVAL if the IOVA of the RNDIS header cannot be obtained,
+ * otherwise the result of hn_nvs_send_sglist().
+ */
+static int hn_xmit_sg(struct hn_tx_queue *txq,
+                     const struct hn_txdesc *txd, const struct rte_mbuf *m,
+                     bool *need_sig)
+{
+       struct vmbus_gpa sg[hn_get_slots(m)];
+       struct hn_nvs_rndis nvs_rndis = {
+               .type = NVS_TYPE_RNDIS,
+               .rndis_mtype = NVS_RNDIS_MTYPE_DATA,
+               /* attach aggregation data if present */
+               .chim_idx = txd->chim_size > 0 ?
+                       txd->chim_index : NVS_CHIM_IDX_INVALID,
+               .chim_sz = txd->chim_size,
+       };
+       rte_iova_t hdr_iova;
+       unsigned int nsegs;
+
+       hn_rndis_dump(txd->rndis_pkt);
+
+       /* pass IOVA of rndis header in first segment */
+       hdr_iova = rte_malloc_virt2iova(txd->rndis_pkt);
+       if (unlikely(hdr_iova == RTE_BAD_IOVA)) {
+               PMD_DRV_LOG(ERR, "RNDIS transmit can not get iova");
+               return -EINVAL;
+       }
+
+       sg[0].page = hdr_iova / PAGE_SIZE;
+       sg[0].ofs = hdr_iova & PAGE_MASK;
+       sg[0].len = RNDIS_PACKET_MSG_OFFSET_ABS(hn_rndis_pktlen(txd->rndis_pkt));
+
+       hn_update_packet_stats(&txq->stats, m);
+
+       /* header element plus whatever the mbuf chain needs */
+       nsegs = 1 + hn_fill_sg(&sg[1], m);
+
+       PMD_TX_LOG(DEBUG, "port %u:%u tx %u segs %u size %u",
+                  txq->port_id, txq->queue_id, txd->chim_index,
+                  nsegs, nvs_rndis.chim_sz);
+
+       return hn_nvs_send_sglist(txq->chan, sg, nsegs,
+                                 &nvs_rndis, sizeof(nvs_rndis),
+                                 (uintptr_t)txd, need_sig);
+}
+
+/*
+ * Transmit burst entry point.
+ *
+ * Small packets (shorter than HN_TXCOPY_THRESHOLD and fitting the
+ * queue's aggregation size) are copied into the chimney buffer and
+ * their mbuf is freed right away; several of them may share one
+ * aggregation descriptor. Anything else is sent straight from the
+ * mbuf through a scatter gather list, taking along any pending
+ * aggregated data in the same message.
+ *
+ * Returns the number of packets from tx_pkts that were consumed.
+ * Packets at index >= the return value are untouched and remain
+ * owned by the caller.
+ */
+uint16_t
+hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+       struct hn_tx_queue *txq = ptxq;
+       struct hn_data *hv = txq->hv;
+       bool need_sig = false;
+       uint16_t nb_tx;
+       int ret;
+
+       if (unlikely(hv->closed))
+               return 0;
+
+       /* Running low on descriptors: reap completions first */
+       if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh)
+               hn_process_events(hv, txq->queue_id);
+
+       for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+               struct rte_mbuf *m = tx_pkts[nb_tx];
+               uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN;
+               struct rndis_packet_msg *pkt;
+
+               /* For small packets aggregate them in chimney buffer */
+               if (m->pkt_len < HN_TXCOPY_THRESHOLD && pkt_size <= txq->agg_szmax) {
+                       /* If this packet will not fit, then flush  */
+                       if (txq->agg_pktleft == 0 ||
+                           RTE_ALIGN(pkt_size, txq->agg_align) > txq->agg_szleft) {
+                               if (hn_flush_txagg(txq, &need_sig))
+                                       goto fail;
+                       }
+
+                       pkt = hn_try_txagg(hv, txq, pkt_size);
+                       if (unlikely(!pkt))
+                               goto fail;
+
+                       hn_encap(pkt, txq->queue_id, m);
+                       hn_append_to_chim(txq, pkt, m);
+
+                       rte_pktmbuf_free(m);
+
+                       /* if buffer is full, flush */
+                       if (txq->agg_pktleft == 0 &&
+                           hn_flush_txagg(txq, &need_sig)) {
+                               /*
+                                * The mbuf is already freed and its
+                                * data stays in the aggregation
+                                * buffer for a later flush, so it
+                                * must be reported as consumed;
+                                * otherwise the caller would retry
+                                * or free a stale mbuf pointer.
+                                */
+                               ++nb_tx;
+                               goto fail;
+                       }
+               } else {
+                       struct hn_txdesc *txd;
+
+                       /* can send chimney data and large packet at once */
+                       txd = txq->agg_txd;
+                       if (txd) {
+                               hn_reset_txagg(txq);
+                       } else {
+                               txd = hn_new_txd(hv, txq);
+                               if (unlikely(!txd))
+                                       goto fail;
+                       }
+
+                       pkt = txd->rndis_pkt;
+                       txd->m = m;
+                       txd->data_size += m->pkt_len;
+                       ++txd->packets;
+
+                       hn_encap(pkt, txq->queue_id, m);
+
+                       ret = hn_xmit_sg(txq, txd, m, &need_sig);
+                       if (unlikely(ret != 0)) {
+                               PMD_TX_LOG(NOTICE, "sg send failed: %d", ret);
+                               ++txq->stats.errors;
+                               /*
+                                * NOTE(review): if txd was the pending
+                                * aggregation descriptor, any packets
+                                * already copied into its chimney
+                                * section appear to be dropped here
+                                * -- confirm this is intended.
+                                */
+                               rte_mempool_put(hv->tx_pool, txd);
+                               goto fail;
+                       }
+               }
+       }
+
+       /* If partial buffer left, then try and send it.
+        * if that fails, then reuse it on next send.
+        */
+       hn_flush_txagg(txq, &need_sig);
+
+fail:
+       if (need_sig)
+               rte_vmbus_chan_signal_tx(txq->chan);
+
+       return nb_tx;
+}
+
+/*
+ * Receive burst entry point.
+ *
+ * Processes pending channel events for this queue and then hands
+ * back up to nb_pkts mbufs taken from the queue's staging ring.
+ * Returns 0 without touching the channel once the device is closed.
+ */
+uint16_t
+hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+       struct hn_rx_queue *rxq = prxq;
+       struct hn_data *hv = rxq->hv;
+       uint16_t nb_rx = 0;
+
+       if (likely(!hv->closed)) {
+               /* Get all outstanding receive completions */
+               hn_process_events(hv, rxq->queue_id);
+
+               /* Get mbufs off staging ring */
+               nb_rx = rte_ring_sc_dequeue_burst(rxq->rx_ring,
+                                                 (void **)rx_pkts,
+                                                 nb_pkts, NULL);
+       }
+
+       return nb_rx;
+}
diff --git a/drivers/net/netvsc/hn_var.h b/drivers/net/netvsc/hn_var.h
new file mode 100644 (file)
index 0000000..f0358c5
--- /dev/null
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2009-2018 Microsoft Corp.
+ * Copyright (c) 2016 Brocade Communications Systems, Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ */
+
+/*
+ * Tunable ethdev params
+ */
+#define HN_MIN_RX_BUF_SIZE     1024
+#define HN_MAX_XFER_LEN                2048
+#define        HN_MAX_MAC_ADDRS        1
+/* Also the size of the channels[] array in struct hn_data */
+#define HN_MAX_CHANNELS                64
+
+/* Claimed to be 12232B */
+#define HN_MTU_MAX             (9 * 1024)
+
+/* Retry interval */
+#define HN_CHAN_INTERVAL_US    100
+
+/* Buffers need to be aligned */
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+/*
+ * Low-bits mask: (addr & PAGE_MASK) is the offset of addr within its
+ * page. Only defined here when no earlier header provided one.
+ */
+#ifndef PAGE_MASK
+#define PAGE_MASK (PAGE_SIZE - 1)
+#endif
+
+struct hn_data;
+struct hn_txdesc;
+
+/* Per-queue counters; embedded in both hn_tx_queue and hn_rx_queue. */
+struct hn_stats {
+       uint64_t        packets;
+       uint64_t        bytes;
+       uint64_t        errors;         /* e.g. failed scatter gather sends */
+       uint64_t        nomemory;       /* e.g. tx descriptor pool was empty */
+       uint64_t        multicast;
+       uint64_t        broadcast;
+       /* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */
+       uint64_t        size_bins[8];
+};
+
+/* State of one transmit queue. */
+struct hn_tx_queue {
+       struct hn_data  *hv;            /* owning device */
+       struct vmbus_channel *chan;     /* channel used for sending */
+       uint16_t        port_id;
+       uint16_t        queue_id;
+       /*
+        * Completions are reaped before transmitting when the free
+        * descriptor count in the tx pool is at or below this value.
+        */
+       uint32_t        free_thresh;
+
+       /* Applied packet transmission aggregation limits. */
+       uint32_t        agg_szmax;
+       uint32_t        agg_pktmax;
+       uint32_t        agg_align;
+
+       /* Packet transmission aggregation states */
+       struct hn_txdesc *agg_txd;      /* descriptor being filled, or NULL */
+       uint32_t        agg_pktleft;    /* packets that may still be added */
+       uint32_t        agg_szleft;     /* bytes that may still be added */
+       struct rndis_packet_msg *agg_prevpkt; /* last packet put in buffer */
+
+       struct hn_stats stats;
+};
+
+/* State of one receive queue. */
+struct hn_rx_queue {
+       struct hn_data  *hv;            /* owning device */
+       struct vmbus_channel *chan;
+       struct rte_mempool *mb_pool;
+       /* Staging ring that the receive burst function dequeues from */
+       struct rte_ring *rx_ring;
+       void    *event_buf;
+
+       rte_spinlock_t ring_lock;
+       uint32_t event_sz;
+       uint16_t port_id;
+       uint16_t queue_id;
+       struct hn_stats stats;
+       uint64_t ring_full;
+};
+
+
+/*
+ * multi-packet data from host
+ *
+ * Carries the channel, device and transaction id together with the
+ * shared info used for externally attached mbuf buffers.
+ * NOTE(review): presumably one entry per receive buffer section --
+ * confirm against the code that allocates hn_data.rxbuf_info.
+ */
+struct hn_rx_bufinfo {
+       struct vmbus_channel *chan;
+       struct hn_data *hv;
+       uint64_t        xactid;
+       struct rte_mbuf_ext_shared_info shinfo;
+} __rte_cache_aligned;
+
+/* Per-device private data of the netvsc driver. */
+struct hn_data {
+       struct rte_vmbus_device *vmbus;
+       struct hn_rx_queue *primary;
+       uint16_t        port_id;
+       /* Once set, the burst functions return 0 without doing any work */
+       bool            closed;
+       uint32_t        link_status;
+       uint32_t        link_speed;
+
+       struct rte_mem_resource *rxbuf_res;     /* UIO resource for Rx */
+       struct hn_rx_bufinfo *rxbuf_info;
+       uint32_t        rxbuf_section_cnt;      /* # of Rx sections */
+       volatile uint32_t rxbuf_outstanding;
+       uint16_t        max_queues;             /* Max available queues */
+       uint16_t        num_queues;
+       uint64_t        rss_offloads;
+
+       struct rte_mem_resource *chim_res;      /* UIO resource for Tx */
+       struct rte_mempool *tx_pool;            /* Tx descriptors */
+       uint32_t        chim_szmax;             /* Max size per buffer */
+       uint32_t        chim_cnt;               /* Max packets per buffer */
+
+       uint32_t        nvs_ver;
+       uint32_t        ndis_ver;
+       uint32_t        rndis_agg_size;
+       uint32_t        rndis_agg_pkts;
+       uint32_t        rndis_agg_align;
+
+       volatile uint32_t  rndis_pending;
+       rte_atomic32_t  rndis_req_id;
+       uint8_t         rndis_resp[256];
+
+       struct ether_addr mac_addr;
+       /* Slot 0 is what hn_primary_chan() returns */
+       struct vmbus_channel *channels[HN_MAX_CHANNELS];
+};
+
+/* Return the channel stored in slot 0 of the device's channel table. */
+static inline struct vmbus_channel *
+hn_primary_chan(const struct hn_data *hv)
+{
+       return *hv->channels;
+}
+
+/*
+ * Process pending events for the given queue; called from both burst
+ * functions declared below.
+ */
+void hn_process_events(struct hn_data *hv, uint16_t queue_id);
+
+/*
+ * Burst transmit / receive functions. The first argument is the
+ * queue structure (struct hn_tx_queue / struct hn_rx_queue) passed
+ * as an opaque pointer; the return value is the number of packets
+ * consumed / delivered.
+ */
+uint16_t hn_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+                     uint16_t nb_pkts);
+uint16_t hn_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+                     uint16_t nb_pkts);
+
+/* Transmit side setup and teardown */
+int    hn_tx_pool_init(struct rte_eth_dev *dev);
+int    hn_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+                             uint16_t nb_desc, unsigned int socket_id,
+                             const struct rte_eth_txconf *tx_conf);
+void   hn_dev_tx_queue_release(void *arg);
+
+/* Receive side setup and teardown */
+struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
+                                     uint16_t queue_id,
+                                     unsigned int socket_id);
+int    hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
+                             uint16_t queue_idx, uint16_t nb_desc,
+                             unsigned int socket_id,
+                             const struct rte_eth_rxconf *rx_conf,
+                             struct rte_mempool *mp);
+void   hn_dev_rx_queue_release(void *arg);
diff --git a/drivers/net/netvsc/meson.build b/drivers/net/netvsc/meson.build
new file mode 100644 (file)
index 0000000..a15b504
--- /dev/null
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Microsoft Corporation
+
+# Build description for the netvsc PMD.
+version = 2
+sources = files('hn_ethdev.c', 'hn_rxtx.c', 'hn_rndis.c', 'hn_nvs.c')
+
+# The driver sits on top of the VMBus bus driver
+deps += ['bus_vmbus' ]
+
+# NOTE(review): presumably needed because the driver calls APIs that
+# are still marked experimental -- confirm which ones.
+allow_experimental_apis = true
diff --git a/drivers/net/netvsc/ndis.h b/drivers/net/netvsc/ndis.h
new file mode 100644 (file)
index 0000000..2e7ca99
--- /dev/null
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018 Microsoft Corp.
+ * All rights reserved.
+ */
+
+#ifndef _NET_NDIS_H_
+#define _NET_NDIS_H_
+
+/* Media connect state */
+#define        NDIS_MEDIA_STATE_CONNECTED      0
+#define        NDIS_MEDIA_STATE_DISCONNECTED   1
+
+/* Network change type */
+#define        NDIS_NETCHANGE_TYPE_POSSIBLE    1
+#define        NDIS_NETCHANGE_TYPE_DEFINITE    2
+#define        NDIS_NETCHANGE_TYPE_FROMMEDIA   3
+
+/* Values for ndis_encap in struct ndis_offload_params */
+#define        NDIS_OFFLOAD_SET_NOCHG          0
+#define        NDIS_OFFLOAD_SET_ON             1
+#define        NDIS_OFFLOAD_SET_OFF            2
+
+/* a.k.a GRE MAC */
+#define        NDIS_ENCAP_TYPE_NVGRE           0x00000001
+
+/* The hash word combines a function (low byte) and hash type bits */
+#define        NDIS_HASH_FUNCTION_MASK         0x000000FF      /* see hash function */
+#define        NDIS_HASH_TYPE_MASK             0x00FFFF00      /* see hash type */
+
+/* hash function */
+#define        NDIS_HASH_FUNCTION_TOEPLITZ     0x00000001
+
+/* hash type */
+#define        NDIS_HASH_IPV4                  0x00000100
+#define        NDIS_HASH_TCP_IPV4              0x00000200
+#define        NDIS_HASH_IPV6                  0x00000400
+#define        NDIS_HASH_IPV6_EX               0x00000800
+#define        NDIS_HASH_TCP_IPV6              0x00001000
+#define        NDIS_HASH_TCP_IPV6_EX           0x00002000
+
+/* Sizes of rss_key[] and rss_ind[] in struct ndis_rssprm_toeplitz */
+#define        NDIS_HASH_KEYSIZE_TOEPLITZ      40
+#define        NDIS_HASH_INDCNT                128
+
+/* Object types, stored in ndis_type of struct ndis_object_hdr */
+#define        NDIS_OBJTYPE_DEFAULT            0x80
+#define        NDIS_OBJTYPE_RSS_CAPS           0x88
+#define        NDIS_OBJTYPE_RSS_PARAMS         0x89
+#define        NDIS_OBJTYPE_OFFLOAD            0xa7
+
+/*
+ * Common header that is the first member of the NDIS objects declared
+ * in this file (offload params, RSS caps, RSS params, offload caps).
+ */
+struct ndis_object_hdr {
+       uint8_t                 ndis_type;      /* NDIS_OBJTYPE_ */
+       uint8_t                 ndis_rev;       /* type specific */
+       uint16_t                ndis_size;      /* incl. this hdr */
+} __rte_packed;
+
+/*
+ * OID_TCP_OFFLOAD_PARAMETERS
+ * ndis_type: NDIS_OBJTYPE_DEFAULT
+ *
+ * One byte per offload feature; the allowed values for each field
+ * are the macros named in the trailing comment. Fields after the
+ * "NDIS >= x" markers only exist in the larger revisions, see
+ * NDIS_OFFLOAD_PARAMS_SIZE_6_1 and NDIS_OFFLOAD_PARAMS_SIZE.
+ */
+struct ndis_offload_params {
+       struct ndis_object_hdr  ndis_hdr;
+       uint8_t                 ndis_ip4csum;   /* NDIS_OFFLOAD_PARAM_ */
+       uint8_t                 ndis_tcp4csum;  /* NDIS_OFFLOAD_PARAM_ */
+       uint8_t                 ndis_udp4csum;  /* NDIS_OFFLOAD_PARAM_ */
+       uint8_t                 ndis_tcp6csum;  /* NDIS_OFFLOAD_PARAM_ */
+       uint8_t                 ndis_udp6csum;  /* NDIS_OFFLOAD_PARAM_ */
+       uint8_t                 ndis_lsov1;     /* NDIS_OFFLOAD_PARAM_ */
+       uint8_t                 ndis_ipsecv1;   /* NDIS_OFFLOAD_IPSECV1_ */
+       uint8_t                 ndis_lsov2_ip4; /* NDIS_OFFLOAD_LSOV2_ */
+       uint8_t                 ndis_lsov2_ip6; /* NDIS_OFFLOAD_LSOV2_ */
+       uint8_t                 ndis_tcp4conn;  /* 0 */
+       uint8_t                 ndis_tcp6conn;  /* 0 */
+       uint32_t                ndis_flags;     /* 0 */
+       /* NDIS >= 6.1 */
+       uint8_t                 ndis_ipsecv2;   /* NDIS_OFFLOAD_IPSECV2_ */
+       uint8_t                 ndis_ipsecv2_ip4;/* NDIS_OFFLOAD_IPSECV2_ */
+       /* NDIS >= 6.30 */
+       uint8_t                 ndis_rsc_ip4;   /* NDIS_OFFLOAD_RSC_ */
+       uint8_t                 ndis_rsc_ip6;   /* NDIS_OFFLOAD_RSC_ */
+       uint8_t                 ndis_encap;     /* NDIS_OFFLOAD_SET_ */
+       uint8_t                 ndis_encap_types;/* NDIS_ENCAP_TYPE_ */
+};
+
+/*
+ * Object sizes per revision: the full structure, and the structure
+ * truncated just before the first NDIS 6.30 field.
+ */
+#define        NDIS_OFFLOAD_PARAMS_SIZE        sizeof(struct ndis_offload_params)
+#define        NDIS_OFFLOAD_PARAMS_SIZE_6_1    \
+       offsetof(struct ndis_offload_params, ndis_rsc_ip4)
+
+#define        NDIS_OFFLOAD_PARAMS_REV_2       2       /* NDIS 6.1 */
+#define        NDIS_OFFLOAD_PARAMS_REV_3       3       /* NDIS 6.30 */
+
+/*
+ * Field values. In every group below the value 0 is
+ * NDIS_OFFLOAD_PARAM_NOCHG, which is why each group is introduced by
+ * that name and starts counting at 1.
+ */
+#define        NDIS_OFFLOAD_PARAM_NOCHG        0       /* common */
+#define        NDIS_OFFLOAD_PARAM_OFF          1
+#define        NDIS_OFFLOAD_PARAM_TX           2
+#define        NDIS_OFFLOAD_PARAM_RX           3
+#define        NDIS_OFFLOAD_PARAM_TXRX         4
+
+/* NDIS_OFFLOAD_PARAM_NOCHG */
+#define        NDIS_OFFLOAD_LSOV1_OFF          1
+#define        NDIS_OFFLOAD_LSOV1_ON           2
+
+/* NDIS_OFFLOAD_PARAM_NOCHG */
+#define        NDIS_OFFLOAD_IPSECV1_OFF        1
+#define        NDIS_OFFLOAD_IPSECV1_AH         2
+#define        NDIS_OFFLOAD_IPSECV1_ESP        3
+#define        NDIS_OFFLOAD_IPSECV1_AH_ESP     4
+
+/* NDIS_OFFLOAD_PARAM_NOCHG */
+#define        NDIS_OFFLOAD_LSOV2_OFF          1
+#define        NDIS_OFFLOAD_LSOV2_ON           2
+
+/* NDIS_OFFLOAD_PARAM_NOCHG */
+#define        NDIS_OFFLOAD_IPSECV2_OFF        1
+#define        NDIS_OFFLOAD_IPSECV2_AH         2
+#define        NDIS_OFFLOAD_IPSECV2_ESP        3
+#define        NDIS_OFFLOAD_IPSECV2_AH_ESP     4
+
+/* NDIS_OFFLOAD_PARAM_NOCHG */
+#define        NDIS_OFFLOAD_RSC_OFF            1
+#define        NDIS_OFFLOAD_RSC_ON             2
+
+/*
+ * OID_GEN_RECEIVE_SCALE_CAPABILITIES
+ * ndis_type: NDIS_OBJTYPE_RSS_CAPS
+ *
+ * ndis_pad is trailing padding and is not counted in
+ * NDIS_RSS_CAPS_SIZE.
+ */
+struct ndis_rss_caps {
+       struct ndis_object_hdr          ndis_hdr;
+       uint32_t                        ndis_caps;      /* NDIS_RSS_CAP_ */
+       uint32_t                        ndis_nmsi;      /* # of MSIs */
+       uint32_t                        ndis_nrxr;      /* # of RX rings */
+       /* NDIS >= 6.30 */
+       uint16_t                        ndis_nind;      /* # of indtbl ent. */
+       uint16_t                        ndis_pad;
+} __rte_packed;
+
+/*
+ * Object sizes: up to (not including) the trailing pad, and for the
+ * 6.0 layout up to (not including) ndis_nind.
+ */
+#define        NDIS_RSS_CAPS_SIZE              \
+       offsetof(struct ndis_rss_caps, ndis_pad)
+#define        NDIS_RSS_CAPS_SIZE_6_0          \
+       offsetof(struct ndis_rss_caps, ndis_nind)
+
+#define        NDIS_RSS_CAPS_REV_1             1       /* NDIS 6.{0,1,20} */
+#define        NDIS_RSS_CAPS_REV_2             2       /* NDIS 6.30 */
+
+/* Bits of ndis_caps; the low byte holds the hash function */
+#define        NDIS_RSS_CAP_MSI                0x01000000
+#define        NDIS_RSS_CAP_CLASSIFY_ISR       0x02000000
+#define        NDIS_RSS_CAP_CLASSIFY_DPC       0x04000000
+#define        NDIS_RSS_CAP_MSIX               0x08000000
+#define        NDIS_RSS_CAP_IPV4               0x00000100
+#define        NDIS_RSS_CAP_IPV6               0x00000200
+#define        NDIS_RSS_CAP_IPV6_EX            0x00000400
+#define        NDIS_RSS_CAP_HASH_TOEPLITZ      NDIS_HASH_FUNCTION_TOEPLITZ
+#define        NDIS_RSS_CAP_HASHFUNC_MASK      NDIS_HASH_FUNCTION_MASK
+
+/*
+ * OID_GEN_RECEIVE_SCALE_PARAMETERS
+ * ndis_type: NDIS_OBJTYPE_RSS_PARAMS
+ *
+ * The indirection table and hash key are not part of this structure;
+ * they are described by the size/offset pairs below.
+ * NOTE(review): the structure is not packed, so the compiler inserts
+ * padding after the 16-bit size fields -- presumably matching the
+ * host's layout; confirm before changing field types or order.
+ */
+struct ndis_rss_params {
+       struct ndis_object_hdr          ndis_hdr;
+       uint16_t                        ndis_flags;     /* NDIS_RSS_FLAG_ */
+       uint16_t                        ndis_bcpu;      /* base cpu 0 */
+       uint32_t                        ndis_hash;      /* NDIS_HASH_ */
+       uint16_t                        ndis_indsize;   /* indirect table */
+       uint32_t                        ndis_indoffset;
+       uint16_t                        ndis_keysize;   /* hash key */
+       uint32_t                        ndis_keyoffset;
+       /* NDIS >= 6.20 */
+       uint32_t                        ndis_cpumaskoffset;
+       uint32_t                        ndis_cpumaskcnt;
+       uint32_t                        ndis_cpumaskentsz;
+};
+
+/* Object sizes: full structure, and without the NDIS 6.20 cpumask fields */
+#define        NDIS_RSS_PARAMS_SIZE            sizeof(struct ndis_rss_params)
+#define        NDIS_RSS_PARAMS_SIZE_6_0        \
+       offsetof(struct ndis_rss_params, ndis_cpumaskoffset)
+
+#define        NDIS_RSS_PARAMS_REV_1           1       /* NDIS 6.0 */
+#define        NDIS_RSS_PARAMS_REV_2           2       /* NDIS 6.20 */
+
+/* Bits of ndis_flags in struct ndis_rss_params */
+#define        NDIS_RSS_FLAG_NONE              0x0000
+#define        NDIS_RSS_FLAG_BCPU_UNCHG        0x0001
+#define        NDIS_RSS_FLAG_HASH_UNCHG        0x0002
+#define        NDIS_RSS_FLAG_IND_UNCHG         0x0004
+#define        NDIS_RSS_FLAG_KEY_UNCHG         0x0008
+#define        NDIS_RSS_FLAG_DISABLE           0x0010
+
+/*
+ * non-standard convenient struct
+ *
+ * RSS parameters immediately followed by a full-size indirection
+ * table and a Toeplitz hash key.
+ */
+struct ndis_rssprm_toeplitz {
+       struct ndis_rss_params          rss_params;
+       /* Indirect table */
+       uint32_t                        rss_ind[NDIS_HASH_INDCNT];
+       /* Toeplitz hash key */
+       uint8_t                         rss_key[NDIS_HASH_KEYSIZE_TOEPLITZ];
+};
+
+/*
+ * Size of the structure up to and including the first nind entries
+ * of the indirection table (the hash key is not counted).
+ */
+#define        NDIS_RSSPRM_TOEPLITZ_SIZE(nind) \
+       offsetof(struct ndis_rssprm_toeplitz, rss_ind[nind])
+
+/*
+ * OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES
+ * ndis_type: NDIS_OBJTYPE_OFFLOAD
+ */
+
+/* Encapsulation bits used by the *_encap / *_txenc / *_rxenc fields below */
+#define        NDIS_OFFLOAD_ENCAP_NONE         0x0000
+#define        NDIS_OFFLOAD_ENCAP_NULL         0x0001
+#define        NDIS_OFFLOAD_ENCAP_8023         0x0002
+#define        NDIS_OFFLOAD_ENCAP_8023PQ       0x0004
+#define        NDIS_OFFLOAD_ENCAP_8023PQ_OOB   0x0008
+#define        NDIS_OFFLOAD_ENCAP_RFC1483      0x0010
+
+/*
+ * Checksum offload capabilities: for each of IPv4/IPv6 and each
+ * direction an encapsulation word followed by a capability word.
+ * The capability bit macros are defined right after the field they
+ * apply to.
+ */
+struct ndis_csum_offload {
+       uint32_t                        ndis_ip4_txenc; /*NDIS_OFFLOAD_ENCAP_*/
+       uint32_t                        ndis_ip4_txcsum;
+#define        NDIS_TXCSUM_CAP_IP4OPT          0x001
+#define        NDIS_TXCSUM_CAP_TCP4OPT         0x004
+#define        NDIS_TXCSUM_CAP_TCP4            0x010
+#define        NDIS_TXCSUM_CAP_UDP4            0x040
+#define        NDIS_TXCSUM_CAP_IP4             0x100
+       uint32_t                        ndis_ip4_rxenc; /*NDIS_OFFLOAD_ENCAP_*/
+       uint32_t                        ndis_ip4_rxcsum;
+#define        NDIS_RXCSUM_CAP_IP4OPT          0x001
+#define        NDIS_RXCSUM_CAP_TCP4OPT         0x004
+#define        NDIS_RXCSUM_CAP_TCP4            0x010
+#define        NDIS_RXCSUM_CAP_UDP4            0x040
+#define        NDIS_RXCSUM_CAP_IP4             0x100
+       uint32_t                        ndis_ip6_txenc; /*NDIS_OFFLOAD_ENCAP_*/
+       uint32_t                        ndis_ip6_txcsum;
+#define        NDIS_TXCSUM_CAP_IP6EXT          0x001
+#define        NDIS_TXCSUM_CAP_TCP6OPT         0x004
+#define        NDIS_TXCSUM_CAP_TCP6            0x010
+#define        NDIS_TXCSUM_CAP_UDP6            0x040
+       uint32_t                        ndis_ip6_rxenc; /*NDIS_OFFLOAD_ENCAP_*/
+       uint32_t                        ndis_ip6_rxcsum;
+#define        NDIS_RXCSUM_CAP_IP6EXT          0x001
+#define        NDIS_RXCSUM_CAP_TCP6OPT         0x004
+#define        NDIS_RXCSUM_CAP_TCP6            0x010
+#define        NDIS_RXCSUM_CAP_UDP6            0x040
+};
+
+/* LSO version 1 capabilities (member ndis_lsov1 of struct ndis_offload) */
+struct ndis_lsov1_offload {
+       uint32_t                        ndis_encap;     /*NDIS_OFFLOAD_ENCAP_*/
+       uint32_t                        ndis_maxsize;
+       uint32_t                        ndis_minsegs;
+       uint32_t                        ndis_opts;
+};
+
+/* IPsec version 1 capabilities (member ndis_ipsecv1 of struct ndis_offload) */
+struct ndis_ipsecv1_offload {
+       uint32_t                        ndis_encap;     /*NDIS_OFFLOAD_ENCAP_*/
+       uint32_t                        ndis_ah_esp;
+       uint32_t                        ndis_xport_tun;
+       uint32_t                        ndis_ip4_opts;
+       uint32_t                        ndis_flags;
+       uint32_t                        ndis_ip4_ah;
+       uint32_t                        ndis_ip4_esp;
+};
+
+/*
+ * LSO version 2 capabilities (member ndis_lsov2 of struct
+ * ndis_offload), separately for IPv4 and IPv6. The NDIS_LSOV2_CAP_
+ * bits apply to ndis_ip6_opts.
+ */
+struct ndis_lsov2_offload {
+       uint32_t                        ndis_ip4_encap; /*NDIS_OFFLOAD_ENCAP_*/
+       uint32_t                        ndis_ip4_maxsz;
+       uint32_t                        ndis_ip4_minsg;
+       uint32_t                        ndis_ip6_encap; /*NDIS_OFFLOAD_ENCAP_*/
+       uint32_t                        ndis_ip6_maxsz;
+       uint32_t                        ndis_ip6_minsg;
+       uint32_t                        ndis_ip6_opts;
+#define        NDIS_LSOV2_CAP_IP6EXT           0x001
+#define        NDIS_LSOV2_CAP_TCP6OPT          0x004
+};
+
+/* IPsec version 2 capabilities (member ndis_ipsecv2 of struct ndis_offload) */
+struct ndis_ipsecv2_offload {
+       uint32_t                        ndis_encap;     /*NDIS_OFFLOAD_ENCAP_*/
+       uint16_t                        ndis_ip6;
+       uint16_t                        ndis_ip4opt;
+       uint16_t                        ndis_ip6ext;
+       uint16_t                        ndis_ah;
+       uint16_t                        ndis_esp;
+       uint16_t                        ndis_ah_esp;
+       uint16_t                        ndis_xport;
+       uint16_t                        ndis_tun;
+       uint16_t                        ndis_xport_tun;
+       uint16_t                        ndis_lso;
+       uint16_t                        ndis_extseq;
+       uint32_t                        ndis_udp_esp;
+       uint32_t                        ndis_auth;
+       uint32_t                        ndis_crypto;
+       uint32_t                        ndis_sa_caps;
+};
+
+/* RSC capabilities (member ndis_rsc of struct ndis_offload) */
+struct ndis_rsc_offload {
+       uint16_t                        ndis_ip4;
+       uint16_t                        ndis_ip6;
+};
+
+/* Encapsulation capabilities (member ndis_encap_gre of struct ndis_offload) */
+struct ndis_encap_offload {
+       uint32_t                        ndis_flags;
+       uint32_t                        ndis_maxhdr;
+};
+
+/*
+ * Complete offload capabilities object. Members after the
+ * "NDIS >= x" markers are only present in the larger revisions; see
+ * NDIS_OFFLOAD_SIZE_6_0 and NDIS_OFFLOAD_SIZE_6_1 for the cut-offs.
+ */
+struct ndis_offload {
+       struct ndis_object_hdr          ndis_hdr;
+       struct ndis_csum_offload        ndis_csum;
+       struct ndis_lsov1_offload       ndis_lsov1;
+       struct ndis_ipsecv1_offload     ndis_ipsecv1;
+       struct ndis_lsov2_offload       ndis_lsov2;
+       uint32_t                        ndis_flags;
+       /* NDIS >= 6.1 */
+       struct ndis_ipsecv2_offload     ndis_ipsecv2;
+       /* NDIS >= 6.30 */
+       struct ndis_rsc_offload         ndis_rsc;
+       struct ndis_encap_offload       ndis_encap_gre;
+};
+
+/*
+ * Object sizes: full structure, structure without the NDIS 6.1
+ * members, and structure without the NDIS 6.30 members.
+ */
+#define        NDIS_OFFLOAD_SIZE               sizeof(struct ndis_offload)
+#define        NDIS_OFFLOAD_SIZE_6_0           offsetof(struct ndis_offload, ndis_ipsecv2)
+#define        NDIS_OFFLOAD_SIZE_6_1           offsetof(struct ndis_offload, ndis_rsc)
+
+#define        NDIS_OFFLOAD_REV_1              1       /* NDIS 6.0 */
+#define        NDIS_OFFLOAD_REV_2              2       /* NDIS 6.1 */
+#define        NDIS_OFFLOAD_REV_3              3       /* NDIS 6.30 */
+
+/*
+ * Per-packet-info
+ */
+
+/*
+ * VLAN
+ *
+ * Layout of the info word: priority in bits 2:0, CFI in bit 3 and
+ * VLAN id in bits 15:4. Note that this is not the 802.1Q TCI bit
+ * order, so a TCI value has to be repacked with NDIS_VLAN_INFO_MAKE.
+ */
+#define        NDIS_VLAN_INFO_SIZE             sizeof(uint32_t)
+#define        NDIS_VLAN_INFO_PRI_MASK         0x0007
+#define        NDIS_VLAN_INFO_CFI_MASK         0x0008
+#define        NDIS_VLAN_INFO_ID_MASK          0xfff0
+/* Build an info word from its three components */
+#define        NDIS_VLAN_INFO_MAKE(id, pri, cfi)       \
+       (((pri) & NDIS_VLAN_INFO_PRI_MASK) |    \
+        (((cfi) & 0x1) << 3) | (((id) & 0xfff) << 4))
+/* Extract the components from an info word */
+#define        NDIS_VLAN_INFO_ID(inf)          (((inf) & NDIS_VLAN_INFO_ID_MASK) >> 4)
+#define        NDIS_VLAN_INFO_CFI(inf)         (((inf) & NDIS_VLAN_INFO_CFI_MASK) >> 3)
+#define        NDIS_VLAN_INFO_PRI(inf)         ((inf) & NDIS_VLAN_INFO_PRI_MASK)
+
+/* Reception checksum: size of the info word and its flag bits */
+#define        NDIS_RXCSUM_INFO_SIZE           sizeof(uint32_t)
+#define        NDIS_RXCSUM_INFO_TCPCS_FAILED   0x0001
+#define        NDIS_RXCSUM_INFO_UDPCS_FAILED   0x0002
+#define        NDIS_RXCSUM_INFO_IPCS_FAILED    0x0004
+#define        NDIS_RXCSUM_INFO_TCPCS_OK       0x0008
+#define        NDIS_RXCSUM_INFO_UDPCS_OK       0x0010
+#define        NDIS_RXCSUM_INFO_IPCS_OK        0x0020
+#define        NDIS_RXCSUM_INFO_LOOPBACK       0x0040
+#define        NDIS_RXCSUM_INFO_TCPCS_INVAL    0x0080
+#define        NDIS_RXCSUM_INFO_IPCS_INVAL     0x0100
+
+/*
+ * LSOv2
+ *
+ * Layout of the info word: MSS in bits 19:0, TCP header offset in
+ * bits 29:20, bit 30 marks the word as LSOv2 and bit 31 selects IPv6.
+ */
+#define        NDIS_LSO2_INFO_SIZE             sizeof(uint32_t)
+#define        NDIS_LSO2_INFO_MSS_MASK         0x000fffff
+#define        NDIS_LSO2_INFO_THOFF_MASK       0x3ff00000
+#define        NDIS_LSO2_INFO_ISLSO2           0x40000000
+#define        NDIS_LSO2_INFO_ISIPV6           0x80000000
+
+/* Both arguments are masked to their field width */
+#define        NDIS_LSO2_INFO_MAKE(thoff, mss)                         \
+       ((((uint32_t)(mss)) & NDIS_LSO2_INFO_MSS_MASK) |        \
+        ((((uint32_t)(thoff)) & 0x3ff) << 20) |                \
+        NDIS_LSO2_INFO_ISLSO2)
+
+#define        NDIS_LSO2_INFO_MAKEIPV4(thoff, mss)                     \
+       NDIS_LSO2_INFO_MAKE((thoff), (mss))
+
+#define        NDIS_LSO2_INFO_MAKEIPV6(thoff, mss)                     \
+       (NDIS_LSO2_INFO_MAKE((thoff), (mss)) | NDIS_LSO2_INFO_ISIPV6)
+
+/*
+ * Transmission checksum
+ *
+ * Layout of the info word: request flags in the low bits and the
+ * L4 header offset in bits 25:16 (NDIS_TXCSUM_INFO_THOFF).
+ */
+#define        NDIS_TXCSUM_INFO_SIZE           sizeof(uint32_t)
+#define        NDIS_TXCSUM_INFO_IPV4           0x00000001
+#define        NDIS_TXCSUM_INFO_IPV6           0x00000002
+#define        NDIS_TXCSUM_INFO_TCPCS          0x00000004
+#define        NDIS_TXCSUM_INFO_UDPCS          0x00000008
+#define        NDIS_TXCSUM_INFO_IPCS           0x00000010
+#define        NDIS_TXCSUM_INFO_THOFF          0x03ff0000
+
+/*
+ * NOTE(review): unlike NDIS_LSO2_INFO_MAKE, thoff is shifted into
+ * place without being masked to NDIS_TXCSUM_INFO_THOFF, so callers
+ * must pass a value that fits in 10 bits.
+ */
+#define        NDIS_TXCSUM_INFO_MKL4CS(thoff, flag)                    \
+       ((((uint32_t)(thoff)) << 16) | (flag))
+
+#define        NDIS_TXCSUM_INFO_MKTCPCS(thoff)                         \
+       NDIS_TXCSUM_INFO_MKL4CS((thoff), NDIS_TXCSUM_INFO_TCPCS)
+
+#define        NDIS_TXCSUM_INFO_MKUDPCS(thoff)                         \
+       NDIS_TXCSUM_INFO_MKL4CS((thoff), NDIS_TXCSUM_INFO_UDPCS)
+
+#endif /* !_NET_NDIS_H_ */
diff --git a/drivers/net/netvsc/rndis.h b/drivers/net/netvsc/rndis.h
new file mode 100644 (file)
index 0000000..eac9a99
--- /dev/null
@@ -0,0 +1,414 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018 Microsoft Corp.
+ * Copyright (c) 2010 Jonathan Armani <armani@openbsd.org>
+ * Copyright (c) 2010 Fabien Romano <fabien@openbsd.org>
+ * Copyright (c) 2010 Michael Knudsen <mk@openbsd.org>
+ * All rights reserved.
+ */
+
+#ifndef        _NET_RNDIS_H_
+#define        _NET_RNDIS_H_
+
+/* Canonical major/minor version as of 22nd Aug. 2016. */
+#define        RNDIS_VERSION_MAJOR             0x00000001
+#define        RNDIS_VERSION_MINOR             0x00000000
+
+#define        RNDIS_STATUS_SUCCESS            0x00000000 /* RNDIS status codes */
+#define        RNDIS_STATUS_PENDING            0x00000103
+
+#define RNDIS_STATUS_ONLINE            0x40010003 /* 0x4001xxxx/0x4002xxxx group */
+#define RNDIS_STATUS_RESET_START       0x40010004
+#define RNDIS_STATUS_RESET_END         0x40010005
+#define RNDIS_STATUS_RING_STATUS       0x40010006
+#define RNDIS_STATUS_CLOSED            0x40010007
+#define RNDIS_STATUS_WAN_LINE_UP       0x40010008
+#define RNDIS_STATUS_WAN_LINE_DOWN     0x40010009
+#define RNDIS_STATUS_WAN_FRAGMENT      0x4001000A
+#define        RNDIS_STATUS_MEDIA_CONNECT      0x4001000B
+#define        RNDIS_STATUS_MEDIA_DISCONNECT   0x4001000C
+#define RNDIS_STATUS_HARDWARE_LINE_UP  0x4001000D
+#define RNDIS_STATUS_HARDWARE_LINE_DOWN        0x4001000E
+#define RNDIS_STATUS_INTERFACE_UP      0x4001000F
+#define RNDIS_STATUS_INTERFACE_DOWN    0x40010010
+#define RNDIS_STATUS_MEDIA_BUSY                0x40010011
+#define        RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION  0x40010012 /* aliased below */
+#define RNDIS_STATUS_WW_INDICATION     RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION
+#define RNDIS_STATUS_LINK_SPEED_CHANGE 0x40010013
+#define RNDIS_STATUS_NETWORK_CHANGE    0x40010018
+#define        RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG 0x40020006
+
+#define        RNDIS_STATUS_FAILURE            0xC0000001 /* 0xC0xxxxxx status group */
+#define RNDIS_STATUS_RESOURCES         0xC000009A
+#define        RNDIS_STATUS_NOT_SUPPORTED      0xC00000BB
+#define RNDIS_STATUS_CLOSING           0xC0010002
+#define RNDIS_STATUS_BAD_VERSION       0xC0010004
+#define RNDIS_STATUS_BAD_CHARACTERISTICS 0xC0010005
+#define RNDIS_STATUS_ADAPTER_NOT_FOUND 0xC0010006
+#define RNDIS_STATUS_OPEN_FAILED       0xC0010007
+#define RNDIS_STATUS_DEVICE_FAILED     0xC0010008
+#define RNDIS_STATUS_MULTICAST_FULL    0xC0010009
+#define RNDIS_STATUS_MULTICAST_EXISTS  0xC001000A
+#define RNDIS_STATUS_MULTICAST_NOT_FOUND 0xC001000B
+#define RNDIS_STATUS_REQUEST_ABORTED   0xC001000C
+#define RNDIS_STATUS_RESET_IN_PROGRESS 0xC001000D
+#define RNDIS_STATUS_CLOSING_INDICATING        0xC001000E
+#define RNDIS_STATUS_INVALID_PACKET    0xC001000F
+#define RNDIS_STATUS_OPEN_LIST_FULL    0xC0010010
+#define RNDIS_STATUS_ADAPTER_NOT_READY 0xC0010011
+#define RNDIS_STATUS_ADAPTER_NOT_OPEN  0xC0010012
+#define RNDIS_STATUS_NOT_INDICATING    0xC0010013
+#define RNDIS_STATUS_INVALID_LENGTH    0xC0010014
+#define        RNDIS_STATUS_INVALID_DATA       0xC0010015
+#define RNDIS_STATUS_BUFFER_TOO_SHORT  0xC0010016
+#define RNDIS_STATUS_INVALID_OID       0xC0010017
+#define RNDIS_STATUS_ADAPTER_REMOVED   0xC0010018
+#define RNDIS_STATUS_UNSUPPORTED_MEDIA 0xC0010019
+#define RNDIS_STATUS_GROUP_ADDRESS_IN_US 0xC001001A /* NOTE(review): name looks truncated ("IN_USE"?) */
+#define RNDIS_STATUS_FILE_NOT_FOUND    0xC001001B
+#define RNDIS_STATUS_ERROR_READING_FILE        0xC001001C
+#define RNDIS_STATUS_ALREADY_MAPPED    0xC001001D
+#define RNDIS_STATUS_RESOURCE_CONFLICT 0xC001001E
+#define RNDIS_STATUS_NO_CABLE          0xC001001F
+
+#define        OID_GEN_SUPPORTED_LIST          0x00010101 /* OID_GEN_*: 0x0001xxxx */
+#define        OID_GEN_HARDWARE_STATUS         0x00010102
+#define        OID_GEN_MEDIA_SUPPORTED         0x00010103
+#define        OID_GEN_MEDIA_IN_USE            0x00010104
+#define        OID_GEN_MAXIMUM_LOOKAHEAD       0x00010105
+#define        OID_GEN_MAXIMUM_FRAME_SIZE      0x00010106
+#define        OID_GEN_LINK_SPEED              0x00010107
+#define        OID_GEN_TRANSMIT_BUFFER_SPACE   0x00010108
+#define        OID_GEN_RECEIVE_BUFFER_SPACE    0x00010109
+#define        OID_GEN_TRANSMIT_BLOCK_SIZE     0x0001010A
+#define        OID_GEN_RECEIVE_BLOCK_SIZE      0x0001010B
+#define        OID_GEN_VENDOR_ID               0x0001010C
+#define        OID_GEN_VENDOR_DESCRIPTION      0x0001010D
+#define        OID_GEN_CURRENT_PACKET_FILTER   0x0001010E /* takes NDIS_PACKET_TYPE_ bits */
+#define        OID_GEN_CURRENT_LOOKAHEAD       0x0001010F
+#define        OID_GEN_DRIVER_VERSION          0x00010110
+#define        OID_GEN_MAXIMUM_TOTAL_SIZE      0x00010111
+#define        OID_GEN_PROTOCOL_OPTIONS        0x00010112
+#define        OID_GEN_MAC_OPTIONS             0x00010113
+#define        OID_GEN_MEDIA_CONNECT_STATUS    0x00010114
+#define        OID_GEN_MAXIMUM_SEND_PACKETS    0x00010115
+#define        OID_GEN_VENDOR_DRIVER_VERSION   0x00010116
+#define        OID_GEN_SUPPORTED_GUIDS         0x00010117
+#define        OID_GEN_NETWORK_LAYER_ADDRESSES 0x00010118
+#define        OID_GEN_TRANSPORT_HEADER_OFFSET 0x00010119
+#define        OID_GEN_RECEIVE_SCALE_CAPABILITIES      0x00010203
+#define        OID_GEN_RECEIVE_SCALE_PARAMETERS        0x00010204
+#define        OID_GEN_MACHINE_NAME            0x0001021A
+#define        OID_GEN_RNDIS_CONFIG_PARAMETER  0x0001021B /* see struct rndis_set_parameter */
+#define        OID_GEN_VLAN_ID                 0x0001021C
+
+#define        OID_802_3_PERMANENT_ADDRESS     0x01010101 /* OID_802_3_*: 0x0101xxxx */
+#define        OID_802_3_CURRENT_ADDRESS       0x01010102
+#define        OID_802_3_MULTICAST_LIST        0x01010103
+#define        OID_802_3_MAXIMUM_LIST_SIZE     0x01010104
+#define        OID_802_3_MAC_OPTIONS           0x01010105
+#define        OID_802_3_RCV_ERROR_ALIGNMENT   0x01020101 /* 0x0102xxxx from here on */
+#define        OID_802_3_XMIT_ONE_COLLISION    0x01020102
+#define        OID_802_3_XMIT_MORE_COLLISIONS  0x01020103
+#define        OID_802_3_XMIT_DEFERRED         0x01020201
+#define        OID_802_3_XMIT_MAX_COLLISIONS   0x01020202
+#define        OID_802_3_RCV_OVERRUN           0x01020203
+#define        OID_802_3_XMIT_UNDERRUN         0x01020204
+#define        OID_802_3_XMIT_HEARTBEAT_FAILURE        0x01020205
+#define        OID_802_3_XMIT_TIMES_CRS_LOST   0x01020206
+#define        OID_802_3_XMIT_LATE_COLLISIONS  0x01020207
+
+#define        OID_TCP_OFFLOAD_PARAMETERS      0xFC01020C /* 0xFC01xxxx OIDs */
+#define        OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES   0xFC01020D
+
+#define        RNDIS_MEDIUM_802_3              0x00000000 /* only medium defined here */
+
+/* Device flags (cf. rndis_init_comp.devflags -- TODO confirm) */
+#define        RNDIS_DF_CONNECTIONLESS         0x00000001
+#define        RNDIS_DF_CONNECTION_ORIENTED    0x00000002
+
+/*
+ * Common RNDIS message header: the two leading fields of each message below.
+ */
+struct rndis_msghdr {
+       uint32_t type;          /* an RNDIS_*_MSG or RNDIS_*_CMPLT value */
+       uint32_t len;           /* presumably total message bytes -- confirm */
+};
+
+/*
+ * RNDIS data message (11 x uint32_t, 44 bytes)
+ */
+#define        RNDIS_PACKET_MSG                0x00000001
+
+struct rndis_packet_msg {
+       uint32_t type;
+       uint32_t len;
+       uint32_t dataoffset;    /* counted from this field, see _OFFSET_ABS */
+       uint32_t datalen;
+       uint32_t oobdataoffset; /* also counted from dataoffset */
+       uint32_t oobdatalen;
+       uint32_t oobdataelements;
+       uint32_t pktinfooffset; /* also counted from dataoffset */
+       uint32_t pktinfolen;
+       uint32_t vchandle;
+       uint32_t reserved;
+};
+
+/*
+ * Minimum value for dataoffset, oobdataoffset, and
+ * pktinfooffset: the bytes from dataoffset to the end of the struct.
+ */
+#define        RNDIS_PACKET_MSG_OFFSET_MIN             \
+       (sizeof(struct rndis_packet_msg) -      \
+        offsetof(struct rndis_packet_msg, dataoffset))
+
+/* Convert a stored offset to one from the beginning of rndis_packet_msg. */
+#define        RNDIS_PACKET_MSG_OFFSET_ABS(ofs)        \
+       ((ofs) + offsetof(struct rndis_packet_msg, dataoffset))
+
+#define        RNDIS_PACKET_MSG_OFFSET_ALIGN           4
+#define        RNDIS_PACKET_MSG_OFFSET_ALIGNMASK       \
+       (RNDIS_PACKET_MSG_OFFSET_ALIGN - 1)     /* low bits of an unaligned offset */
+
+/* Per-packet-info for RNDIS data message: 12-byte header, then data[] */
+struct rndis_pktinfo {
+       uint32_t size;          /* presumably header + data bytes -- confirm */
+       uint32_t type;          /* NDIS_PKTINFO_TYPE_ */
+       uint32_t offset;        /* presumably RNDIS_PKTINFO_OFFSET -- confirm */
+       uint8_t data[];
+};
+
+#define        RNDIS_PKTINFO_OFFSET            \
+       offsetof(struct rndis_pktinfo, data[0]) /* where data[] starts */
+#define        RNDIS_PKTINFO_SIZE_ALIGN        4
+#define        RNDIS_PKTINFO_SIZE_ALIGNMASK    (RNDIS_PKTINFO_SIZE_ALIGN - 1)
+
+#define        NDIS_PKTINFO_TYPE_CSUM          0
+#define        NDIS_PKTINFO_TYPE_IPSEC         1
+#define        NDIS_PKTINFO_TYPE_LSO           2
+#define        NDIS_PKTINFO_TYPE_CLASSIFY      3
+/* reserved 4 */
+#define        NDIS_PKTINFO_TYPE_SGLIST        5
+#define        NDIS_PKTINFO_TYPE_VLAN          6
+#define        NDIS_PKTINFO_TYPE_ORIG          7
+#define        NDIS_PKTINFO_TYPE_PKT_CANCELID  8
+#define        NDIS_PKTINFO_TYPE_ORIG_NBLIST   9
+#define        NDIS_PKTINFO_TYPE_CACHE_NBLIST  10
+#define        NDIS_PKTINFO_TYPE_PKT_PAD       11
+
+/* RNDIS extension: the hash types below reuse type values 9 and 8 */
+
+/* Per-packet hash info */
+#define NDIS_HASH_INFO_SIZE            sizeof(uint32_t)
+#define NDIS_PKTINFO_TYPE_HASHINF      NDIS_PKTINFO_TYPE_ORIG_NBLIST
+/* presumably carries NDIS_HASH_ flags defined elsewhere -- confirm */
+
+/* Per-packet hash value */
+#define NDIS_HASH_VALUE_SIZE           sizeof(uint32_t)
+#define NDIS_PKTINFO_TYPE_HASHVAL      NDIS_PKTINFO_TYPE_PKT_CANCELID
+
+/* Per-packet-info size: the header plus dlen bytes of data[] */
+#define RNDIS_PKTINFO_SIZE(dlen)       offsetof(struct rndis_pktinfo, data[dlen])
+
+/*
+ * RNDIS control messages
+ */
+
+/*
+ * Common header for RNDIS completion messages.
+ *
+ * NOTE: It does not apply to RNDIS_RESET_CMPLT (rndis_reset_comp has no rid).
+ */
+struct rndis_comp_hdr {
+       uint32_t type;
+       uint32_t len;
+       uint32_t rid;           /* presumably echoes the request's rid -- confirm */
+       uint32_t status;        /* presumably an RNDIS_STATUS_ value -- confirm */
+};
+
+/* Initialize the device. */
+#define        RNDIS_INITIALIZE_MSG    0x00000002
+#define        RNDIS_INITIALIZE_CMPLT  0x80000002      /* request value | 0x80000000 */
+
+struct rndis_init_req {
+       uint32_t type;
+       uint32_t len;
+       uint32_t rid;
+       uint32_t ver_major;     /* cf. RNDIS_VERSION_MAJOR */
+       uint32_t ver_minor;     /* cf. RNDIS_VERSION_MINOR */
+       uint32_t max_xfersz;
+};
+
+struct rndis_init_comp {
+       uint32_t type;
+       uint32_t len;
+       uint32_t rid;
+       uint32_t status;
+       uint32_t ver_major;
+       uint32_t ver_minor;
+       uint32_t devflags;      /* cf. RNDIS_DF_ -- TODO confirm */
+       uint32_t medium;        /* cf. RNDIS_MEDIUM_ -- TODO confirm */
+       uint32_t pktmaxcnt;
+       uint32_t pktmaxsz;
+       uint32_t align;
+       uint32_t aflistoffset;
+       uint32_t aflistsz;
+};
+
+#define        RNDIS_INIT_COMP_SIZE_MIN        \
+       offsetof(struct rndis_init_comp, aflistsz) /* all fields before aflistsz */
+
+/* Halt the device.  No response sent (no RNDIS_HALT_CMPLT is defined). */
+#define        RNDIS_HALT_MSG          0x00000003
+
+struct rndis_halt_req {
+       uint32_t type;
+       uint32_t len;
+       uint32_t rid;
+};
+
+/* Send a query object. */
+#define        RNDIS_QUERY_MSG         0x00000004
+#define        RNDIS_QUERY_CMPLT       0x80000004
+
+struct rndis_query_req {
+       uint32_t type;
+       uint32_t len;
+       uint32_t rid;
+       uint32_t oid;           /* presumably an OID_ value -- confirm */
+       uint32_t infobuflen;
+       uint32_t infobufoffset; /* cf. RNDIS_QUERY_REQ_INFOBUFOFFSET */
+       uint32_t devicevchdl;
+};
+
+#define        RNDIS_QUERY_REQ_INFOBUFOFFSET           \
+       (sizeof(struct rndis_query_req) -       \
+        offsetof(struct rndis_query_req, rid)) /* struct end, counted from rid */
+
+struct rndis_query_comp {
+       uint32_t type;
+       uint32_t len;
+       uint32_t rid;
+       uint32_t status;
+       uint32_t infobuflen;
+       uint32_t infobufoffset; /* counted from rid, see _INFOBUFOFFSET_ABS */
+};
+
+/* Convert infobufoffset to one from the beginning of rndis_query_comp. */
+#define        RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(ofs) \
+       ((ofs) + offsetof(struct rndis_query_comp, rid))
+
+/* Send a set object request. */
+#define        RNDIS_SET_MSG           0x00000005
+#define        RNDIS_SET_CMPLT         0x80000005
+
+struct rndis_set_req {         /* same field layout as rndis_query_req */
+       uint32_t type;
+       uint32_t len;
+       uint32_t rid;
+       uint32_t oid;           /* presumably an OID_ value -- confirm */
+       uint32_t infobuflen;
+       uint32_t infobufoffset; /* cf. RNDIS_SET_REQ_INFOBUFOFFSET */
+       uint32_t devicevchdl;
+};
+
+#define        RNDIS_SET_REQ_INFOBUFOFFSET             \
+       (sizeof(struct rndis_set_req) -         \
+        offsetof(struct rndis_set_req, rid))   /* struct end, counted from rid */
+
+struct rndis_set_comp {                /* same field layout as rndis_comp_hdr */
+       uint32_t type;
+       uint32_t len;
+       uint32_t rid;
+       uint32_t status;
+};
+
+/*
+ * Parameter used by OID_GEN_RNDIS_CONFIG_PARAMETER: a name/value pair.
+ */
+#define        RNDIS_SET_PARAM_NUMERIC 0x00000000
+#define        RNDIS_SET_PARAM_STRING  0x00000002
+
+struct rndis_set_parameter {
+       uint32_t nameoffset;    /* offset base not shown here -- TODO confirm */
+       uint32_t namelen;
+       uint32_t type;          /* RNDIS_SET_PARAM_ */
+       uint32_t valueoffset;   /* offset base not shown here -- TODO confirm */
+       uint32_t valuelen;
+};
+
+/* Perform a soft reset on the device. */
+#define        RNDIS_RESET_MSG         0x00000006
+#define        RNDIS_RESET_CMPLT               0x80000006
+
+struct rndis_reset_req {
+       uint32_t type;
+       uint32_t len;
+       uint32_t rid;
+};
+
+struct rndis_reset_comp {      /* no rid: not laid out like rndis_comp_hdr */
+       uint32_t type;
+       uint32_t len;
+       uint32_t status;
+       uint32_t adrreset;
+};
+
+/* 802.3 link-state or undefined message error.  Sent by device. */
+#define        RNDIS_INDICATE_STATUS_MSG       0x00000007
+
+struct rndis_status_msg {
+       uint32_t type;
+       uint32_t len;
+       uint32_t status;        /* presumably an RNDIS_STATUS_ value -- confirm */
+       uint32_t stbuflen;
+       uint32_t stbufoffset;   /* counted from status, see RNDIS_STBUFOFFSET_ABS */
+       /* rndis_diag_info */
+};
+
+/* Convert stbufoffset to one from the beginning of rndis_status_msg. */
+#define        RNDIS_STBUFOFFSET_ABS(ofs)      \
+       ((ofs) + offsetof(struct rndis_status_msg, status))
+
+/*
+ * Immediately after rndis_status_msg.stbufoffset, if a control
+ * message is malformed, or a packet message contains inappropriate
+ * content.
+ */
+struct rndis_diag_info {
+       uint32_t diagstatus;
+       uint32_t erroffset;
+};
+
+/* Keepalive message.  May be sent by device. */
+#define        RNDIS_KEEPALIVE_MSG     0x00000008
+#define        RNDIS_KEEPALIVE_CMPLT   0x80000008
+
+struct rndis_keepalive_req {
+       uint32_t type;
+       uint32_t len;
+       uint32_t rid;
+};
+
+struct rndis_keepalive_comp {  /* same field layout as rndis_comp_hdr */
+       uint32_t type;
+       uint32_t len;
+       uint32_t rid;
+       uint32_t status;
+};
+
+/* Packet filter bits used by OID_GEN_CURRENT_PACKET_FILTER; OR them together */
+#define        NDIS_PACKET_TYPE_NONE                   0x00000000
+#define        NDIS_PACKET_TYPE_DIRECTED               0x00000001
+#define        NDIS_PACKET_TYPE_MULTICAST              0x00000002
+#define        NDIS_PACKET_TYPE_ALL_MULTICAST          0x00000004
+#define        NDIS_PACKET_TYPE_BROADCAST              0x00000008
+#define        NDIS_PACKET_TYPE_SOURCE_ROUTING         0x00000010
+#define        NDIS_PACKET_TYPE_PROMISCUOUS            0x00000020
+#define        NDIS_PACKET_TYPE_SMT                    0x00000040
+#define        NDIS_PACKET_TYPE_ALL_LOCAL              0x00000080
+#define        NDIS_PACKET_TYPE_GROUP                  0x00001000 /* 0x100-0x800 not defined here */
+#define        NDIS_PACKET_TYPE_ALL_FUNCTIONAL         0x00002000
+#define        NDIS_PACKET_TYPE_FUNCTIONAL             0x00004000
+#define        NDIS_PACKET_TYPE_MAC_FRAME              0x00008000
+
+#endif /* !_NET_RNDIS_H_ */
diff --git a/drivers/net/netvsc/rte_pmd_netvsc_version.map b/drivers/net/netvsc/rte_pmd_netvsc_version.map
new file mode 100644 (file)
index 0000000..d534019
--- /dev/null
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+
+DPDK_18.08 {
+       local: *;
+};
index 3e74768..d21fb57 100644 (file)
@@ -177,6 +177,7 @@ endif # $(CONFIG_RTE_LIBRTE_VHOST)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio
 
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VMBUS)          += -lrte_bus_vmbus
+_LDLIBS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD)     += -lrte_pmd_netvsc
 
 ifeq ($(CONFIG_RTE_LIBRTE_BBDEV),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_NULL)     += -lrte_pmd_bbdev_null