Integrate accelerated networking support into netvsc PMD.
This allows netvsc to manage VF without using failsafe or vdev_netvsc.
For the exception vswitch path some tests like transmit
get a 22% increase in packets/sec.
For the VF path, the code is slightly shorter but has no
real change in performance.
Pro:
* using netvsc is more like other DPDK NICs
* the exception packet uses less CPU
* much smaller code size
* no locking required on VF transmit/receive path
* no legacy Linux network device to get mangled by userspace
* much simpler (1K vs 9K) LOC
* unified extended statistics
Con:
* using netvsc has a more complex startup model
* no bifurcated driver support
* no flow support (since host does not have flow API).
* no tunnel offload support
* no receive interrupt support
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
* The maximum number of queues is limited by the host (currently 64).
When used with 4.16 kernel only a single queue is available.
-.. note::
- This driver is intended for use with **Hyper-V only** and is
- not recommended for use on Azure because accelerated Networking
- (SR-IOV) is not supported.
-
- On Azure, use the :doc:`vdev_netvsc` which
- automatically configures the necessary TAP and failsave drivers.
-
+* This driver supports SR-IOV network acceleration.
+ If SR-IOV is enabled then the driver will transparently manage the interface,
+ and send and receive packets using the VF path.
+ The VDEV_NETVSC and FAILSAFE drivers are *not* used when using netvsc PMD.
Installation
------------
* Match items: destination MAC address.
* Action items: push/pop/rewrite vlan header.
+* **Added support for SR-IOV in netvsc PMD.**
+
+ The ``netvsc`` poll mode driver now supports the Accelerated Networking
+ SR-IOV option in Hyper-V and Azure. This is an alternative to the previous
+ vdev_netvsc, tap, and failsafe drivers combination.
+
API Changes
-----------
librte_bus_fslmc.so.1
librte_bus_pci.so.1
librte_bus_vdev.so.1
- librte_bus_vmbus.so.1
+ + librte_bus_vmbus.so.1
librte_cfgfile.so.2
librte_cmdline.so.2
librte_common_octeontx.so.1
librte_pmd_ring.so.2
librte_pmd_softnic.so.1
librte_pmd_vhost.so.2
+ + librte_pmd_netvsc.so.1
librte_port.so.3
librte_power.so.1
librte_rawdev.so.1
Also, make sure to start the actual text at the margin.
=========================================================
+* When using SR-IOV (VF) support with netvsc PMD and the Mellanox mlx5 bifurcated
+  driver, the Linux netvsc device must be brought up before the netvsc device is
+  unbound and passed to DPDK.
+
Tested Platforms
----------------
This section is a comment. Do not overwrite or remove it.
Also, make sure to start the actual text at the margin.
=========================================================
-
SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rxtx.c
SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rndis.c
SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_nvs.c
+SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_vf.c
LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
*/
int
hn_dev_link_update(struct rte_eth_dev *dev,
- __rte_unused int wait_to_complete)
+ int wait_to_complete)
{
struct hn_data *hv = dev->data->dev_private;
struct rte_eth_link link, old;
hn_rndis_get_linkspeed(hv);
+ hn_vf_link_update(dev, wait_to_complete);
+
link = (struct rte_eth_link) {
.link_duplex = ETH_LINK_FULL_DUPLEX,
.link_autoneg = ETH_LINK_SPEED_FIXED,
dev_info->max_tx_queues = hv->max_queues;
hn_rndis_get_offload(hv, dev_info);
+ hn_vf_info_get(hv, dev_info);
}
static void
}
}
- return 0;
+ return hn_vf_configure(dev, dev_conf);
}
static int hn_dev_stats_get(struct rte_eth_dev *dev,
{
unsigned int i;
+ hn_vf_stats_get(dev, stats);
+
for (i = 0; i < dev->data->nb_tx_queues; i++) {
const struct hn_tx_queue *txq = dev->data->tx_queues[i];
}
}
+/*
+ * Reset extended statistics: clear the synthetic device counters with
+ * hn_dev_stats_reset(), then pass the reset on through
+ * hn_vf_xstats_reset().
+ */
+static void
+hn_dev_xstats_reset(struct rte_eth_dev *dev)
+{
+	hn_dev_stats_reset(dev);
+	hn_vf_xstats_reset(dev);
+}
+
+/*
+ * Number of extended statistics exposed by this port: one set of
+ * hn_stat_strings entries for every Tx queue and every Rx queue, plus
+ * the count reported by hn_vf_xstats_get_names().
+ *
+ * Returns the total, or the negative value returned by the VF query.
+ */
+static int
+hn_dev_xstats_count(struct rte_eth_dev *dev)
+{
+	int synth_count, vf_count;
+
+	synth_count = dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings);
+	synth_count += dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings);
+
+	vf_count = hn_vf_xstats_get_names(dev, NULL, 0);
+
+	return vf_count < 0 ? vf_count : synth_count + vf_count;
+}
+
static int
hn_dev_xstats_get_names(struct rte_eth_dev *dev,
struct rte_eth_xstat_name *xstats_names,
- __rte_unused unsigned int limit)
+ unsigned int limit)
{
unsigned int i, t, count = 0;
-
- PMD_INIT_FUNC_TRACE();
+ int ret;
if (!xstats_names)
- return dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings)
- + dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings);
+ return hn_dev_xstats_count(dev);
/* Note: limit checked in rte_eth_xstats_names() */
for (i = 0; i < dev->data->nb_tx_queues; i++) {
if (!txq)
continue;
+ if (count >= limit)
+ break;
+
for (t = 0; t < RTE_DIM(hn_stat_strings); t++)
snprintf(xstats_names[count++].name,
RTE_ETH_XSTATS_NAME_SIZE,
if (!rxq)
continue;
+ if (count >= limit)
+ break;
+
for (t = 0; t < RTE_DIM(hn_stat_strings); t++)
snprintf(xstats_names[count++].name,
RTE_ETH_XSTATS_NAME_SIZE,
hn_stat_strings[t].name);
}
- return count;
+ ret = hn_vf_xstats_get_names(dev, xstats_names + count,
+ limit - count);
+ if (ret < 0)
+ return ret;
+
+ return count + ret;
}
static int
unsigned int n)
{
unsigned int i, t, count = 0;
-
- const unsigned int nstats =
- dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings)
- + dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings);
+ const unsigned int nstats = hn_dev_xstats_count(dev);
const char *stats;
+ int ret;
PMD_INIT_FUNC_TRACE();
(stats + hn_stat_strings[t].offset);
}
- return count;
+ ret = hn_vf_xstats_get(dev, xstats + count, n - count);
+ if (ret < 0)
+ return ret;
+
+ return count + ret;
}
static int
hn_dev_start(struct rte_eth_dev *dev)
{
struct hn_data *hv = dev->data->dev_private;
+ int error;
PMD_INIT_FUNC_TRACE();
- return hn_rndis_set_rxfilter(hv,
- NDIS_PACKET_TYPE_BROADCAST |
- NDIS_PACKET_TYPE_ALL_MULTICAST |
- NDIS_PACKET_TYPE_DIRECTED);
+ error = hn_rndis_set_rxfilter(hv,
+ NDIS_PACKET_TYPE_BROADCAST |
+ NDIS_PACKET_TYPE_ALL_MULTICAST |
+ NDIS_PACKET_TYPE_DIRECTED);
+ if (error)
+ return error;
+
+ error = hn_vf_start(dev);
+ if (error)
+ hn_rndis_set_rxfilter(hv, 0);
+
+ return error;
}
static void
PMD_INIT_FUNC_TRACE();
hn_rndis_set_rxfilter(hv, 0);
+ hn_vf_stop(dev);
}
static void
hn_dev_close(struct rte_eth_dev *dev __rte_unused)
{
PMD_INIT_LOG(DEBUG, "close");
+
+ hn_vf_close(dev);
}
static const struct eth_dev_ops hn_eth_dev_ops = {
.dev_stop = hn_dev_stop,
.dev_close = hn_dev_close,
.dev_infos_get = hn_dev_info_get,
- .txq_info_get = hn_dev_tx_queue_info,
- .rxq_info_get = hn_dev_rx_queue_info,
+ .dev_supported_ptypes_get = hn_vf_supported_ptypes,
.promiscuous_enable = hn_dev_promiscuous_enable,
.promiscuous_disable = hn_dev_promiscuous_disable,
.allmulticast_enable = hn_dev_allmulticast_enable,
.rx_queue_release = hn_dev_rx_queue_release,
.link_update = hn_dev_link_update,
.stats_get = hn_dev_stats_get,
+ .stats_reset = hn_dev_stats_reset,
.xstats_get = hn_dev_xstats_get,
.xstats_get_names = hn_dev_xstats_get_names,
- .stats_reset = hn_dev_stats_reset,
- .xstats_reset = hn_dev_stats_reset,
+ .xstats_reset = hn_dev_xstats_reset,
};
/*
if (err)
return err;
+ strlcpy(hv->owner.name, eth_dev->device->name,
+ RTE_ETH_MAX_OWNER_NAME_LEN);
+ err = rte_eth_dev_owner_new(&hv->owner.id);
+ if (err) {
+ PMD_INIT_LOG(ERR, "Can not get owner id");
+ return err;
+ }
+
/* Initialize primary channel input for control operations */
err = rte_vmbus_chan_open(vmbus, &hv->channels[0]);
if (err)
hv->max_queues = RTE_MIN(rxr_cnt, (unsigned int)max_chan);
+ /* If VF was reported but not added, do it now */
+ if (hv->vf_present && !hv->vf_dev) {
+ PMD_INIT_LOG(DEBUG, "Adding VF device");
+
+ err = hn_vf_add(eth_dev, hv);
+ if (err)
+ goto failed;
+ }
+
return 0;
failed:
hn_detach(hv);
rte_vmbus_chan_close(hv->primary->chan);
rte_free(hv->primary);
+ rte_eth_dev_owner_delete(hv->owner.id);
eth_dev->data->mac_addrs = NULL;
hn_nvs_set_datapath(struct hn_data *hv, uint32_t path)
{
struct hn_nvs_datapath dp;
+ int error;
+
+ PMD_DRV_LOG(DEBUG, "set datapath %s",
+ path ? "VF" : "Synthetic");
memset(&dp, 0, sizeof(dp));
dp.type = NVS_TYPE_SET_DATAPATH;
dp.active_path = path;
- hn_nvs_req_send(hv, &dp, sizeof(dp));
+ error = hn_nvs_req_send(hv, &dp, sizeof(dp));
+ if (error) {
+ PMD_DRV_LOG(ERR,
+ "send set datapath failed: %d",
+ error);
+ }
}
uint8_t rsvd[28];
} __rte_packed;
+/* NVS message describing a change in VF association for this device */
+struct hn_nvs_vf_association {
+	uint32_t	type;		/* NVS_TYPE_VFASSOC_NOTE */
+	uint32_t	allocated;	/* non-zero: VF added, zero: VF removed */
+	uint32_t	serial;		/* VF serial number */
+} __rte_packed;
+
#define NVS_DATAPATH_SYNTHETIC 0
#define NVS_DATAPATH_VF 1
void hn_nvs_ack_rxbuf(struct vmbus_channel *chan, uint64_t tid);
int hn_nvs_alloc_subchans(struct hn_data *hv, uint32_t *nsubch);
void hn_nvs_set_datapath(struct hn_data *hv, uint32_t path);
+void hn_nvs_handle_vfassoc(struct rte_eth_dev *dev,
+ const struct vmbus_chanpkt_hdr *hdr,
+ const void *data);
static inline int
hn_nvs_send(struct vmbus_channel *chan, uint16_t flags,
return 0;
}
+/*
+ * Build a packet type mask from the receive checksum capabilities
+ * reported by hn_rndis_query_hwcaps().
+ *
+ * RTE_PTYPE_L2_ETHER is always part of the result; when the query
+ * fails it is the only flag returned.
+ */
+uint32_t
+hn_rndis_get_ptypes(struct hn_data *hv)
+{
+	uint32_t ptypes = RTE_PTYPE_L2_ETHER;
+	struct ndis_offload hwcaps;
+	int error;
+
+	memset(&hwcaps, 0, sizeof(hwcaps));
+
+	error = hn_rndis_query_hwcaps(hv, &hwcaps);
+	if (error) {
+		PMD_DRV_LOG(ERR, "hwcaps query failed: %d", error);
+		return ptypes;
+	}
+
+	/* IPv4 header checksum capability implies IPv4 classification */
+	if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4)
+		ptypes |= RTE_PTYPE_L3_IPV4;
+
+	/* TCP over either address family */
+	if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) ||
+	    (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6))
+		ptypes |= RTE_PTYPE_L4_TCP;
+
+	/* UDP over either address family */
+	if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) ||
+	    (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6))
+		ptypes |= RTE_PTYPE_L4_UDP;
+
+	return ptypes;
+}
+
int
hn_rndis_set_rxfilter(struct hn_data *hv, uint32_t filter)
{
unsigned int *rxr_cnt0);
int hn_rndis_conf_rss(struct hn_data *hv,
const struct rte_eth_rss_conf *rss_conf);
+uint32_t hn_rndis_get_ptypes(struct hn_data *hv);
#ifdef RTE_LIBRTE_NETVSC_DEBUG_DUMP
void hn_rndis_dump(const void *buf);
struct hn_data *hv = dev->data->dev_private;
struct hn_tx_queue *txq;
uint32_t tx_free_thresh;
+ int err;
PMD_INIT_FUNC_TRACE();
hn_reset_txagg(txq);
- dev->data->tx_queues[queue_idx] = txq;
+ err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
+ socket_id, tx_conf);
+ if (err) {
+ rte_free(txq);
+ return err;
+ }
+ dev->data->tx_queues[queue_idx] = txq;
return 0;
}
rte_free(txq);
}
-void
-hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
- struct rte_eth_txq_info *qinfo)
-{
- struct hn_data *hv = dev->data->dev_private;
- struct hn_tx_queue *txq = dev->data->rx_queues[queue_idx];
-
- qinfo->conf.tx_free_thresh = txq->free_thresh;
- qinfo->nb_desc = hv->tx_pool->size;
-}
-
static void
hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
hn_rx_buf_release(rxb);
}
+/*
+ * Called when an NVS inband notification is received from the host.
+ * Dispatches on the NVS message type: VF association messages are
+ * handed to hn_nvs_handle_vfassoc(); all other types are only logged.
+ */
+static void hn_nvs_handle_notify(struct rte_eth_dev *dev,
+				 const struct vmbus_chanpkt_hdr *pkt,
+				 const void *data)
+{
+	const struct hn_nvs_hdr *hdr = data;
+
+	/* Do not read the message type out of a truncated packet */
+	if (unlikely(vmbus_chanpkt_datalen(pkt) < sizeof(*hdr))) {
+		PMD_DRV_LOG(ERR, "invalid nvs notify");
+		return;
+	}
+
+	switch (hdr->type) {
+	case NVS_TYPE_TXTBL_NOTE:
+		/* Transmit indirection table has locking problems
+		 * in DPDK and therefore not implemented
+		 */
+		PMD_DRV_LOG(DEBUG, "host notify of transmit indirection table");
+		break;
+
+	case NVS_TYPE_VFASSOC_NOTE:
+		hn_nvs_handle_vfassoc(dev, pkt, data);
+		break;
+
+	default:
+		PMD_DRV_LOG(INFO,
+			    "got notify, nvs type %u", hdr->type);
+	}
+}
+
struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
uint16_t queue_id,
unsigned int socket_id)
hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
uint16_t queue_idx, uint16_t nb_desc,
unsigned int socket_id,
- const struct rte_eth_rxconf *rx_conf __rte_unused,
+ const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mp)
{
struct hn_data *hv = dev->data->dev_private;
char ring_name[RTE_RING_NAMESIZE];
struct hn_rx_queue *rxq;
unsigned int count;
+ int error = -ENOMEM;
PMD_INIT_FUNC_TRACE();
if (!rxq->rx_ring)
goto fail;
+ error = hn_vf_rx_queue_setup(dev, queue_idx, nb_desc,
+ socket_id, rx_conf, mp);
+ if (error)
+ goto fail;
+
dev->data->rx_queues[queue_idx] = rxq;
return 0;
rte_ring_free(rxq->rx_ring);
rte_free(rxq->event_buf);
rte_free(rxq);
- return -ENOMEM;
+ return error;
}
void
rxq->rx_ring = NULL;
rxq->mb_pool = NULL;
+ hn_vf_rx_queue_release(rxq->hv, rxq->queue_id);
+
+ /* Keep primary queue to allow for control operations */
if (rxq != rxq->hv->primary) {
rte_free(rxq->event_buf);
rte_free(rxq);
return hn_process_events(txq->hv, txq->queue_id, free_cnt);
}
-void
-hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
- struct rte_eth_rxq_info *qinfo)
-{
- struct hn_rx_queue *rxq = dev->data->rx_queues[queue_idx];
-
- qinfo->mp = rxq->mb_pool;
- qinfo->scattered_rx = 1;
- qinfo->nb_desc = rte_ring_get_capacity(rxq->rx_ring);
-}
-
-static void
-hn_nvs_handle_notify(const struct vmbus_chanpkt_hdr *pkthdr,
- const void *data)
-{
- const struct hn_nvs_hdr *hdr = data;
-
- if (unlikely(vmbus_chanpkt_datalen(pkthdr) < sizeof(*hdr))) {
- PMD_DRV_LOG(ERR, "invalid nvs notify");
- return;
- }
-
- PMD_DRV_LOG(INFO,
- "got notify, nvs type %u", hdr->type);
-}
-
/*
* Process pending events on the channel.
* Called from both Rx queue poll and Tx cleanup
break;
case VMBUS_CHANPKT_TYPE_INBAND:
- hn_nvs_handle_notify(pkt, data);
+ hn_nvs_handle_notify(dev, pkt, data);
break;
default:
hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
struct hn_tx_queue *txq = ptxq;
+ uint16_t queue_id = txq->queue_id;
struct hn_data *hv = txq->hv;
+ struct rte_eth_dev *vf_dev;
bool need_sig = false;
uint16_t nb_tx;
int ret;
if (unlikely(hv->closed))
return 0;
+ /* Transmit over VF if present and up */
+ vf_dev = hv->vf_dev;
+ rte_compiler_barrier();
+ if (vf_dev && vf_dev->data->dev_started) {
+ void *sub_q = vf_dev->data->tx_queues[queue_id];
+
+ return (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts);
+ }
+
if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh)
hn_process_events(hv, txq->queue_id, 0);
if (unlikely(!pkt))
break;
- hn_encap(pkt, txq->queue_id, m);
+ hn_encap(pkt, queue_id, m);
hn_append_to_chim(txq, pkt, m);
rte_pktmbuf_free(m);
txd->data_size += m->pkt_len;
++txd->packets;
- hn_encap(pkt, txq->queue_id, m);
+ hn_encap(pkt, queue_id, m);
ret = hn_xmit_sg(txq, txd, m, &need_sig);
if (unlikely(ret != 0)) {
{
struct hn_rx_queue *rxq = prxq;
struct hn_data *hv = rxq->hv;
+ struct rte_eth_dev *vf_dev;
+ uint16_t nb_rcv;
if (unlikely(hv->closed))
return 0;
- /* If ring is empty then process more */
- if (rte_ring_count(rxq->rx_ring) < nb_pkts)
+ vf_dev = hv->vf_dev;
+ rte_compiler_barrier();
+
+ if (vf_dev && vf_dev->data->dev_started) {
+ /* Normally, with SR-IOV the ring buffer will be empty */
hn_process_events(hv, rxq->queue_id, 0);
- /* Get mbufs off staging ring */
- return rte_ring_sc_dequeue_burst(rxq->rx_ring, (void **)rx_pkts,
- nb_pkts, NULL);
+ /* Get mbufs some bufs off of staging ring */
+ nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring,
+ (void **)rx_pkts,
+ nb_pkts / 2, NULL);
+ /* And rest off of VF */
+ nb_rcv += rte_eth_rx_burst(vf_dev->data->port_id,
+ rxq->queue_id,
+ rx_pkts + nb_rcv, nb_pkts - nb_rcv);
+ } else {
+ /* If receive ring is not full then get more */
+ if (rte_ring_count(rxq->rx_ring) < nb_pkts)
+ hn_process_events(hv, rxq->queue_id, 0);
+
+ nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring,
+ (void **)rx_pkts,
+ nb_pkts, NULL);
+ }
+
+ return nb_rcv;
}
struct hn_data {
struct rte_vmbus_device *vmbus;
struct hn_rx_queue *primary;
+ struct rte_eth_dev *vf_dev; /* Subordinate device */
+ rte_spinlock_t vf_lock;
uint16_t port_id;
bool closed;
+ bool vf_present;
uint32_t link_status;
uint32_t link_speed;
uint8_t rndis_resp[256];
struct ether_addr mac_addr;
+
+ struct rte_eth_dev_owner owner;
+ struct rte_intr_handle vf_intr;
+
struct vmbus_channel *channels[HN_MAX_CHANNELS];
};
const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mp);
void hn_dev_rx_queue_release(void *arg);
-void hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
- struct rte_eth_rxq_info *qinfo);
+
+void hn_vf_info_get(struct hn_data *hv,
+ struct rte_eth_dev_info *info);
+int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv);
+int hn_vf_configure(struct rte_eth_dev *dev,
+ const struct rte_eth_conf *dev_conf);
+const uint32_t *hn_vf_supported_ptypes(struct rte_eth_dev *dev);
+int hn_vf_start(struct rte_eth_dev *dev);
+void hn_vf_reset(struct rte_eth_dev *dev);
+void hn_vf_stop(struct rte_eth_dev *dev);
+void hn_vf_close(struct rte_eth_dev *dev);
+int hn_vf_link_update(struct rte_eth_dev *dev,
+ int wait_to_complete);
+int hn_vf_tx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t queue_idx, uint16_t nb_desc,
+ unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf);
+void hn_vf_tx_queue_release(struct hn_data *hv, uint16_t queue_id);
+int hn_vf_rx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t queue_idx, uint16_t nb_desc,
+ unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mp);
+void hn_vf_rx_queue_release(struct hn_data *hv, uint16_t queue_id);
+
+int hn_vf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
+void hn_vf_stats_reset(struct rte_eth_dev *dev);
+int hn_vf_xstats_get_names(struct rte_eth_dev *dev,
+ struct rte_eth_xstat_name *xstats_names,
+ unsigned int size);
+int hn_vf_xstats_get(struct rte_eth_dev *dev,
+ struct rte_eth_xstat *xstats,
+ unsigned int n);
+void hn_vf_xstats_reset(struct rte_eth_dev *dev);
--- /dev/null
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018 Microsoft Corp.
+ * All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <sys/types.h>
+#include <sys/fcntl.h>
+#include <sys/uio.h>
+
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ethdev_driver.h>
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_bus_vmbus.h>
+#include <rte_pci.h>
+#include <rte_bus_pci.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+
+#include "hn_logs.h"
+#include "hn_var.h"
+#include "hn_nvs.h"
+
+/*
+ * Search the ethdev ports for one (other than dev itself) whose first
+ * MAC address equals the first MAC address of dev.
+ *
+ * Returns the matching port id, or -ENOENT if there is none.
+ */
+static int hn_vf_match(const struct rte_eth_dev *dev)
+{
+	const struct ether_addr *mac = dev->data->mac_addrs;
+	int i;
+
+	RTE_ETH_FOREACH_DEV(i) {
+		const struct rte_eth_dev *vf_dev = &rte_eth_devices[i];
+		const struct ether_addr *vf_mac = vf_dev->data->mac_addrs;
+
+		if (vf_dev == dev)
+			continue;
+
+		/* A port without a MAC address table can never match */
+		if (vf_mac == NULL)
+			continue;
+
+		if (is_same_ether_addr(mac, vf_mac))
+			return i;
+	}
+	return -ENOENT;
+}
+
+/*
+ * Take ownership of the VF port and publish it.
+ *
+ * The port must currently have no owner; it is then assigned to
+ * hv->owner and the matching rte_eth_devices[] entry is stored
+ * through vf_dev.
+ *
+ * Returns 0 on success, -EBUSY if the port already has an owner, or
+ * the negative value returned by the ethdev ownership calls.
+ */
+static int hn_vf_attach(struct hn_data *hv, uint16_t port_id,
+			struct rte_eth_dev **vf_dev)
+{
+	struct rte_eth_dev_owner owner = { .id = RTE_ETH_DEV_NO_OWNER };
+	int ret;
+
+	ret = rte_eth_dev_owner_get(port_id, &owner);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Can not find owner for port %d", port_id);
+		return ret;
+	}
+
+	if (owner.id != RTE_ETH_DEV_NO_OWNER) {
+		PMD_DRV_LOG(ERR, "Port %u already owned by other device %s",
+			    port_id, owner.name);
+		return -EBUSY;
+	}
+
+	ret = rte_eth_dev_owner_set(port_id, &hv->owner);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Can not set owner for port %d", port_id);
+		return ret;
+	}
+
+	PMD_DRV_LOG(DEBUG, "Attach VF device %u", port_id);
+	/* Order the ownership update before the pointer becomes visible */
+	rte_smp_wmb();
+	*vf_dev = &rte_eth_devices[port_id];
+	return 0;
+}
+
+/*
+ * Add new VF device to synthetic device.
+ *
+ * Looks up the port with the same MAC address, attaches it under
+ * vf_lock, and on success enables the LSC flag, installs the external
+ * interrupt handle and switches the datapath to the VF.
+ *
+ * Returns 0 on success, the negative result of hn_vf_match() when no
+ * port matches, -EBUSY when a VF is already attached, or the error
+ * from hn_vf_attach().
+ */
+int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
+{
+	int vf_port, rc;
+
+	vf_port = hn_vf_match(dev);
+	if (vf_port < 0) {
+		PMD_DRV_LOG(NOTICE, "No matching MAC found");
+		return vf_port;
+	}
+
+	rte_spinlock_lock(&hv->vf_lock);
+	if (hv->vf_dev != NULL) {
+		PMD_DRV_LOG(ERR, "VF already attached");
+		rc = -EBUSY;
+		goto unlock;
+	}
+
+	rc = hn_vf_attach(hv, vf_port, &hv->vf_dev);
+	if (rc != 0)
+		goto unlock;
+
+	dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+	hv->vf_intr = (struct rte_intr_handle) {
+		.fd = -1,
+		.type = RTE_INTR_HANDLE_EXT,
+	};
+	dev->intr_handle = &hv->vf_intr;
+	hn_nvs_set_datapath(hv, NVS_DATAPATH_VF);
+
+unlock:
+	rte_spinlock_unlock(&hv->vf_lock);
+	return rc;
+}
+
+/*
+ * Detach the VF from the synthetic device.
+ *
+ * Under vf_lock: switch the datapath back to synthetic, clear
+ * hv->vf_dev and release ownership of the VF port. Only logs an error
+ * if no VF is attached.
+ */
+static void hn_vf_remove(struct hn_data *hv)
+{
+	struct rte_eth_dev *vf;
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf = hv->vf_dev;
+	if (vf == NULL) {
+		PMD_DRV_LOG(ERR, "VF path not active");
+	} else {
+		/* Stop incoming packets from arriving on VF */
+		hn_nvs_set_datapath(hv, NVS_DATAPATH_SYNTHETIC);
+		hv->vf_dev = NULL;
+
+		/* Give back ownership */
+		rte_eth_dev_owner_unset(vf->data->port_id, hv->owner.id);
+	}
+	rte_spinlock_unlock(&hv->vf_lock);
+}
+
+/*
+ * Handle VF association message from host.
+ *
+ * Records whether a VF is present in hv->vf_present. When the port is
+ * in the RTE_ETH_DEV_ATTACHED state the VF is added or removed
+ * immediately; otherwise only the flag is updated.
+ */
+void
+hn_nvs_handle_vfassoc(struct rte_eth_dev *dev,
+		      const struct vmbus_chanpkt_hdr *hdr,
+		      const void *data)
+{
+	const struct hn_nvs_vf_association *assoc = data;
+	struct hn_data *hv = dev->data->dev_private;
+
+	if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*assoc))) {
+		PMD_DRV_LOG(ERR, "invalid vf association NVS");
+		return;
+	}
+
+	PMD_DRV_LOG(DEBUG, "VF serial %u %s port %u",
+		    assoc->serial,
+		    assoc->allocated ? "add to" : "remove from",
+		    dev->data->port_id);
+
+	hv->vf_present = assoc->allocated;
+
+	if (dev->state != RTE_ETH_DEV_ATTACHED)
+		return;
+
+	if (!assoc->allocated) {
+		hn_vf_remove(hv);
+		return;
+	}
+
+	hn_vf_add(dev, hv);
+}
+
+/*
+ * Merge the device info of the VF into the info already filled in for
+ * the synthetic path.
+ *
+ * Speed capabilities and default port configuration are taken from
+ * the VF. Queue counts are the minimum of both, offload and RSS
+ * capabilities are the intersection, and the buffer/packet sizes are
+ * the larger of the two values.
+ *
+ * NOTE(review): max_rx_pktlen uses RTE_MAX like min_rx_bufsize;
+ * confirm that the larger limit is really what is intended.
+ */
+static void hn_vf_info_merge(struct rte_eth_dev *vf_dev,
+			     struct rte_eth_dev_info *info)
+{
+	struct rte_eth_dev_info vf;
+
+	rte_eth_dev_info_get(vf_dev->data->port_id, &vf);
+
+	/* Values copied verbatim from the VF */
+	info->speed_capa = vf.speed_capa;
+	info->default_rxportconf = vf.default_rxportconf;
+	info->default_txportconf = vf.default_txportconf;
+
+	/* Receive side */
+	info->max_rx_queues = RTE_MIN(vf.max_rx_queues, info->max_rx_queues);
+	info->rx_offload_capa &= vf.rx_offload_capa;
+	info->rx_queue_offload_capa &= vf.rx_queue_offload_capa;
+	info->flow_type_rss_offloads &= vf.flow_type_rss_offloads;
+
+	/* Transmit side */
+	info->max_tx_queues = RTE_MIN(vf.max_tx_queues, info->max_tx_queues);
+	info->tx_offload_capa &= vf.tx_offload_capa;
+	info->tx_queue_offload_capa &= vf.tx_queue_offload_capa;
+
+	/* Sizes */
+	info->min_rx_bufsize = RTE_MAX(vf.min_rx_bufsize,
+				       info->min_rx_bufsize);
+	info->max_rx_pktlen = RTE_MAX(vf.max_rx_pktlen,
+				      info->max_rx_pktlen);
+}
+
+/*
+ * If a VF is attached, fold its device info into info (see
+ * hn_vf_info_merge()); otherwise info is left untouched.
+ * The lookup and merge run under vf_lock.
+ */
+void hn_vf_info_get(struct hn_data *hv, struct rte_eth_dev_info *info)
+{
+	struct rte_eth_dev *vf;
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf = hv->vf_dev;
+	if (vf != NULL)
+		hn_vf_info_merge(vf, info);
+	rte_spinlock_unlock(&hv->vf_lock);
+}
+
+/*
+ * Ask the VF (if attached and if its driver implements link_update)
+ * to refresh its link state.
+ *
+ * The VF driver's callback is invoked on the VF device itself, not on
+ * the synthetic device, because it operates on its own private data.
+ *
+ * Returns the value from the VF's link_update op, or 0 when there is
+ * no VF or no such op.
+ */
+int hn_vf_link_update(struct rte_eth_dev *dev,
+		      int wait_to_complete)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	struct rte_eth_dev *vf_dev;
+	int ret = 0;
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf_dev = hv->vf_dev;
+	if (vf_dev && vf_dev->dev_ops->link_update)
+		ret = (*vf_dev->dev_ops->link_update)(vf_dev,
+						      wait_to_complete);
+	rte_spinlock_unlock(&hv->vf_lock);
+
+	return ret;
+}
+
+/*
+ * Ethdev event callback registered on the VF port; cb_arg is the
+ * synthetic device.
+ *
+ * Events other than RTE_ETH_EVENT_INTR_LSC are ignored. For a link
+ * state event the synthetic device's link is refreshed and, unless
+ * hn_dev_link_update() returns 0 (treated as "no change"), the event
+ * is passed on to the callbacks of the synthetic device.
+ */
+static int hn_vf_lsc_event(uint16_t port_id __rte_unused,
+			   enum rte_eth_event_type event,
+			   void *cb_arg, void *out __rte_unused)
+{
+	struct rte_eth_dev *dev = cb_arg;
+
+	if (event != RTE_ETH_EVENT_INTR_LSC ||
+	    hn_dev_link_update(dev, 0) == 0)
+		return 0;
+
+	return _rte_eth_dev_callback_process(dev,
+					     RTE_ETH_EVENT_INTR_LSC,
+					     NULL);
+}
+
+/*
+ * Configure the VF port with a copy of the synthetic device's
+ * configuration and the same number of Rx/Tx queues.
+ *
+ * Link state interrupts are enabled on the VF only if the application
+ * asked for them and the VF advertises RTE_ETH_DEV_INTR_LSC; in that
+ * case hn_vf_lsc_event() is registered for the VF's
+ * RTE_ETH_EVENT_INTR_LSC event with the synthetic device as argument.
+ *
+ * Returns 0 on success or the error from rte_eth_dev_configure() /
+ * rte_eth_dev_callback_register().
+ */
+static int _hn_vf_configure(struct rte_eth_dev *dev,
+			    struct rte_eth_dev *vf_dev,
+			    const struct rte_eth_conf *dev_conf)
+{
+	struct rte_eth_conf vf_conf = *dev_conf;
+	uint16_t vf_port = vf_dev->data->port_id;
+	int ret;
+
+	if (dev_conf->intr_conf.lsc &&
+	    (vf_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
+		PMD_DRV_LOG(DEBUG, "enabling LSC for VF %u",
+			    vf_port);
+		vf_conf.intr_conf.lsc = 1;
+	} else {
+		PMD_DRV_LOG(DEBUG, "disabling LSC for VF %u",
+			    vf_port);
+		vf_conf.intr_conf.lsc = 0;
+	}
+
+	ret = rte_eth_dev_configure(vf_port,
+				    dev->data->nb_rx_queues,
+				    dev->data->nb_tx_queues,
+				    &vf_conf);
+	if (ret) {
+		PMD_DRV_LOG(ERR,
+			    "VF configuration failed: %d", ret);
+	} else if (vf_conf.intr_conf.lsc) {
+		/* The event type, not the RTE_ETH_DEV_INTR_LSC device flag */
+		ret = rte_eth_dev_callback_register(vf_port,
+						    RTE_ETH_EVENT_INTR_LSC,
+						    hn_vf_lsc_event, dev);
+		if (ret)
+			PMD_DRV_LOG(ERR,
+				    "Failed to register LSC callback for VF %u",
+				    vf_port);
+	}
+	return ret;
+}
+
+/*
+ * Configure VF if present, forcing it to use the same number of
+ * queues as the synthetic device.
+ *
+ * Returns 0 when no VF is attached, otherwise the result of
+ * _hn_vf_configure(). The call is made while holding vf_lock.
+ */
+int hn_vf_configure(struct rte_eth_dev *dev,
+		    const struct rte_eth_conf *dev_conf)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+	int rc = 0;
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf = hv->vf_dev;
+	if (vf != NULL)
+		rc = _hn_vf_configure(dev, vf, dev_conf);
+	rte_spinlock_unlock(&hv->vf_lock);
+
+	return rc;
+}
+
+/*
+ * Report the packet types supported by the VF.
+ *
+ * Returns the table from the VF driver's dev_supported_ptypes_get op,
+ * or NULL when no VF is attached or the op is not implemented.
+ */
+const uint32_t *hn_vf_supported_ptypes(struct rte_eth_dev *dev)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	const uint32_t *table = NULL;
+	struct rte_eth_dev *vf;
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf = hv->vf_dev;
+	if (vf != NULL && vf->dev_ops->dev_supported_ptypes_get != NULL)
+		table = (*vf->dev_ops->dev_supported_ptypes_get)(vf);
+	rte_spinlock_unlock(&hv->vf_lock);
+
+	return table;
+}
+
+/*
+ * Start the VF port if one is attached.
+ * Returns 0 when there is no VF, otherwise the result of
+ * rte_eth_dev_start() on the VF port.
+ */
+int hn_vf_start(struct rte_eth_dev *dev)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+	int rc = 0;
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf = hv->vf_dev;
+	if (vf != NULL)
+		rc = rte_eth_dev_start(vf->data->port_id);
+	rte_spinlock_unlock(&hv->vf_lock);
+
+	return rc;
+}
+
+/* Stop the VF port if one is attached; does nothing otherwise */
+void hn_vf_stop(struct rte_eth_dev *dev)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf = hv->vf_dev;
+	if (vf != NULL)
+		rte_eth_dev_stop(vf->data->port_id);
+	rte_spinlock_unlock(&hv->vf_lock);
+}
+
+/* If VF is present, then cascade configuration down */
+#define VF_ETHDEV_FUNC(dev, func) \
+ { \
+ struct hn_data *hv = (dev)->data->dev_private; \
+ struct rte_eth_dev *vf_dev; \
+ rte_spinlock_lock(&hv->vf_lock); \
+ vf_dev = hv->vf_dev; \
+ if (vf_dev) \
+ func(vf_dev->data->port_id); \
+ rte_spinlock_unlock(&hv->vf_lock); \
+ }
+
+void hn_vf_reset(struct rte_eth_dev *dev)
+{
+ VF_ETHDEV_FUNC(dev, rte_eth_dev_reset);
+}
+
+void hn_vf_close(struct rte_eth_dev *dev)
+{
+ VF_ETHDEV_FUNC(dev, rte_eth_dev_close);
+}
+
+void hn_vf_stats_reset(struct rte_eth_dev *dev)
+{
+ VF_ETHDEV_FUNC(dev, rte_eth_stats_reset);
+}
+
+/*
+ * Set up the Tx queue with the same index on the VF port, passing the
+ * descriptor count, socket and configuration through unchanged.
+ * Returns 0 when no VF is attached, otherwise the result of
+ * rte_eth_tx_queue_setup().
+ */
+int hn_vf_tx_queue_setup(struct rte_eth_dev *dev,
+			 uint16_t queue_idx, uint16_t nb_desc,
+			 unsigned int socket_id,
+			 const struct rte_eth_txconf *tx_conf)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+	int rc = 0;
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf = hv->vf_dev;
+	if (vf != NULL)
+		rc = rte_eth_tx_queue_setup(vf->data->port_id,
+					    queue_idx, nb_desc,
+					    socket_id, tx_conf);
+	rte_spinlock_unlock(&hv->vf_lock);
+
+	return rc;
+}
+
+/*
+ * Release Tx queue queue_id of the VF through the VF driver's
+ * tx_queue_release op. Nothing happens when no VF is attached or the
+ * op is missing.
+ */
+void hn_vf_tx_queue_release(struct hn_data *hv, uint16_t queue_id)
+{
+	struct rte_eth_dev *vf;
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf = hv->vf_dev;
+	if (vf != NULL && vf->dev_ops->tx_queue_release != NULL) {
+		void *vf_txq = vf->data->tx_queues[queue_id];
+
+		(*vf->dev_ops->tx_queue_release)(vf_txq);
+	}
+	rte_spinlock_unlock(&hv->vf_lock);
+}
+
+/*
+ * Set up the Rx queue with the same index on the VF port, passing the
+ * descriptor count, socket, configuration and mempool through
+ * unchanged.
+ * Returns 0 when no VF is attached, otherwise the result of
+ * rte_eth_rx_queue_setup().
+ */
+int hn_vf_rx_queue_setup(struct rte_eth_dev *dev,
+			 uint16_t queue_idx, uint16_t nb_desc,
+			 unsigned int socket_id,
+			 const struct rte_eth_rxconf *rx_conf,
+			 struct rte_mempool *mp)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+	int rc = 0;
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf = hv->vf_dev;
+	if (vf != NULL)
+		rc = rte_eth_rx_queue_setup(vf->data->port_id,
+					    queue_idx, nb_desc,
+					    socket_id, rx_conf, mp);
+	rte_spinlock_unlock(&hv->vf_lock);
+
+	return rc;
+}
+
+/*
+ * Release Rx queue queue_id of the VF through the VF driver's
+ * rx_queue_release op. Nothing happens when no VF is attached or the
+ * op is missing.
+ */
+void hn_vf_rx_queue_release(struct hn_data *hv, uint16_t queue_id)
+{
+	struct rte_eth_dev *vf;
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf = hv->vf_dev;
+	if (vf != NULL && vf->dev_ops->rx_queue_release != NULL) {
+		void *vf_rxq = vf->data->rx_queues[queue_id];
+
+		(*vf->dev_ops->rx_queue_release)(vf_rxq);
+	}
+	rte_spinlock_unlock(&hv->vf_lock);
+}
+
+/*
+ * Fetch the basic statistics of the VF port into stats.
+ * Returns 0 (stats untouched) when no VF is attached, otherwise the
+ * result of rte_eth_stats_get().
+ */
+int hn_vf_stats_get(struct rte_eth_dev *dev,
+		    struct rte_eth_stats *stats)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+	int rc = 0;
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf = hv->vf_dev;
+	if (vf != NULL)
+		rc = rte_eth_stats_get(vf->data->port_id, stats);
+	rte_spinlock_unlock(&hv->vf_lock);
+
+	return rc;
+}
+
+/*
+ * Get the extended statistic names of the VF, each prefixed with
+ * "vf_".
+ *
+ * names may be NULL to query only the number of entries. Returns the
+ * value from the VF driver's xstats_get_names op, or 0 when no VF is
+ * attached or the op is missing.
+ */
+int hn_vf_xstats_get_names(struct rte_eth_dev *dev,
+			   struct rte_eth_xstat_name *names,
+			   unsigned int n)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	struct rte_eth_dev *vf_dev;
+	int i, count = 0;
+	char tmp[RTE_ETH_XSTATS_NAME_SIZE];
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf_dev = hv->vf_dev;
+	if (vf_dev && vf_dev->dev_ops->xstats_get_names)
+		count = vf_dev->dev_ops->xstats_get_names(vf_dev, names, n);
+	rte_spinlock_unlock(&hv->vf_lock);
+
+	/*
+	 * Only touch entries known to lie inside the caller's array:
+	 * skip the prefixing for a count-only query, for an error or
+	 * empty result, and when the returned count exceeds n.
+	 */
+	if (names == NULL || count <= 0 || (unsigned int)count > n)
+		return count;
+
+	/* add vf_ prefix to xstat names */
+	for (i = 0; i < count; i++) {
+		snprintf(tmp, sizeof(tmp), "vf_%s", names[i].name);
+		strlcpy(names[i].name, tmp, sizeof(names[i].name));
+	}
+
+	return count;
+}
+
+/*
+ * Read the extended statistics of the VF into xstats (room for n
+ * entries).
+ * Returns the value from the VF driver's xstats_get op, or 0 when no
+ * VF is attached or the op is missing.
+ */
+int hn_vf_xstats_get(struct rte_eth_dev *dev,
+		     struct rte_eth_xstat *xstats,
+		     unsigned int n)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+	int nstats = 0;
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf = hv->vf_dev;
+	if (vf != NULL && vf->dev_ops->xstats_get != NULL)
+		nstats = vf->dev_ops->xstats_get(vf, xstats, n);
+	rte_spinlock_unlock(&hv->vf_lock);
+
+	return nstats;
+}
+
+/*
+ * Reset the extended statistics of the VF through the VF driver's
+ * xstats_reset op. Nothing happens when no VF is attached or the op
+ * is missing.
+ */
+void hn_vf_xstats_reset(struct rte_eth_dev *dev)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf = hv->vf_dev;
+	if (vf != NULL && vf->dev_ops->xstats_reset != NULL)
+		vf->dev_ops->xstats_reset(vf);
+	rte_spinlock_unlock(&hv->vf_lock);
+}
build = dpdk_conf.has('RTE_LIBRTE_VMBUS_BUS')
version = 2
-sources = files('hn_ethdev.c', 'hn_rxtx.c', 'hn_rndis.c', 'hn_nvs.c')
+sources = files('hn_ethdev.c', 'hn_rxtx.c', 'hn_rndis.c', 'hn_nvs.c', 'hn_vf.c')
deps += ['bus_vmbus' ]