From dc7680e8597c227b3ba9becbf21cefd51b2b8577 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger
Date: Thu, 30 Aug 2018 15:35:12 -0700
Subject: [PATCH] net/netvsc: support integrated VF

Integrate accelerated networking support into the netvsc PMD.
This allows netvsc to manage the VF directly, without requiring the
failsafe or vdev_netvsc drivers. For the exception (vswitch) path,
some tests such as transmit show a 22% increase in packets/sec.
For the VF path, the code is slightly shorter but shows no real
change in performance.

Pro:
  * using netvsc is more like other DPDK NICs
  * the exception path uses less CPU
  * much smaller and simpler code (about 1K vs 9K lines)
  * no locking required on the VF transmit/receive path
  * no legacy Linux network device to get mangled by userspace
  * unified extended statistics

Con:
  * netvsc has a more complex startup model
  * no bifurcated driver support
  * no flow support (the host does not expose a flow API)
  * no tunnel offload support
  * no receive interrupt support

Signed-off-by: Stephen Hemminger
---
 doc/guides/nics/netvsc.rst             |  12 +-
 doc/guides/rel_notes/release_18_11.rst |  14 +-
 drivers/net/netvsc/Makefile            |   1 +
 drivers/net/netvsc/hn_ethdev.c         | 109 +++++-
 drivers/net/netvsc/hn_nvs.c            |  11 +-
 drivers/net/netvsc/hn_nvs.h            |   9 +
 drivers/net/netvsc/hn_rndis.c          |  31 ++
 drivers/net/netvsc/hn_rndis.h          |   1 +
 drivers/net/netvsc/hn_rxtx.c           | 136 ++++---
 drivers/net/netvsc/hn_var.h            |  43 ++-
 drivers/net/netvsc/hn_vf.c             | 512 +++++++++++++++++++++++++
 drivers/net/netvsc/meson.build         |   2 +-
 12 files changed, 798 insertions(+), 83 deletions(-)
 create mode 100644 drivers/net/netvsc/hn_vf.c
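Note on usage: because the PMD now owns and manages the VF internally,
nothing VF-specific is needed in applications; the standard ethdev call
sequence drives both the synthetic and the VF path. The sketch below is
illustrative only (the function name, queue sizes, and the already-created
mempool are assumptions, not part of this patch); configure, queue setup
and start are cascaded to the VF by hn_vf_configure(),
hn_vf_rx_queue_setup()/hn_vf_tx_queue_setup() and hn_vf_start():

#include <string.h>
#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_mempool.h>

static int example_netvsc_port_init(uint16_t port_id, struct rte_mempool *mp)
{
	struct rte_eth_conf conf;
	int ret;

	memset(&conf, 0, sizeof(conf));

	/* One Rx/Tx queue pair; mirrored onto the VF if one is attached */
	ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
	if (ret < 0)
		return ret;

	ret = rte_eth_rx_queue_setup(port_id, 0, 256, rte_socket_id(),
				     NULL, mp);
	if (ret < 0)
		return ret;

	ret = rte_eth_tx_queue_setup(port_id, 0, 256, rte_socket_id(), NULL);
	if (ret < 0)
		return ret;

	/* hn_dev_start() also starts the VF when present */
	return rte_eth_dev_start(port_id);
}
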
diff --git a/doc/guides/nics/netvsc.rst b/doc/guides/nics/netvsc.rst
index c5f9b7c6fa..87fabf5b8c 100644
--- a/doc/guides/nics/netvsc.rst
+++ b/doc/guides/nics/netvsc.rst
@@ -34,14 +34,10 @@ In this release, the hyper PMD driver provides the basic functionality of packet
 * The maximum number of queues is limited by the host (currently 64).
   When used with 4.16 kernel only a single queue is available.
 
-.. note::
-   This driver is intended for use with **Hyper-V only** and is
-   not recommended for use on Azure because accelerated Networking
-   (SR-IOV) is not supported.
-
-   On Azure, use the :doc:`vdev_netvsc` which
-   automatically configures the necessary TAP and failsave drivers.
-
+* This driver supports SR-IOV network acceleration.
+  If SR-IOV is enabled then the driver will transparently manage the interface,
+  and send and receive packets using the VF path.
+  The VDEV_NETVSC and FAILSAFE drivers are *not* used when using the netvsc PMD.
 
 Installation
 ------------
diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
index 99d1047a36..8c4bb54477 100644
--- a/doc/guides/rel_notes/release_18_11.rst
+++ b/doc/guides/rel_notes/release_18_11.rst
@@ -61,6 +61,12 @@ New Features
   * Match items: destination MAC address.
   * Action items: push/pop/rewrite vlan header.
 
+* **Added support for SR-IOV in netvsc PMD.**
+
+  The ``netvsc`` poll mode driver now supports the Accelerated Networking
+  SR-IOV option in Hyper-V and Azure. This is an alternative to the previous
+  vdev_netvsc, tap, and failsafe drivers combination.
+
 
 API Changes
 -----------
@@ -135,7 +141,7 @@ The libraries prepended with a plus sign were incremented in this version.
      librte_bus_fslmc.so.1
      librte_bus_pci.so.1
      librte_bus_vdev.so.1
-     librte_bus_vmbus.so.1
+   + librte_bus_vmbus.so.1
      librte_cfgfile.so.2
      librte_cmdline.so.2
      librte_common_octeontx.so.1
@@ -172,6 +178,7 @@ The libraries prepended with a plus sign were incremented in this version.
      librte_pmd_ring.so.2
      librte_pmd_softnic.so.1
      librte_pmd_vhost.so.2
+   + librte_pmd_netvsc.so.1
      librte_port.so.3
      librte_power.so.1
      librte_rawdev.so.1
@@ -198,6 +205,10 @@ Known Issues
    Also, make sure to start the actual text at the margin.
    =========================================================
 
+* When using SR-IOV (VF) support with the netvsc PMD and the Mellanox mlx5
+  bifurcated driver, the Linux netvsc device must be brought up before the
+  netvsc device is unbound and passed to DPDK.
+
 
 Tested Platforms
 ----------------
@@ -217,4 +228,3 @@ Tested Platforms
    This section is a comment. Do not overwrite or remove it.
    Also, make sure to start the actual text at the margin.
    =========================================================
-
diff --git a/drivers/net/netvsc/Makefile b/drivers/net/netvsc/Makefile
index 3c713af3c8..71482591a9 100644
--- a/drivers/net/netvsc/Makefile
+++ b/drivers/net/netvsc/Makefile
@@ -15,6 +15,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_ethdev.c
 SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rxtx.c
 SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rndis.c
 SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_nvs.c
+SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_vf.c
 
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index 9ea5813d34..b67cce1ba8 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -192,7 +192,7 @@ static int hn_parse_args(const struct rte_eth_dev *dev)
  */
 int
 hn_dev_link_update(struct rte_eth_dev *dev,
-		   __rte_unused int wait_to_complete)
+		   int wait_to_complete)
 {
 	struct hn_data *hv = dev->data->dev_private;
 	struct rte_eth_link link, old;
@@ -206,6 +206,8 @@ hn_dev_link_update(struct rte_eth_dev *dev,
 
 	hn_rndis_get_linkspeed(hv);
 
+	hn_vf_link_update(dev, wait_to_complete);
+
 	link = (struct rte_eth_link) {
 		.link_duplex = ETH_LINK_FULL_DUPLEX,
 		.link_autoneg = ETH_LINK_SPEED_FIXED,
@@ -244,6 +246,7 @@ static void hn_dev_info_get(struct rte_eth_dev *dev,
 	dev_info->max_tx_queues = hv->max_queues;
 
 	hn_rndis_get_offload(hv, dev_info);
+	hn_vf_info_get(hv, dev_info);
 }
 
 static void
@@ -394,7 +397,7 @@ static int hn_dev_configure(struct rte_eth_dev *dev)
 		}
 	}
 
-	return 0;
+	return hn_vf_configure(dev, dev_conf);
 }
 
 static int hn_dev_stats_get(struct rte_eth_dev *dev,
@@ -402,6 +405,8 @@ static int hn_dev_stats_get(struct rte_eth_dev *dev,
 {
 	unsigned int i;
 
+	hn_vf_stats_get(dev, stats);
+
 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
 		const struct hn_tx_queue *txq = dev->data->tx_queues[i];
 
@@ -464,18 +469,38 @@ hn_dev_stats_reset(struct rte_eth_dev *dev)
 	}
 }
 
+static void
+hn_dev_xstats_reset(struct rte_eth_dev *dev)
+{
+	hn_dev_stats_reset(dev);
+	hn_vf_xstats_reset(dev);
+}
+
+static int
+hn_dev_xstats_count(struct rte_eth_dev *dev)
+{
+	int ret, count;
+
+	count = dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings);
+	count += dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings);
+
+	ret = hn_vf_xstats_get_names(dev, NULL, 0);
+	if (ret < 0)
+		return ret;
+
+	return count + ret;
+}
+
 static int
 hn_dev_xstats_get_names(struct rte_eth_dev *dev,
 			struct rte_eth_xstat_name *xstats_names,
-			__rte_unused unsigned int limit)
+			unsigned int limit)
 {
 	unsigned int i, t, count = 0;
-
-	PMD_INIT_FUNC_TRACE();
+	int ret;
 
 	if (!xstats_names)
-		return dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings)
-			+ dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings);
+		return hn_dev_xstats_count(dev);
 
 	/* Note: limit checked in rte_eth_xstats_names() */
 	for (i
= 0; i < dev->data->nb_tx_queues; i++) { @@ -484,6 +509,9 @@ hn_dev_xstats_get_names(struct rte_eth_dev *dev, if (!txq) continue; + if (count >= limit) + break; + for (t = 0; t < RTE_DIM(hn_stat_strings); t++) snprintf(xstats_names[count++].name, RTE_ETH_XSTATS_NAME_SIZE, @@ -496,6 +524,9 @@ hn_dev_xstats_get_names(struct rte_eth_dev *dev, if (!rxq) continue; + if (count >= limit) + break; + for (t = 0; t < RTE_DIM(hn_stat_strings); t++) snprintf(xstats_names[count++].name, RTE_ETH_XSTATS_NAME_SIZE, @@ -503,7 +534,12 @@ hn_dev_xstats_get_names(struct rte_eth_dev *dev, hn_stat_strings[t].name); } - return count; + ret = hn_vf_xstats_get_names(dev, xstats_names + count, + limit - count); + if (ret < 0) + return ret; + + return count + ret; } static int @@ -512,11 +548,9 @@ hn_dev_xstats_get(struct rte_eth_dev *dev, unsigned int n) { unsigned int i, t, count = 0; - - const unsigned int nstats = - dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings) - + dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings); + const unsigned int nstats = hn_dev_xstats_count(dev); const char *stats; + int ret; PMD_INIT_FUNC_TRACE(); @@ -547,20 +581,33 @@ hn_dev_xstats_get(struct rte_eth_dev *dev, (stats + hn_stat_strings[t].offset); } - return count; + ret = hn_vf_xstats_get(dev, xstats + count, n - count); + if (ret < 0) + return ret; + + return count + ret; } static int hn_dev_start(struct rte_eth_dev *dev) { struct hn_data *hv = dev->data->dev_private; + int error; PMD_INIT_FUNC_TRACE(); - return hn_rndis_set_rxfilter(hv, - NDIS_PACKET_TYPE_BROADCAST | - NDIS_PACKET_TYPE_ALL_MULTICAST | - NDIS_PACKET_TYPE_DIRECTED); + error = hn_rndis_set_rxfilter(hv, + NDIS_PACKET_TYPE_BROADCAST | + NDIS_PACKET_TYPE_ALL_MULTICAST | + NDIS_PACKET_TYPE_DIRECTED); + if (error) + return error; + + error = hn_vf_start(dev); + if (error) + hn_rndis_set_rxfilter(hv, 0); + + return error; } static void @@ -571,12 +618,15 @@ hn_dev_stop(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); hn_rndis_set_rxfilter(hv, 0); + hn_vf_stop(dev); } static void hn_dev_close(struct rte_eth_dev *dev __rte_unused) { PMD_INIT_LOG(DEBUG, "close"); + + hn_vf_close(dev); } static const struct eth_dev_ops hn_eth_dev_ops = { @@ -585,8 +635,7 @@ static const struct eth_dev_ops hn_eth_dev_ops = { .dev_stop = hn_dev_stop, .dev_close = hn_dev_close, .dev_infos_get = hn_dev_info_get, - .txq_info_get = hn_dev_tx_queue_info, - .rxq_info_get = hn_dev_rx_queue_info, + .dev_supported_ptypes_get = hn_vf_supported_ptypes, .promiscuous_enable = hn_dev_promiscuous_enable, .promiscuous_disable = hn_dev_promiscuous_disable, .allmulticast_enable = hn_dev_allmulticast_enable, @@ -598,10 +647,10 @@ static const struct eth_dev_ops hn_eth_dev_ops = { .rx_queue_release = hn_dev_rx_queue_release, .link_update = hn_dev_link_update, .stats_get = hn_dev_stats_get, + .stats_reset = hn_dev_stats_reset, .xstats_get = hn_dev_xstats_get, .xstats_get_names = hn_dev_xstats_get_names, - .stats_reset = hn_dev_stats_reset, - .xstats_reset = hn_dev_stats_reset, + .xstats_reset = hn_dev_xstats_reset, }; /* @@ -679,6 +728,14 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev) if (err) return err; + strlcpy(hv->owner.name, eth_dev->device->name, + RTE_ETH_MAX_OWNER_NAME_LEN); + err = rte_eth_dev_owner_new(&hv->owner.id); + if (err) { + PMD_INIT_LOG(ERR, "Can not get owner id"); + return err; + } + /* Initialize primary channel input for control operations */ err = rte_vmbus_chan_open(vmbus, &hv->channels[0]); if (err) @@ -714,6 +771,15 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev) hv->max_queues = 
RTE_MIN(rxr_cnt, (unsigned int)max_chan); + /* If VF was reported but not added, do it now */ + if (hv->vf_present && !hv->vf_dev) { + PMD_INIT_LOG(DEBUG, "Adding VF device"); + + err = hn_vf_add(eth_dev, hv); + if (err) + goto failed; + } + return 0; failed: @@ -743,6 +809,7 @@ eth_hn_dev_uninit(struct rte_eth_dev *eth_dev) hn_detach(hv); rte_vmbus_chan_close(hv->primary->chan); rte_free(hv->primary); + rte_eth_dev_owner_delete(hv->owner.id); eth_dev->data->mac_addrs = NULL; diff --git a/drivers/net/netvsc/hn_nvs.c b/drivers/net/netvsc/hn_nvs.c index a458bb720f..9690c5f8a3 100644 --- a/drivers/net/netvsc/hn_nvs.c +++ b/drivers/net/netvsc/hn_nvs.c @@ -532,10 +532,19 @@ void hn_nvs_set_datapath(struct hn_data *hv, uint32_t path) { struct hn_nvs_datapath dp; + int error; + + PMD_DRV_LOG(DEBUG, "set datapath %s", + path ? "VF" : "Synthetic"); memset(&dp, 0, sizeof(dp)); dp.type = NVS_TYPE_SET_DATAPATH; dp.active_path = path; - hn_nvs_req_send(hv, &dp, sizeof(dp)); + error = hn_nvs_req_send(hv, &dp, sizeof(dp)); + if (error) { + PMD_DRV_LOG(ERR, + "send set datapath failed: %d", + error); + } } diff --git a/drivers/net/netvsc/hn_nvs.h b/drivers/net/netvsc/hn_nvs.h index 984a9c11c5..2563fd8d86 100644 --- a/drivers/net/netvsc/hn_nvs.h +++ b/drivers/net/netvsc/hn_nvs.h @@ -105,6 +105,12 @@ struct hn_nvs_ndis_init { uint8_t rsvd[28]; } __rte_packed; +struct hn_nvs_vf_association { + uint32_t type; /* NVS_TYPE_VFASSOC_NOTE */ + uint32_t allocated; + uint32_t serial; +} __rte_packed; + #define NVS_DATAPATH_SYNTHETIC 0 #define NVS_DATAPATH_VF 1 @@ -207,6 +213,9 @@ void hn_nvs_detach(struct hn_data *hv); void hn_nvs_ack_rxbuf(struct vmbus_channel *chan, uint64_t tid); int hn_nvs_alloc_subchans(struct hn_data *hv, uint32_t *nsubch); void hn_nvs_set_datapath(struct hn_data *hv, uint32_t path); +void hn_nvs_handle_vfassoc(struct rte_eth_dev *dev, + const struct vmbus_chanpkt_hdr *hdr, + const void *data); static inline int hn_nvs_send(struct vmbus_channel *chan, uint16_t flags, diff --git a/drivers/net/netvsc/hn_rndis.c b/drivers/net/netvsc/hn_rndis.c index e0ea73222e..0134ecb67e 100644 --- a/drivers/net/netvsc/hn_rndis.c +++ b/drivers/net/netvsc/hn_rndis.c @@ -913,6 +913,37 @@ int hn_rndis_get_offload(struct hn_data *hv, return 0; } +uint32_t +hn_rndis_get_ptypes(struct hn_data *hv) +{ + struct ndis_offload hwcaps; + uint32_t ptypes; + int error; + + memset(&hwcaps, 0, sizeof(hwcaps)); + + error = hn_rndis_query_hwcaps(hv, &hwcaps); + if (error) { + PMD_DRV_LOG(ERR, "hwcaps query failed: %d", error); + return RTE_PTYPE_L2_ETHER; + } + + ptypes = RTE_PTYPE_L2_ETHER; + + if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4) + ptypes |= RTE_PTYPE_L3_IPV4; + + if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) || + (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6)) + ptypes |= RTE_PTYPE_L4_TCP; + + if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) || + (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6)) + ptypes |= RTE_PTYPE_L4_UDP; + + return ptypes; +} + int hn_rndis_set_rxfilter(struct hn_data *hv, uint32_t filter) { diff --git a/drivers/net/netvsc/hn_rndis.h b/drivers/net/netvsc/hn_rndis.h index 01b5120631..319b497a7a 100644 --- a/drivers/net/netvsc/hn_rndis.h +++ b/drivers/net/netvsc/hn_rndis.h @@ -24,6 +24,7 @@ int hn_rndis_query_rsscaps(struct hn_data *hv, unsigned int *rxr_cnt0); int hn_rndis_conf_rss(struct hn_data *hv, const struct rte_eth_rss_conf *rss_conf); +uint32_t hn_rndis_get_ptypes(struct hn_data *hv); #ifdef RTE_LIBRTE_NETVSC_DEBUG_DUMP void 
hn_rndis_dump(const void *buf);
diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
index ad22a95f6c..f4a36641b6 100644
--- a/drivers/net/netvsc/hn_rxtx.c
+++ b/drivers/net/netvsc/hn_rxtx.c
@@ -217,6 +217,7 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
 	struct hn_data *hv = dev->data->dev_private;
 	struct hn_tx_queue *txq;
 	uint32_t tx_free_thresh;
+	int err;
 
 	PMD_INIT_FUNC_TRACE();
 
@@ -246,8 +247,14 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
 
 	hn_reset_txagg(txq);
 
-	dev->data->tx_queues[queue_idx] = txq;
+	err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
+				   socket_id, tx_conf);
+	if (err) {
+		rte_free(txq);
+		return err;
+	}
 
+	dev->data->tx_queues[queue_idx] = txq;
 	return 0;
 }
 
@@ -270,17 +277,6 @@ hn_dev_tx_queue_release(void *arg)
 	rte_free(txq);
 }
 
-void
-hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
-		     struct rte_eth_txq_info *qinfo)
-{
-	struct hn_data *hv = dev->data->dev_private;
-	struct hn_tx_queue *txq = dev->data->rx_queues[queue_idx];
-
-	qinfo->conf.tx_free_thresh = txq->free_thresh;
-	qinfo->nb_desc = hv->tx_pool->size;
-}
-
 static void
 hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
 		      unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
@@ -713,6 +709,35 @@ hn_nvs_handle_rxbuf(struct rte_eth_dev *dev,
 	hn_rx_buf_release(rxb);
 }
 
+/*
+ * Called when NVS inband events are received.
+ * Dispatch the event based on its NVS type;
+ * VF association events go to hn_nvs_handle_vfassoc().
+ */
+static void hn_nvs_handle_notify(struct rte_eth_dev *dev,
+				 const struct vmbus_chanpkt_hdr *pkt,
+				 const void *data)
+{
+	const struct hn_nvs_hdr *hdr = data;
+
+	switch (hdr->type) {
+	case NVS_TYPE_TXTBL_NOTE:
+		/* Transmit indirection table has locking problems
+		 * in DPDK and is therefore not implemented
+		 */
+		PMD_DRV_LOG(DEBUG, "host notify of transmit indirection table");
+		break;
+
+	case NVS_TYPE_VFASSOC_NOTE:
+		hn_nvs_handle_vfassoc(dev, pkt, data);
+		break;
+
+	default:
+		PMD_DRV_LOG(INFO,
+			    "got notify, nvs type %u", hdr->type);
+	}
+}
+
 struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
 				      uint16_t queue_id,
 				      unsigned int socket_id)
@@ -744,13 +769,14 @@ int
 hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
 		      uint16_t queue_idx, uint16_t nb_desc,
 		      unsigned int socket_id,
-		      const struct rte_eth_rxconf *rx_conf __rte_unused,
+		      const struct rte_eth_rxconf *rx_conf,
 		      struct rte_mempool *mp)
 {
 	struct hn_data *hv = dev->data->dev_private;
 	char ring_name[RTE_RING_NAMESIZE];
 	struct hn_rx_queue *rxq;
 	unsigned int count;
+	int error = -ENOMEM;
 
 	PMD_INIT_FUNC_TRACE();
 
@@ -780,6 +806,11 @@ hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	if (!rxq->rx_ring)
 		goto fail;
 
+	error = hn_vf_rx_queue_setup(dev, queue_idx, nb_desc,
+				     socket_id, rx_conf, mp);
+	if (error)
+		goto fail;
+
 	dev->data->rx_queues[queue_idx] = rxq;
 	return 0;
 
@@ -787,7 +818,7 @@ fail:
 	rte_ring_free(rxq->rx_ring);
 	rte_free(rxq->event_buf);
 	rte_free(rxq);
-	return -ENOMEM;
+	return error;
 }
 
 void
@@ -804,6 +835,9 @@ hn_dev_rx_queue_release(void *arg)
 	rxq->rx_ring = NULL;
 	rxq->mb_pool = NULL;
 
+	hn_vf_rx_queue_release(rxq->hv, rxq->queue_id);
+
+	/* Keep primary queue to allow for control operations */
 	if (rxq != rxq->hv->primary) {
 		rte_free(rxq->event_buf);
 		rte_free(rxq);
@@ -818,32 +852,6 @@ hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt)
 	return hn_process_events(txq->hv, txq->queue_id, free_cnt);
 }
 
-void
-hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
-		     struct rte_eth_rxq_info *qinfo)
-{
-	struct hn_rx_queue *rxq = dev->data->rx_queues[queue_idx];
-
-	qinfo->mp = rxq->mb_pool;
-	qinfo->scattered_rx = 1;
-	qinfo->nb_desc = rte_ring_get_capacity(rxq->rx_ring);
-}
-
-static void
-hn_nvs_handle_notify(const struct vmbus_chanpkt_hdr *pkthdr,
-		     const void *data)
-{
-	const struct hn_nvs_hdr *hdr = data;
-
-	if (unlikely(vmbus_chanpkt_datalen(pkthdr) < sizeof(*hdr))) {
-		PMD_DRV_LOG(ERR, "invalid nvs notify");
-		return;
-	}
-
-	PMD_DRV_LOG(INFO,
-		    "got notify, nvs type %u", hdr->type);
-}
-
 /*
  * Process pending events on the channel.
  * Called from both Rx queue poll and Tx cleanup
@@ -916,7 +924,7 @@ retry:
 		break;
 
 	case VMBUS_CHANPKT_TYPE_INBAND:
-		hn_nvs_handle_notify(pkt, data);
+		hn_nvs_handle_notify(dev, pkt, data);
 		break;
 
 	default:
@@ -1275,7 +1283,9 @@ uint16_t
 hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
 	struct hn_tx_queue *txq = ptxq;
+	uint16_t queue_id = txq->queue_id;
 	struct hn_data *hv = txq->hv;
+	struct rte_eth_dev *vf_dev;
 	bool need_sig = false;
 	uint16_t nb_tx;
 	int ret;
@@ -1283,6 +1293,15 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	if (unlikely(hv->closed))
 		return 0;
 
+	/* Transmit over VF if present and up */
+	vf_dev = hv->vf_dev;
+	rte_compiler_barrier();
+	if (vf_dev && vf_dev->data->dev_started) {
+		void *sub_q = vf_dev->data->tx_queues[queue_id];
+
+		return (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts);
+	}
+
 	if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh)
 		hn_process_events(hv, txq->queue_id, 0);
 
@@ -1304,7 +1323,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 			if (unlikely(!pkt))
 				break;
 
-			hn_encap(pkt, txq->queue_id, m);
+			hn_encap(pkt, queue_id, m);
 			hn_append_to_chim(txq, pkt, m);
 			rte_pktmbuf_free(m);
 
@@ -1331,7 +1350,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		txd->data_size += m->pkt_len;
 		++txd->packets;
 
-		hn_encap(pkt, txq->queue_id, m);
+		hn_encap(pkt, queue_id, m);
 
 		ret = hn_xmit_sg(txq, txd, m, &need_sig);
 		if (unlikely(ret != 0)) {
@@ -1360,15 +1379,36 @@ hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	struct hn_rx_queue *rxq = prxq;
 	struct hn_data *hv = rxq->hv;
+	struct rte_eth_dev *vf_dev;
+	uint16_t nb_rcv;
 
 	if (unlikely(hv->closed))
 		return 0;
 
-	/* If ring is empty then process more */
-	if (rte_ring_count(rxq->rx_ring) < nb_pkts)
+	vf_dev = hv->vf_dev;
+	rte_compiler_barrier();
+
+	if (vf_dev && vf_dev->data->dev_started) {
+		/* Normally, with SR-IOV the ring buffer will be empty */
 		hn_process_events(hv, rxq->queue_id, 0);
 
-	/* Get mbufs off staging ring */
-	return rte_ring_sc_dequeue_burst(rxq->rx_ring, (void **)rx_pkts,
-					 nb_pkts, NULL);
+		/* Get some mbufs off of the staging ring */
+		nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring,
+						   (void **)rx_pkts,
+						   nb_pkts / 2, NULL);
+		/* And get the rest from the VF */
+		nb_rcv += rte_eth_rx_burst(vf_dev->data->port_id,
+					   rxq->queue_id,
+					   rx_pkts + nb_rcv, nb_pkts - nb_rcv);
+	} else {
+		/* If the ring has fewer packets than requested, poll for more */
+		if (rte_ring_count(rxq->rx_ring) < nb_pkts)
+			hn_process_events(hv, rxq->queue_id, 0);
+
+		nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring,
+						   (void **)rx_pkts,
+						   nb_pkts, NULL);
+	}
+
+	return nb_rcv;
 }
diff --git a/drivers/net/netvsc/hn_var.h b/drivers/net/netvsc/hn_var.h
index 17b67941dc..b8d9e5d5c4 100644
--- a/drivers/net/netvsc/hn_var.h
+++ b/drivers/net/netvsc/hn_var.h
@@ -94,8 +94,11 @@ struct hn_rx_bufinfo {
 struct hn_data {
	struct rte_vmbus_device *vmbus;
	struct hn_rx_queue *primary;
+	struct rte_eth_dev *vf_dev;		/* Subordinate device */
+	
rte_spinlock_t vf_lock; uint16_t port_id; bool closed; + bool vf_present; uint32_t link_status; uint32_t link_speed; @@ -124,6 +127,10 @@ struct hn_data { uint8_t rndis_resp[256]; struct ether_addr mac_addr; + + struct rte_eth_dev_owner owner; + struct rte_intr_handle vf_intr; + struct vmbus_channel *channels[HN_MAX_CHANNELS]; }; @@ -160,5 +167,37 @@ int hn_dev_rx_queue_setup(struct rte_eth_dev *dev, const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp); void hn_dev_rx_queue_release(void *arg); -void hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx, - struct rte_eth_rxq_info *qinfo); + +void hn_vf_info_get(struct hn_data *hv, + struct rte_eth_dev_info *info); +int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv); +int hn_vf_configure(struct rte_eth_dev *dev, + const struct rte_eth_conf *dev_conf); +const uint32_t *hn_vf_supported_ptypes(struct rte_eth_dev *dev); +int hn_vf_start(struct rte_eth_dev *dev); +void hn_vf_reset(struct rte_eth_dev *dev); +void hn_vf_stop(struct rte_eth_dev *dev); +void hn_vf_close(struct rte_eth_dev *dev); +int hn_vf_link_update(struct rte_eth_dev *dev, + int wait_to_complete); +int hn_vf_tx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); +void hn_vf_tx_queue_release(struct hn_data *hv, uint16_t queue_id); +int hn_vf_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp); +void hn_vf_rx_queue_release(struct hn_data *hv, uint16_t queue_id); + +int hn_vf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats); +void hn_vf_stats_reset(struct rte_eth_dev *dev); +int hn_vf_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, + unsigned int size); +int hn_vf_xstats_get(struct rte_eth_dev *dev, + struct rte_eth_xstat *xstats, + unsigned int n); +void hn_vf_xstats_reset(struct rte_eth_dev *dev); diff --git a/drivers/net/netvsc/hn_vf.c b/drivers/net/netvsc/hn_vf.c new file mode 100644 index 0000000000..c68d180fd8 --- /dev/null +++ b/drivers/net/netvsc/hn_vf.c @@ -0,0 +1,512 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Microsoft Corp. + * All rights reserved. 
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ethdev_driver.h>
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_bus_vmbus.h>
+#include <rte_pci.h>
+#include <rte_bus_pci.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+
+#include "hn_logs.h"
+#include "hn_var.h"
+#include "hn_nvs.h"
+
+/* Search for VF with matching MAC address, return port id */
+static int hn_vf_match(const struct rte_eth_dev *dev)
+{
+	const struct ether_addr *mac = dev->data->mac_addrs;
+	char buf[32];
+	int i;
+
+	ether_format_addr(buf, sizeof(buf), mac);
+	RTE_ETH_FOREACH_DEV(i) {
+		const struct rte_eth_dev *vf_dev = &rte_eth_devices[i];
+		const struct ether_addr *vf_mac = vf_dev->data->mac_addrs;
+
+		if (vf_dev == dev)
+			continue;
+
+		ether_format_addr(buf, sizeof(buf), vf_mac);
+		if (is_same_ether_addr(mac, vf_mac))
+			return i;
+	}
+	return -ENOENT;
+}
+
+/*
+ * Attach new PCI VF device and return the port_id
+ */
+static int hn_vf_attach(struct hn_data *hv, uint16_t port_id,
+			struct rte_eth_dev **vf_dev)
+{
+	struct rte_eth_dev_owner owner = { .id = RTE_ETH_DEV_NO_OWNER };
+	int ret;
+
+	ret = rte_eth_dev_owner_get(port_id, &owner);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Can not find owner for port %d", port_id);
+		return ret;
+	}
+
+	if (owner.id != RTE_ETH_DEV_NO_OWNER) {
+		PMD_DRV_LOG(ERR, "Port %u already owned by other device %s",
+			    port_id, owner.name);
+		return -EBUSY;
+	}
+
+	ret = rte_eth_dev_owner_set(port_id, &hv->owner);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Can not set owner for port %d", port_id);
+		return ret;
+	}
+
+	PMD_DRV_LOG(DEBUG, "Attach VF device %u", port_id);
+	rte_smp_wmb();
+	*vf_dev = &rte_eth_devices[port_id];
+	return 0;
+}
+
+/* Add new VF device to synthetic device */
+int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
+{
+	int port, err;
+
+	port = hn_vf_match(dev);
+	if (port < 0) {
+		PMD_DRV_LOG(NOTICE, "No matching MAC found");
+		return port;
+	}
+
+	rte_spinlock_lock(&hv->vf_lock);
+	if (hv->vf_dev) {
+		PMD_DRV_LOG(ERR, "VF already attached");
+		err = -EBUSY;
+	} else {
+		err = hn_vf_attach(hv, port, &hv->vf_dev);
+	}
+
+	if (err == 0) {
+		dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+		hv->vf_intr = (struct rte_intr_handle) {
+			.fd = -1,
+			.type = RTE_INTR_HANDLE_EXT,
+		};
+		dev->intr_handle = &hv->vf_intr;
+		hn_nvs_set_datapath(hv, NVS_DATAPATH_VF);
+	}
+	rte_spinlock_unlock(&hv->vf_lock);
+
+	return err;
+}
+
+/* Remove attached VF device */
+static void hn_vf_remove(struct hn_data *hv)
+{
+	struct rte_eth_dev *vf_dev;
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf_dev = hv->vf_dev;
+	if (!vf_dev) {
+		PMD_DRV_LOG(ERR, "VF path not active");
+		rte_spinlock_unlock(&hv->vf_lock);
+		return;
+	}
+
+	/* Stop incoming packets from arriving on VF */
+	hn_nvs_set_datapath(hv, NVS_DATAPATH_SYNTHETIC);
+	hv->vf_dev = NULL;
+
+	/* Give back ownership */
+	rte_eth_dev_owner_unset(vf_dev->data->port_id, hv->owner.id);
+	rte_spinlock_unlock(&hv->vf_lock);
+}
+
+/* Handle VF association message from host */
+void
+hn_nvs_handle_vfassoc(struct rte_eth_dev *dev,
+		      const struct vmbus_chanpkt_hdr *hdr,
+		      const void *data)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	const struct hn_nvs_vf_association *vf_assoc = data;
+
+	if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*vf_assoc))) {
+		PMD_DRV_LOG(ERR, "invalid vf association NVS");
+		return;
+	}
+
+	PMD_DRV_LOG(DEBUG, "VF serial %u %s port %u",
+		    vf_assoc->serial,
+		    vf_assoc->allocated ? "add to" : "remove from",
"add to" : "remove from", + dev->data->port_id); + + hv->vf_present = vf_assoc->allocated; + + if (dev->state != RTE_ETH_DEV_ATTACHED) + return; + + if (vf_assoc->allocated) + hn_vf_add(dev, hv); + else + hn_vf_remove(hv); +} + +/* + * Merge the info from the VF and synthetic path. + * use the default config of the VF + * and the minimum number of queues and buffer sizes. + */ +static void hn_vf_info_merge(struct rte_eth_dev *vf_dev, + struct rte_eth_dev_info *info) +{ + struct rte_eth_dev_info vf_info; + + rte_eth_dev_info_get(vf_dev->data->port_id, &vf_info); + + info->speed_capa = vf_info.speed_capa; + info->default_rxportconf = vf_info.default_rxportconf; + info->default_txportconf = vf_info.default_txportconf; + + info->max_rx_queues = RTE_MIN(vf_info.max_rx_queues, + info->max_rx_queues); + info->rx_offload_capa &= vf_info.rx_offload_capa; + info->rx_queue_offload_capa &= vf_info.rx_queue_offload_capa; + info->flow_type_rss_offloads &= vf_info.flow_type_rss_offloads; + + info->max_tx_queues = RTE_MIN(vf_info.max_tx_queues, + info->max_tx_queues); + info->tx_offload_capa &= vf_info.tx_offload_capa; + info->tx_queue_offload_capa &= vf_info.tx_queue_offload_capa; + + info->min_rx_bufsize = RTE_MAX(vf_info.min_rx_bufsize, + info->min_rx_bufsize); + info->max_rx_pktlen = RTE_MAX(vf_info.max_rx_pktlen, + info->max_rx_pktlen); +} + +void hn_vf_info_get(struct hn_data *hv, struct rte_eth_dev_info *info) +{ + struct rte_eth_dev *vf_dev; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hv->vf_dev; + if (vf_dev) + hn_vf_info_merge(vf_dev, info); + rte_spinlock_unlock(&hv->vf_lock); +} + +int hn_vf_link_update(struct rte_eth_dev *dev, + int wait_to_complete) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int ret = 0; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hv->vf_dev; + if (vf_dev && vf_dev->dev_ops->link_update) + ret = (*vf_dev->dev_ops->link_update)(dev, wait_to_complete); + rte_spinlock_unlock(&hv->vf_lock); + + return ret; +} + +/* called when VF has link state interrupts enabled */ +static int hn_vf_lsc_event(uint16_t port_id __rte_unused, + enum rte_eth_event_type event, + void *cb_arg, void *out __rte_unused) +{ + struct rte_eth_dev *dev = cb_arg; + + if (event != RTE_ETH_EVENT_INTR_LSC) + return 0; + + /* if link state has changed pass on */ + if (hn_dev_link_update(dev, 0) == 0) + return 0; /* no change */ + + return _rte_eth_dev_callback_process(dev, + RTE_ETH_EVENT_INTR_LSC, + NULL); +} + +static int _hn_vf_configure(struct rte_eth_dev *dev, + struct rte_eth_dev *vf_dev, + const struct rte_eth_conf *dev_conf) +{ + struct rte_eth_conf vf_conf = *dev_conf; + uint16_t vf_port = vf_dev->data->port_id; + int ret; + + if (dev_conf->intr_conf.lsc && + (vf_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) { + PMD_DRV_LOG(DEBUG, "enabling LSC for VF %u", + vf_port); + vf_conf.intr_conf.lsc = 1; + } else { + PMD_DRV_LOG(DEBUG, "disabling LSC for VF %u", + vf_port); + vf_conf.intr_conf.lsc = 0; + } + + ret = rte_eth_dev_configure(vf_port, + dev->data->nb_rx_queues, + dev->data->nb_tx_queues, + &vf_conf); + if (ret) { + PMD_DRV_LOG(ERR, + "VF configuration failed: %d", ret); + } else if (vf_conf.intr_conf.lsc) { + ret = rte_eth_dev_callback_register(vf_port, + RTE_ETH_DEV_INTR_LSC, + hn_vf_lsc_event, dev); + if (ret) + PMD_DRV_LOG(ERR, + "Failed to register LSC callback for VF %u", + vf_port); + } + return ret; +} + +/* + * Configure VF if present. 
+ * Force VF to have same number of queues as synthetic device + */ +int hn_vf_configure(struct rte_eth_dev *dev, + const struct rte_eth_conf *dev_conf) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int ret = 0; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hv->vf_dev; + if (vf_dev) + ret = _hn_vf_configure(dev, vf_dev, dev_conf); + rte_spinlock_unlock(&hv->vf_lock); + return ret; +} + +const uint32_t *hn_vf_supported_ptypes(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + const uint32_t *ptypes = NULL; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hv->vf_dev; + if (vf_dev && vf_dev->dev_ops->dev_supported_ptypes_get) + ptypes = (*vf_dev->dev_ops->dev_supported_ptypes_get)(vf_dev); + rte_spinlock_unlock(&hv->vf_lock); + + return ptypes; +} + +int hn_vf_start(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int ret = 0; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hv->vf_dev; + if (vf_dev) + ret = rte_eth_dev_start(vf_dev->data->port_id); + rte_spinlock_unlock(&hv->vf_lock); + return ret; +} + +void hn_vf_stop(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hv->vf_dev; + if (vf_dev) + rte_eth_dev_stop(vf_dev->data->port_id); + rte_spinlock_unlock(&hv->vf_lock); +} + +/* If VF is present, then cascade configuration down */ +#define VF_ETHDEV_FUNC(dev, func) \ + { \ + struct hn_data *hv = (dev)->data->dev_private; \ + struct rte_eth_dev *vf_dev; \ + rte_spinlock_lock(&hv->vf_lock); \ + vf_dev = hv->vf_dev; \ + if (vf_dev) \ + func(vf_dev->data->port_id); \ + rte_spinlock_unlock(&hv->vf_lock); \ + } + +void hn_vf_reset(struct rte_eth_dev *dev) +{ + VF_ETHDEV_FUNC(dev, rte_eth_dev_reset); +} + +void hn_vf_close(struct rte_eth_dev *dev) +{ + VF_ETHDEV_FUNC(dev, rte_eth_dev_close); +} + +void hn_vf_stats_reset(struct rte_eth_dev *dev) +{ + VF_ETHDEV_FUNC(dev, rte_eth_stats_reset); +} + +int hn_vf_tx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int ret = 0; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hv->vf_dev; + if (vf_dev) + ret = rte_eth_tx_queue_setup(vf_dev->data->port_id, + queue_idx, nb_desc, + socket_id, tx_conf); + rte_spinlock_unlock(&hv->vf_lock); + return ret; +} + +void hn_vf_tx_queue_release(struct hn_data *hv, uint16_t queue_id) +{ + struct rte_eth_dev *vf_dev; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hv->vf_dev; + if (vf_dev && vf_dev->dev_ops->tx_queue_release) { + void *subq = vf_dev->data->tx_queues[queue_id]; + + (*vf_dev->dev_ops->tx_queue_release)(subq); + } + + rte_spinlock_unlock(&hv->vf_lock); +} + +int hn_vf_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int ret = 0; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hv->vf_dev; + if (vf_dev) + ret = rte_eth_rx_queue_setup(vf_dev->data->port_id, + queue_idx, nb_desc, + socket_id, rx_conf, mp); + rte_spinlock_unlock(&hv->vf_lock); + return ret; +} + +void hn_vf_rx_queue_release(struct hn_data *hv, uint16_t queue_id) +{ + struct rte_eth_dev *vf_dev; + + 
rte_spinlock_lock(&hv->vf_lock); + vf_dev = hv->vf_dev; + if (vf_dev && vf_dev->dev_ops->rx_queue_release) { + void *subq = vf_dev->data->rx_queues[queue_id]; + + (*vf_dev->dev_ops->rx_queue_release)(subq); + } + rte_spinlock_unlock(&hv->vf_lock); +} + +int hn_vf_stats_get(struct rte_eth_dev *dev, + struct rte_eth_stats *stats) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int ret = 0; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hv->vf_dev; + if (vf_dev) + ret = rte_eth_stats_get(vf_dev->data->port_id, stats); + rte_spinlock_unlock(&hv->vf_lock); + return ret; +} + +int hn_vf_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *names, + unsigned int n) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int i, count = 0; + char tmp[RTE_ETH_XSTATS_NAME_SIZE]; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hv->vf_dev; + if (vf_dev && vf_dev->dev_ops->xstats_get_names) + count = vf_dev->dev_ops->xstats_get_names(vf_dev, names, n); + rte_spinlock_unlock(&hv->vf_lock); + + /* add vf_ prefix to xstat names */ + if (names) { + for (i = 0; i < count; i++) { + snprintf(tmp, sizeof(tmp), "vf_%s", names[i].name); + strlcpy(names[i].name, tmp, sizeof(names[i].name)); + } + } + + return count; +} + +int hn_vf_xstats_get(struct rte_eth_dev *dev, + struct rte_eth_xstat *xstats, + unsigned int n) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int count = 0; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hv->vf_dev; + if (vf_dev && vf_dev->dev_ops->xstats_get) + count = vf_dev->dev_ops->xstats_get(vf_dev, xstats, n); + rte_spinlock_unlock(&hv->vf_lock); + + return count; +} + +void hn_vf_xstats_reset(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hv->vf_dev; + if (vf_dev && vf_dev->dev_ops->xstats_reset) + vf_dev->dev_ops->xstats_reset(vf_dev); + rte_spinlock_unlock(&hv->vf_lock); +} diff --git a/drivers/net/netvsc/meson.build b/drivers/net/netvsc/meson.build index a717cdd483..c842697162 100644 --- a/drivers/net/netvsc/meson.build +++ b/drivers/net/netvsc/meson.build @@ -3,7 +3,7 @@ build = dpdk_conf.has('RTE_LIBRTE_VMBUS_BUS') version = 2 -sources = files('hn_ethdev.c', 'hn_rxtx.c', 'hn_rndis.c', 'hn_nvs.c') +sources = files('hn_ethdev.c', 'hn_rxtx.c', 'hn_rndis.c', 'hn_nvs.c', 'hn_vf.c') deps += ['bus_vmbus' ] -- 2.20.1
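Note on the lock-free datapath: hn_xmit_pkts() and hn_recv_pkts() never take
vf_lock. They read hv->vf_dev once and separate that read from the later
dereference with rte_compiler_barrier(), while the control path only changes
the pointer under vf_lock after switching the NVS datapath. Below is a
reduced, self-contained sketch of that pattern; the demo_* names are
illustrative only and not part of this patch:

#include <rte_atomic.h>
#include <rte_ethdev.h>

/* Reduced form of the guarded-pointer scheme used by hn_xmit_pkts():
 * the control path publishes or clears vf_dev under vf_lock, the data
 * path reads it exactly once and never takes that lock.
 */
struct demo_priv {
	struct rte_eth_dev *vf_dev;	/* written only under vf_lock */
};

static uint16_t
demo_tx_burst(struct demo_priv *priv, uint16_t queue_id,
	      struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	/* Read the pointer once ... */
	struct rte_eth_dev *vf_dev = priv->vf_dev;

	/* ... and stop the compiler from re-loading it after the check */
	rte_compiler_barrier();

	if (vf_dev && vf_dev->data->dev_started) {
		void *sub_q = vf_dev->data->tx_queues[queue_id];

		/* Hand the whole burst to the VF queue */
		return (*vf_dev->tx_pkt_burst)(sub_q, pkts, nb_pkts);
	}

	/* Otherwise fall back to the synthetic (vmbus) path */
	return 0;
}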