From: Bruce Richardson Date: Fri, 15 May 2015 15:57:00 +0000 (+0100) Subject: xenvirt: move drivers/net/ X-Git-Tag: spdx-start~9160 X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=6d71d3b6ee0b6668e991c0296e55f3f41f8b88f2;p=dpdk.git xenvirt: move drivers/net/ Move xenvirt PMD to drivers/net directory Signed-off-by: Bruce Richardson Acked-by: John McNamara Acked-by: Thomas Monjalon --- diff --git a/MAINTAINERS b/MAINTAINERS index 7b8cb2349d..9362c190bc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -132,7 +132,7 @@ F: lib/librte_eal/linuxapp/xen_dom0/ F: lib/librte_eal/linuxapp/eal/*xen* F: lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h F: lib/librte_mempool/rte_dom0_mempool.c -F: lib/librte_pmd_xenvirt/ +F: drivers/net/xenvirt/ F: doc/guides/xen/ F: app/test-pmd/mempool_* F: examples/vhost_xen/ diff --git a/doc/guides/prog_guide/source_org.rst b/doc/guides/prog_guide/source_org.rst index b3413f0aaa..4f6f489f1b 100644 --- a/doc/guides/prog_guide/source_org.rst +++ b/doc/guides/prog_guide/source_org.rst @@ -79,7 +79,6 @@ The lib directory contains:: +-- librte_mempool # memory pool manager (fixedsized objects) +-- librte_meter # QoS metering library +-- librte_net # various IP-related headers - +-- librte_pmd_xenvirt # Xen virtio poll mode driver +-- librte_power # power management library +-- librte_ring # software rings (act as lockless FIFOs) +-- librte_sched # QoS scheduler and dropper library @@ -109,6 +108,7 @@ The drivers directory has a net subdirectory which contains:: +-- ring # ring poll mode driver +-- virtio # virtio poll mode driver +-- vmxnet3 # VMXNET3 poll mode driver + +-- xenvirt # Xen virtio poll mode driver Applications ------------ diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 7763845e66..1e6648aa45 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -44,6 +44,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += 
virtio DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3 +DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += xenvirt include $(RTE_SDK)/mk/rte.sharelib.mk include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/drivers/net/xenvirt/Makefile b/drivers/net/xenvirt/Makefile new file mode 100644 index 0000000000..f0c796cea6 --- /dev/null +++ b/drivers/net/xenvirt/Makefile @@ -0,0 +1,62 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_pmd_xenvirt.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +EXPORT_MAP := rte_eth_xenvirt_version.map + +LIBABIVER := 1 + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += rte_eth_xenvirt.c rte_mempool_gntalloc.c rte_xen_lib.c + +# +# Export include files +# +SYMLINK-y-include += rte_eth_xenvirt.h + +# this lib depends upon: +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += lib/librte_eal lib/librte_ether +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += lib/librte_mempool lib/librte_mbuf +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += lib/librte_net lib/librte_malloc +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += lib/librte_cmdline + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/drivers/net/xenvirt/rte_eth_xenvirt.c b/drivers/net/xenvirt/rte_eth_xenvirt.c new file mode 100644 index 0000000000..73e8bce02a --- /dev/null +++ b/drivers/net/xenvirt/rte_eth_xenvirt.c @@ -0,0 +1,714 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200 +#include +#else +#include +#endif +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_xen_lib.h" +#include "virtqueue.h" +#include "rte_eth_xenvirt.h" + +#define VQ_DESC_NUM 256 +#define VIRTIO_MBUF_BURST_SZ 64 + +/* virtio_idx is increased after new device is created.*/ +static int virtio_idx = 0; + +static const char *drivername = "xen dummy virtio PMD"; + +static struct rte_eth_link pmd_link = { + .link_speed = 10000, + .link_duplex = ETH_LINK_FULL_DUPLEX, + .link_status = 0 +}; + +static inline struct rte_mbuf * +rte_rxmbuf_alloc(struct rte_mempool *mp) +{ + struct rte_mbuf *m; + + m = __rte_mbuf_raw_alloc(mp); + __rte_mbuf_sanity_check_raw(m, 0); + + return m; +} + + +static uint16_t +eth_xenvirt_rx(void *q, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) +{ + struct virtqueue *rxvq = q; + struct rte_mbuf *rxm, *new_mbuf; + uint16_t nb_used, num; + uint32_t len[VIRTIO_MBUF_BURST_SZ]; + uint32_t i; + struct pmd_internals *pi = rxvq->internals; + + nb_used = 
VIRTQUEUE_NUSED(rxvq); + + rte_compiler_barrier(); /* rmb */ + num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts); + num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ); + if (unlikely(num == 0)) return 0; + + num = virtqueue_dequeue_burst(rxvq, rx_pkts, len, num); + PMD_RX_LOG(DEBUG, "used:%d dequeue:%d\n", nb_used, num); + for (i = 0; i < num ; i ++) { + rxm = rx_pkts[i]; + PMD_RX_LOG(DEBUG, "packet len:%d\n", len[i]); + rxm->next = NULL; + rxm->data_off = RTE_PKTMBUF_HEADROOM; + rxm->data_len = (uint16_t)(len[i] - sizeof(struct virtio_net_hdr)); + rxm->nb_segs = 1; + rxm->port = pi->port_id; + rxm->pkt_len = (uint32_t)(len[i] - sizeof(struct virtio_net_hdr)); + } + /* allocate new mbuf for the used descriptor */ + while (likely(!virtqueue_full(rxvq))) { + new_mbuf = rte_rxmbuf_alloc(rxvq->mpool); + if (unlikely(new_mbuf == NULL)) { + break; + } + if (unlikely(virtqueue_enqueue_recv_refill(rxvq, new_mbuf))) { + rte_pktmbuf_free_seg(new_mbuf); + break; + } + } + pi->eth_stats.ipackets += num; + return num; +} + +static uint16_t +eth_xenvirt_tx(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct virtqueue *txvq = tx_queue; + struct rte_mbuf *txm; + uint16_t nb_used, nb_tx, num, i; + int error; + uint32_t len[VIRTIO_MBUF_BURST_SZ]; + struct rte_mbuf *snd_pkts[VIRTIO_MBUF_BURST_SZ]; + struct pmd_internals *pi = txvq->internals; + + nb_tx = 0; + + if (unlikely(nb_pkts == 0)) + return 0; + + PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts); + nb_used = VIRTQUEUE_NUSED(txvq); + + rte_compiler_barrier(); /* rmb */ + + num = (uint16_t)(likely(nb_used <= VIRTIO_MBUF_BURST_SZ) ? 
nb_used : VIRTIO_MBUF_BURST_SZ); + num = virtqueue_dequeue_burst(txvq, snd_pkts, len, num); + + for (i = 0; i < num ; i ++) { + /* mergable not supported, one segment only */ + rte_pktmbuf_free_seg(snd_pkts[i]); + } + + while (nb_tx < nb_pkts) { + if (likely(!virtqueue_full(txvq))) { + /* TODO drop tx_pkts if it contains multiple segments */ + txm = tx_pkts[nb_tx]; + error = virtqueue_enqueue_xmit(txvq, txm); + if (unlikely(error)) { + if (error == ENOSPC) + PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0\n"); + else if (error == EMSGSIZE) + PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1\n"); + else + PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d\n", error); + break; + } + nb_tx++; + } else { + PMD_TX_LOG(ERR, "No free tx descriptors to transmit\n"); + /* virtqueue_notify not needed in our para-virt solution */ + break; + } + } + pi->eth_stats.opackets += nb_tx; + return nb_tx; +} + +static int +eth_dev_configure(struct rte_eth_dev *dev __rte_unused) +{ + RTE_LOG(ERR, PMD, "%s\n", __func__); + return 0; +} + +/* + * Create a shared page between guest and host. + * Host monitors this page if it is cleared on unmap, and then + * do necessary clean up. + */ +static void +gntalloc_vring_flag(int vtidx) +{ + char key_str[PATH_MAX]; + char val_str[PATH_MAX]; + uint32_t gref_tmp; + void *ptr; + + if (grefwatch_from_alloc(&gref_tmp, &ptr)) { + RTE_LOG(ERR, PMD, "grefwatch_from_alloc error\n"); + exit(0); + } + + *(uint8_t *)ptr = MAP_FLAG; + snprintf(val_str, sizeof(val_str), "%u", gref_tmp); + snprintf(key_str, sizeof(key_str), + DPDK_XENSTORE_PATH"%d"VRING_FLAG_STR, vtidx); + xenstore_write(key_str, val_str); +} + +/* + * Notify host this virtio device is started. + * Host could start polling this device. 
+ */ +static void +dev_start_notify(int vtidx) +{ + char key_str[PATH_MAX]; + char val_str[PATH_MAX]; + + RTE_LOG(INFO, PMD, "%s: virtio %d is started\n", __func__, vtidx); + gntalloc_vring_flag(vtidx); + + snprintf(key_str, sizeof(key_str), "%s%s%d", + DPDK_XENSTORE_PATH, EVENT_TYPE_START_STR, + vtidx); + snprintf(val_str, sizeof(val_str), "1"); + xenstore_write(key_str, val_str); +} + +/* + * Notify host this virtio device is stopped. + * Host could stop polling this device. + */ +static void +dev_stop_notify(int vtidx) +{ + RTE_SET_USED(vtidx); +} + + +static int +update_mac_address(struct ether_addr *mac_addrs, int vtidx) +{ + char key_str[PATH_MAX]; + char val_str[PATH_MAX]; + int rv; + + if (mac_addrs == NULL) { + RTE_LOG(ERR, PMD, "%s: NULL pointer mac specified\n", __func__); + return -1; + } + rv = snprintf(key_str, sizeof(key_str), + DPDK_XENSTORE_PATH"%d_ether_addr", vtidx); + if (rv == -1) + return rv; + rv = snprintf(val_str, sizeof(val_str), "%02x:%02x:%02x:%02x:%02x:%02x", + mac_addrs->addr_bytes[0], + mac_addrs->addr_bytes[1], + mac_addrs->addr_bytes[2], + mac_addrs->addr_bytes[3], + mac_addrs->addr_bytes[4], + mac_addrs->addr_bytes[5]); + if (rv == -1) + return rv; + if (xenstore_write(key_str, val_str)) + return rv; + return 0; +} + + +static int +eth_dev_start(struct rte_eth_dev *dev) +{ + struct virtqueue *rxvq = dev->data->rx_queues[0]; + struct virtqueue *txvq = dev->data->tx_queues[0]; + struct rte_mbuf *m; + struct pmd_internals *pi = (struct pmd_internals *)dev->data->dev_private; + int rv; + + dev->data->dev_link.link_status = 1; + while (!virtqueue_full(rxvq)) { + m = rte_rxmbuf_alloc(rxvq->mpool); + if (m == NULL) + break; + /* Enqueue allocated buffers. 
*/ + if (virtqueue_enqueue_recv_refill(rxvq, m)) { + rte_pktmbuf_free_seg(m); + break; + } + } + + rxvq->internals = pi; + txvq->internals = pi; + + rv = update_mac_address(dev->data->mac_addrs, pi->virtio_idx); + if (rv) + return -1; + dev_start_notify(pi->virtio_idx); + + return 0; +} + +static void +eth_dev_stop(struct rte_eth_dev *dev) +{ + struct pmd_internals *pi = (struct pmd_internals *)dev->data->dev_private; + + dev->data->dev_link.link_status = 0; + dev_stop_notify(pi->virtio_idx); +} + +/* + * Notify host this virtio device is closed. + * Host could do necessary clean up to this device. + */ +static void +eth_dev_close(struct rte_eth_dev *dev) +{ + RTE_SET_USED(dev); +} + +static void +eth_dev_info(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info) +{ + struct pmd_internals *internals = dev->data->dev_private; + + RTE_SET_USED(internals); + dev_info->driver_name = drivername; + dev_info->max_mac_addrs = 1; + dev_info->max_rx_pktlen = (uint32_t)2048; + dev_info->max_rx_queues = (uint16_t)1; + dev_info->max_tx_queues = (uint16_t)1; + dev_info->min_rx_bufsize = 0; + dev_info->pci_dev = NULL; +} + +static void +eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +{ + struct pmd_internals *internals = dev->data->dev_private; + if(stats) + rte_memcpy(stats, &internals->eth_stats, sizeof(*stats)); +} + +static void +eth_stats_reset(struct rte_eth_dev *dev) +{ + struct pmd_internals *internals = dev->data->dev_private; + /* Reset software totals */ + memset(&internals->eth_stats, 0, sizeof(internals->eth_stats)); +} + +static void +eth_queue_release(void *q __rte_unused) +{ +} + +static int +eth_link_update(struct rte_eth_dev *dev __rte_unused, + int wait_to_complete __rte_unused) +{ + return 0; +} + +/* + * Create shared vring between guest and host. + * Memory is allocated through grant alloc driver, so it is not physical continuous. 
+ */ +static void * +gntalloc_vring_create(int queue_type, uint32_t size, int vtidx) +{ + char key_str[PATH_MAX] = {0}; + char val_str[PATH_MAX] = {0}; + void *va = NULL; + int pg_size; + uint32_t pg_num; + uint32_t *gref_arr = NULL; + phys_addr_t *pa_arr = NULL; + uint64_t start_index; + int rv; + + pg_size = getpagesize(); + size = RTE_ALIGN_CEIL(size, pg_size); + pg_num = size / pg_size; + + gref_arr = calloc(pg_num, sizeof(gref_arr[0])); + pa_arr = calloc(pg_num, sizeof(pa_arr[0])); + + if (gref_arr == NULL || pa_arr == NULL) { + RTE_LOG(ERR, PMD, "%s: calloc failed\n", __func__); + goto out; + } + + va = gntalloc(size, gref_arr, &start_index); + if (va == NULL) { + RTE_LOG(ERR, PMD, "%s: gntalloc failed\n", __func__); + goto out; + } + + if (get_phys_map(va, pa_arr, pg_num, pg_size)) + goto out; + + /* write in xenstore gref and pfn for each page of vring */ + if (grant_node_create(pg_num, gref_arr, pa_arr, val_str, sizeof(val_str))) { + gntfree(va, size, start_index); + va = NULL; + goto out; + } + + if (queue_type == VTNET_RQ) + rv = snprintf(key_str, sizeof(key_str), DPDK_XENSTORE_PATH"%d"RXVRING_XENSTORE_STR, vtidx); + else + rv = snprintf(key_str, sizeof(key_str), DPDK_XENSTORE_PATH"%d"TXVRING_XENSTORE_STR, vtidx); + if (rv == -1 || xenstore_write(key_str, val_str) == -1) { + gntfree(va, size, start_index); + va = NULL; + } +out: + if (pa_arr) + free(pa_arr); + if (gref_arr) + free(gref_arr); + + return va; +} + + + +static struct virtqueue * +virtio_queue_setup(struct rte_eth_dev *dev, int queue_type) +{ + struct virtqueue *vq = NULL; + uint16_t vq_size = VQ_DESC_NUM; + int i = 0; + char vq_name[VIRTQUEUE_MAX_NAME_SZ]; + size_t size; + struct vring *vr; + + /* Allocate memory for virtqueue. 
*/ + if (queue_type == VTNET_RQ) { + snprintf(vq_name, sizeof(vq_name), "port%d_rvq", + dev->data->port_id); + vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + + vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); + if (vq == NULL) { + RTE_LOG(ERR, PMD, "%s: unabled to allocate virtqueue\n", __func__); + return NULL; + } + memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name)); + } else if(queue_type == VTNET_TQ) { + snprintf(vq_name, sizeof(vq_name), "port%d_tvq", + dev->data->port_id); + vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + + vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); + if (vq == NULL) { + RTE_LOG(ERR, PMD, "%s: unabled to allocate virtqueue\n", __func__); + return NULL; + } + memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name)); + } + + memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name)); + + vq->vq_alignment = VIRTIO_PCI_VRING_ALIGN; + vq->vq_nentries = vq_size; + vq->vq_free_cnt = vq_size; + /* Calcuate vring size according to virtio spec */ + size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN); + vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN); + /* Allocate memory for virtio vring through gntalloc driver*/ + vq->vq_ring_virt_mem = gntalloc_vring_create(queue_type, vq->vq_ring_size, + ((struct pmd_internals *)dev->data->dev_private)->virtio_idx); + memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size); + vr = &vq->vq_ring; + vring_init(vr, vq_size, vq->vq_ring_virt_mem, vq->vq_alignment); + /* + * Locally maintained last consumed index, this idex trails + * vq_ring.used->idx. 
+ */ + vq->vq_used_cons_idx = 0; + vq->vq_desc_head_idx = 0; + vq->vq_free_cnt = vq->vq_nentries; + memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries); + + /* Chain all the descriptors in the ring with an END */ + for (i = 0; i < vq_size - 1; i++) + vr->desc[i].next = (uint16_t)(i + 1); + vr->desc[i].next = VQ_RING_DESC_CHAIN_END; + + return vq; +} + +static int +eth_rx_queue_setup(struct rte_eth_dev *dev,uint16_t rx_queue_id, + uint16_t nb_rx_desc __rte_unused, + unsigned int socket_id __rte_unused, + const struct rte_eth_rxconf *rx_conf __rte_unused, + struct rte_mempool *mb_pool) +{ + struct virtqueue *vq; + vq = dev->data->rx_queues[rx_queue_id] = virtio_queue_setup(dev, VTNET_RQ); + vq->mpool = mb_pool; + return 0; +} + +static int +eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, + uint16_t nb_tx_desc __rte_unused, + unsigned int socket_id __rte_unused, + const struct rte_eth_txconf *tx_conf __rte_unused) +{ + dev->data->tx_queues[tx_queue_id] = virtio_queue_setup(dev, VTNET_TQ); + return 0; +} + + + +static const struct eth_dev_ops ops = { + .dev_start = eth_dev_start, + .dev_stop = eth_dev_stop, + .dev_close = eth_dev_close, + .dev_configure = eth_dev_configure, + .dev_infos_get = eth_dev_info, + .rx_queue_setup = eth_rx_queue_setup, + .tx_queue_setup = eth_tx_queue_setup, + .rx_queue_release = eth_queue_release, + .tx_queue_release = eth_queue_release, + .link_update = eth_link_update, + .stats_get = eth_stats_get, + .stats_reset = eth_stats_reset, +}; + + +static int +rte_eth_xenvirt_parse_args(struct xenvirt_dict *dict, + const char *name, const char *params) +{ + int i; + char *pairs[RTE_ETH_XENVIRT_MAX_ARGS]; + int num_of_pairs; + char *pair[2]; + char *args; + int ret = -1; + + if (params == NULL) + return 0; + + args = rte_zmalloc(NULL, strlen(params) + 1, RTE_CACHE_LINE_SIZE); + if (args == NULL) { + RTE_LOG(ERR, PMD, "Couldn't parse %s device \n", name); + return -1; + } + rte_memcpy(args, params, 
strlen(params)); + + num_of_pairs = rte_strsplit(args, strnlen(args, MAX_ARG_STRLEN), + pairs, + RTE_ETH_XENVIRT_MAX_ARGS , + RTE_ETH_XENVIRT_PAIRS_DELIM); + + for (i = 0; i < num_of_pairs; i++) { + pair[0] = NULL; + pair[1] = NULL; + rte_strsplit(pairs[i], strnlen(pairs[i], MAX_ARG_STRLEN), + pair, 2, + RTE_ETH_XENVIRT_KEY_VALUE_DELIM); + + if (pair[0] == NULL || pair[1] == NULL || pair[0][0] == 0 + || pair[1][0] == 0) { + RTE_LOG(ERR, PMD, + "Couldn't parse %s device," + "wrong key or value \n", name); + goto err; + } + + if (!strncmp(pair[0], RTE_ETH_XENVIRT_MAC_PARAM, + sizeof(RTE_ETH_XENVIRT_MAC_PARAM))) { + if (cmdline_parse_etheraddr(NULL, + pair[1], + &dict->addr, + sizeof(dict->addr)) < 0) { + RTE_LOG(ERR, PMD, + "Invalid %s device ether address\n", + name); + goto err; + } + + dict->addr_valid = 1; + } + } + + ret = 0; +err: + rte_free(args); + return ret; +} + +enum dev_action { + DEV_CREATE, + DEV_ATTACH +}; + + +static int +eth_dev_xenvirt_create(const char *name, const char *params, + const unsigned numa_node, + enum dev_action action) +{ + struct rte_eth_dev_data *data = NULL; + struct rte_pci_device *pci_dev = NULL; + struct pmd_internals *internals = NULL; + struct rte_eth_dev *eth_dev = NULL; + struct xenvirt_dict dict; + bzero(&dict, sizeof(struct xenvirt_dict)); + + RTE_LOG(INFO, PMD, "Creating virtio rings backed ethdev on numa socket %u\n", + numa_node); + RTE_SET_USED(action); + + if (rte_eth_xenvirt_parse_args(&dict, name, params) < 0) { + RTE_LOG(ERR, PMD, "%s: Failed to parse ethdev parameters\n", __func__); + return -1; + } + + /* now do all data allocation - for eth_dev structure, dummy pci driver + * and internal (private) data + */ + data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node); + if (data == NULL) + goto err; + + pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, numa_node); + if (pci_dev == NULL) + goto err; + + internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node); + if (internals == NULL) + 
goto err; + + /* reserve an ethdev entry */ + eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + if (eth_dev == NULL) + goto err; + + pci_dev->numa_node = numa_node; + + data->dev_private = internals; + data->port_id = eth_dev->data->port_id; + data->nb_rx_queues = (uint16_t)1; + data->nb_tx_queues = (uint16_t)1; + data->dev_link = pmd_link; + data->mac_addrs = rte_zmalloc("xen_virtio", ETHER_ADDR_LEN, 0); + + if(dict.addr_valid) + memcpy(&data->mac_addrs->addr_bytes, &dict.addr, sizeof(struct ether_addr)); + else + eth_random_addr(&data->mac_addrs->addr_bytes[0]); + + eth_dev->data = data; + eth_dev->dev_ops = &ops; + eth_dev->pci_dev = pci_dev; + + eth_dev->rx_pkt_burst = eth_xenvirt_rx; + eth_dev->tx_pkt_burst = eth_xenvirt_tx; + + internals->virtio_idx = virtio_idx++; + internals->port_id = eth_dev->data->port_id; + + return 0; + +err: + rte_free(data); + rte_free(pci_dev); + rte_free(internals); + + return -1; +} + + +/*TODO: Support multiple process model */ +static int +rte_pmd_xenvirt_devinit(const char *name, const char *params) +{ + if (virtio_idx == 0) { + if (xenstore_init() != 0) { + RTE_LOG(ERR, PMD, "%s: xenstore init failed\n", __func__); + return -1; + } + if (gntalloc_open() != 0) { + RTE_LOG(ERR, PMD, "%s: grant init failed\n", __func__); + return -1; + } + } + eth_dev_xenvirt_create(name, params, rte_socket_id(), DEV_CREATE); + return 0; +} + +static struct rte_driver pmd_xenvirt_drv = { + .name = "eth_xenvirt", + .type = PMD_VDEV, + .init = rte_pmd_xenvirt_devinit, +}; + +PMD_REGISTER_DRIVER(pmd_xenvirt_drv); diff --git a/drivers/net/xenvirt/rte_eth_xenvirt.h b/drivers/net/xenvirt/rte_eth_xenvirt.h new file mode 100644 index 0000000000..fc15a636a7 --- /dev/null +++ b/drivers/net/xenvirt/rte_eth_xenvirt.h @@ -0,0 +1,62 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETH_XENVIRT_H_ +#define _RTE_ETH_XENVIRT_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/** + * Creates mempool for xen virtio PMD. + * This function uses memzone_reserve to allocate memory for meta data, + * and uses grant alloc driver to allocate memory for data area. + * The input parameters are exactly the same as rte_mempool_create. 
+ */ +struct rte_mempool * +rte_mempool_gntalloc_create(const char *name, unsigned elt_num, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/drivers/net/xenvirt/rte_eth_xenvirt_version.map b/drivers/net/xenvirt/rte_eth_xenvirt_version.map new file mode 100644 index 0000000000..dd636f72a8 --- /dev/null +++ b/drivers/net/xenvirt/rte_eth_xenvirt_version.map @@ -0,0 +1,7 @@ +DPDK_2.0 { + global: + + rte_mempool_gntalloc_create; + + local: *; +}; diff --git a/drivers/net/xenvirt/rte_mempool_gntalloc.c b/drivers/net/xenvirt/rte_mempool_gntalloc.c new file mode 100644 index 0000000000..3a650e8dc2 --- /dev/null +++ b/drivers/net/xenvirt/rte_mempool_gntalloc.c @@ -0,0 +1,298 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "rte_xen_lib.h" +#include "rte_eth_xenvirt.h" + +struct _gntarr { + uint32_t gref; + phys_addr_t pa; + uint64_t index; + void *va; +}; + +struct _mempool_gntalloc_info { + struct rte_mempool *mp; + uint32_t pg_num; + uint32_t *gref_arr; + phys_addr_t *pa_arr; + void *va; + uint32_t mempool_idx; + uint64_t start_index; +}; + + +static rte_atomic32_t global_xenvirt_mempool_idx = RTE_ATOMIC32_INIT(-1); + +static int +compare(const void *p1, const void *p2) +{ + return ((const struct _gntarr *)p1)->pa - ((const struct _gntarr *)p2)->pa; +} + + +static struct _mempool_gntalloc_info +_create_mempool(const char *name, unsigned elt_num, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags) +{ + struct _mempool_gntalloc_info mgi; + struct rte_mempool *mp = NULL; + struct rte_mempool_objsz objsz; + uint32_t pg_num, rpg_num, pg_shift, pg_sz; + char *va, *orig_va, *uv; /* uv: from which, the pages could be freed */ + ssize_t sz, usz; /* usz: unused size */ + /* + * for each page allocated through xen_gntalloc driver, + * gref_arr:stores grant references, + * pa_arr: stores physical address, + * gnt_arr: stores all meta dat + */ + uint32_t *gref_arr = NULL; + 
phys_addr_t *pa_arr = NULL; + struct _gntarr *gnt_arr = NULL; + /* start index of the grant referances, used for dealloc*/ + uint64_t start_index; + uint32_t i, j; + int rv = 0; + struct ioctl_gntalloc_dealloc_gref arg; + + mgi.mp = NULL; + va = orig_va = uv = NULL; + pg_num = rpg_num = 0; + sz = 0; + + pg_sz = getpagesize(); + if (rte_is_power_of_2(pg_sz) == 0) { + goto out; + } + pg_shift = rte_bsf32(pg_sz); + + rte_mempool_calc_obj_size(elt_size, flags, &objsz); + sz = rte_mempool_xmem_size(elt_num, objsz.total_size, pg_shift); + pg_num = sz >> pg_shift; + + pa_arr = calloc(pg_num, sizeof(pa_arr[0])); + gref_arr = calloc(pg_num, sizeof(gref_arr[0])); + gnt_arr = calloc(pg_num, sizeof(gnt_arr[0])); + if ((gnt_arr == NULL) || (gref_arr == NULL) || (pa_arr == NULL)) + goto out; + + /* grant index is continuous in ascending order */ + orig_va = gntalloc(sz, gref_arr, &start_index); + if (orig_va == NULL) + goto out; + + get_phys_map(orig_va, pa_arr, pg_num, pg_sz); + for (i = 0; i < pg_num; i++) { + gnt_arr[i].index = start_index + i * pg_sz; + gnt_arr[i].gref = gref_arr[i]; + gnt_arr[i].pa = pa_arr[i]; + gnt_arr[i].va = RTE_PTR_ADD(orig_va, i * pg_sz); + } + qsort(gnt_arr, pg_num, sizeof(struct _gntarr), compare); + + va = get_xen_virtual(sz, pg_sz); + if (va == NULL) { + goto out; + } + + /* + * map one by one, as index isn't continuous now. + * pg_num VMAs, doesn't linux has a limitation on this? + */ + for (i = 0; i < pg_num; i++) { + /* update gref_arr and pa_arr after sort */ + gref_arr[i] = gnt_arr[i].gref; + pa_arr[i] = gnt_arr[i].pa; + gnt_arr[i].va = mmap(va + i * pg_sz, pg_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, gntalloc_fd, gnt_arr[i].index); + if ((gnt_arr[i].va == MAP_FAILED) || (gnt_arr[i].va != (va + i * pg_sz))) { + RTE_LOG(ERR, PMD, "failed to map %d pages\n", i); + goto mmap_failed; + } + } + + /* + * Check that allocated size is big enough to hold elt_num + * objects and a calcualte how many bytes are actually required. 
+ */ + usz = rte_mempool_xmem_usage(va, elt_num, objsz.total_size, pa_arr, pg_num, pg_shift); + if (usz < 0) { + mp = NULL; + i = pg_num; + goto mmap_failed; + } else { + /* unmap unused pages if any */ + uv = RTE_PTR_ADD(va, usz); + if ((usz = va + sz - uv) > 0) { + + RTE_LOG(ERR, PMD, + "%s(%s): unmap unused %zu of %zu " + "mmaped bytes @%p orig:%p\n", + __func__, name, usz, sz, uv, va); + munmap(uv, usz); + i = (sz - usz) / pg_sz; + for (; i < pg_num; i++) { + arg.count = 1; + arg.index = gnt_arr[i].index; + rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg); + if (rv) { + /* shouldn't fail here */ + RTE_LOG(ERR, PMD, "va=%p pa=%p index=%p %s\n", + gnt_arr[i].va, + (void *)gnt_arr[i].pa, + (void *)arg.index, strerror(errno)); + rte_panic("gntdealloc failed when freeing pages\n"); + } + } + + rpg_num = (sz - usz) >> pg_shift; + } else + rpg_num = pg_num; + + mp = rte_mempool_xmem_create(name, elt_num, elt_size, + cache_size, private_data_size, + mp_init, mp_init_arg, + obj_init, obj_init_arg, + socket_id, flags, va, pa_arr, rpg_num, pg_shift); + + RTE_VERIFY(elt_num == mp->size); + } + mgi.mp = mp; + mgi.pg_num = rpg_num; + mgi.gref_arr = gref_arr; + mgi.pa_arr = pa_arr; + if (mp) + mgi.mempool_idx = rte_atomic32_add_return(&global_xenvirt_mempool_idx, 1); + mgi.start_index = start_index; + mgi.va = va; + + if (mp == NULL) { + i = pg_num; + goto mmap_failed; + } + +/* + * unmap only, without deallocate grant reference. + * unused pages have already been unmaped, + * unmap twice will fail, but it is safe. 
+ */ +mmap_failed: + for (j = 0; j < i; j++) { + if (gnt_arr[i].va) + munmap(gnt_arr[i].va, pg_sz); + } +out: + if (gnt_arr) + free(gnt_arr); + if (orig_va) + munmap(orig_va, sz); + if (mp == NULL) { + if (gref_arr) + free(gref_arr); + if (pa_arr) + free(pa_arr); + + /* some gref has already been de-allocated from the list in the driver, + * so dealloc one by one, and it is safe to deallocate twice + */ + if (orig_va) { + for (i = 0; i < pg_num; i++) { + arg.index = start_index + i * pg_sz; + rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, arg); + } + } + } + return mgi; +} + +struct rte_mempool * +rte_mempool_gntalloc_create(const char *name, unsigned elt_num, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags) +{ + int rv; + uint32_t i; + struct _mempool_gntalloc_info mgi; + struct ioctl_gntalloc_dealloc_gref arg; + int pg_sz = getpagesize(); + + mgi = _create_mempool(name, elt_num, elt_size, + cache_size, private_data_size, + mp_init, mp_init_arg, + obj_init, obj_init_arg, + socket_id, flags); + if (mgi.mp) { + rv = grant_gntalloc_mbuf_pool(mgi.mp, + mgi.pg_num, + mgi.gref_arr, + mgi.pa_arr, + mgi.mempool_idx); + free(mgi.gref_arr); + free(mgi.pa_arr); + if (rv == 0) + return mgi.mp; + /* + * in _create_mempool, unused pages have already been unmapped, deallocagted + * unmap and dealloc the remained ones here. 
+ */ + munmap(mgi.va, pg_sz * mgi.pg_num); + for (i = 0; i < mgi.pg_num; i++) { + arg.index = mgi.start_index + i * pg_sz; + rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, arg); + } + return NULL; + } + return NULL; + + + +} diff --git a/drivers/net/xenvirt/rte_xen_lib.c b/drivers/net/xenvirt/rte_xen_lib.c new file mode 100644 index 0000000000..b3932f0e06 --- /dev/null +++ b/drivers/net/xenvirt/rte_xen_lib.c @@ -0,0 +1,428 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200 +#include +#else +#include +#endif +#include + +#include +#include + +#include "rte_xen_lib.h" + +/* + * The grant node format in xenstore for vring/mpool is: + * 0_rx_vring_gref = "gref1#, gref2#, gref3#" + * 0_mempool_gref = "gref1#, gref2#, gref3#" + * each gref# is a grant reference for a shared page. + * In each shared page, we store the grant_node_item items. + */ +struct grant_node_item { + uint32_t gref; + uint32_t pfn; +} __attribute__((packed)); + +/* fd for xen_gntalloc driver, used to allocate grant pages*/ +int gntalloc_fd = -1; + +/* xenstore path for local domain, now it is '/local/domain/domid/' */ +static char *dompath = NULL; +/* handle to xenstore read/write operations */ +static struct xs_handle *xs = NULL; + +/* + * Reserve a virtual address space. + * On success, returns the pointer. On failure, returns NULL. 
+ */ +void * +get_xen_virtual(size_t size, size_t page_sz) +{ + void *addr; + uintptr_t aligned_addr; + + addr = mmap(NULL, size + page_sz, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, PMD, "failed get a virtual area\n"); + return NULL; + } + + aligned_addr = RTE_ALIGN_CEIL((uintptr_t)addr, page_sz); + addr = (void *)(aligned_addr); + + return addr; +} + +/* + * Get the physical address for virtual memory starting at va. + */ +int +get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num, uint32_t pg_sz) +{ + int32_t fd, rc = 0; + uint32_t i, nb; + off_t ofs; + + ofs = (uintptr_t)va / pg_sz * sizeof(*pa); + nb = pg_num * sizeof(*pa); + + if ((fd = open(PAGEMAP_FNAME, O_RDONLY)) < 0 || + (rc = pread(fd, pa, nb, ofs)) < 0 || + (rc -= nb) != 0) { + RTE_LOG(ERR, PMD, "%s: failed read of %u bytes from \'%s\' " + "at offset %zu, error code: %d\n", + __func__, nb, PAGEMAP_FNAME, ofs, errno); + rc = ENOENT; + } + + close(fd); + for (i = 0; i != pg_num; i++) + pa[i] = (pa[i] & PAGEMAP_PFN_MASK) * pg_sz; + + return rc; +} + +int +gntalloc_open(void) +{ + gntalloc_fd = open(XEN_GNTALLOC_FNAME, O_RDWR); + return (gntalloc_fd != -1) ? 
0 : -1; +} + +void +gntalloc_close(void) +{ + if (gntalloc_fd != -1) + close(gntalloc_fd); + gntalloc_fd = -1; +} + +void * +gntalloc(size_t size, uint32_t *gref, uint64_t *start_index) +{ + int page_size = getpagesize(); + uint32_t i, pg_num; + void *va; + int rv; + struct ioctl_gntalloc_alloc_gref *arg; + struct ioctl_gntalloc_dealloc_gref arg_d; + + if (size % page_size) { + RTE_LOG(ERR, PMD, "%s: %zu isn't multiple of page size\n", + __func__, size); + return NULL; + } + + pg_num = size / page_size; + arg = malloc(sizeof(*arg) + (pg_num - 1) * sizeof(uint32_t)); + if (arg == NULL) + return NULL; + arg->domid = DOM0_DOMID; + arg->flags = GNTALLOC_FLAG_WRITABLE; + arg->count = pg_num; + + rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_ALLOC_GREF, arg); + if (rv) { + RTE_LOG(ERR, PMD, "%s: ioctl error\n", __func__); + free(arg); + return NULL; + } + + va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gntalloc_fd, arg->index); + if (va == MAP_FAILED) { + RTE_LOG(ERR, PMD, "%s: mmap failed\n", __func__); + arg_d.count = pg_num; + arg_d.index = arg->index; + ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, arg_d); + free(arg); + return NULL; + } + + if (gref) { + for (i = 0; i < pg_num; i++) { + gref[i] = arg->gref_ids[i]; + } + } + if (start_index) + *start_index = arg->index; + + free(arg); + + return va; +} + +int +grefwatch_from_alloc(uint32_t *gref, void **pptr) +{ + int rv; + void *ptr; + int pg_size = getpagesize(); + struct ioctl_gntalloc_alloc_gref arg = { + .domid = DOM0_DOMID, + .flags = GNTALLOC_FLAG_WRITABLE, + .count = 1 + }; + struct ioctl_gntalloc_dealloc_gref arg_d; + struct ioctl_gntalloc_unmap_notify notify = { + .action = UNMAP_NOTIFY_CLEAR_BYTE + }; + + rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_ALLOC_GREF, &arg); + if (rv) { + RTE_LOG(ERR, PMD, "%s: ioctl error\n", __func__); + return -1; + } + + ptr = (void *)mmap(NULL, pg_size, PROT_READ|PROT_WRITE, MAP_SHARED, gntalloc_fd, arg.index); + arg_d.index = arg.index; + arg_d.count = 1; + if (ptr == 
MAP_FAILED) { + RTE_LOG(ERR, PMD, "%s: mmap failed\n", __func__); + ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg_d); + return -1; + } + if (pptr) + *pptr = ptr; + if (gref) + *gref = arg.gref_ids[0]; + + notify.index = arg.index; + rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_SET_UNMAP_NOTIFY, &notify); + if (rv) { + RTE_LOG(ERR, PMD, "%s: unmap notify failed\n", __func__); + munmap(ptr, pg_size); + ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg_d); + return -1; + } + + return 0; +} + +void +gntfree(void *va, size_t sz, uint64_t start_index) +{ + struct ioctl_gntalloc_dealloc_gref arg_d; + + if (va && sz) { + munmap(va, sz); + arg_d.count = sz / getpagesize(); + arg_d.index = start_index; + ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg_d); + } +} + +static int +xenstore_cleanup(void) +{ + char store_path[PATH_MAX] = {0}; + + if (snprintf(store_path, sizeof(store_path), + "%s%s", dompath, DPDK_XENSTORE_NODE) == -1) + return -1; + + if (xs_rm(xs, XBT_NULL, store_path) == false) { + RTE_LOG(ERR, PMD, "%s: failed cleanup node\n", __func__); + return -1; + } + + return 0; +} + +int +xenstore_init(void) +{ + unsigned int len, domid; + char *buf; + static int cleanup = 0; + char *end; + + xs = xs_domain_open(); + if (xs == NULL) { + RTE_LOG(ERR, PMD,"%s: xs_domain_open failed\n", __func__); + return -1; + } + buf = xs_read(xs, XBT_NULL, "domid", &len); + if (buf == NULL) { + RTE_LOG(ERR, PMD, "%s: failed read domid\n", __func__); + return -1; + } + errno = 0; + domid = strtoul(buf, &end, 0); + if (errno != 0 || end == NULL || end == buf || domid == 0) + return -1; + + RTE_LOG(INFO, PMD, "retrieved dom ID = %d\n", domid); + + dompath = xs_get_domain_path(xs, domid); + if (dompath == NULL) + return -1; + + xs_transaction_start(xs); /* When to stop transaction */ + + if (cleanup == 0) { + if (xenstore_cleanup()) + return -1; + cleanup = 1; + } + + return 0; +} + +int +xenstore_write(const char *key_str, const char *val_str) +{ + char grant_path[PATH_MAX]; + int
rv, len; + + if (xs == NULL) { + RTE_LOG(ERR, PMD, "%s: xenstore init failed\n", __func__); + return -1; + } + rv = snprintf(grant_path, sizeof(grant_path), "%s%s", dompath, key_str); + if (rv == -1) { + RTE_LOG(ERR, PMD, "%s: snprintf %s %s failed\n", + __func__, dompath, key_str); + return -1; + } + len = strnlen(val_str, PATH_MAX); + + if (xs_write(xs, XBT_NULL, grant_path, val_str, len) == false) { + RTE_LOG(ERR, PMD, "%s: xs_write failed\n", __func__); + return -1; + } + + return 0; +} + +int +grant_node_create(uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, char *val_str, size_t str_size) +{ + uint64_t start_index; + int pg_size; + uint32_t pg_shift; + void *ptr = NULL; + uint32_t count, entries_per_pg; + uint32_t i, j = 0, k = 0;; + uint32_t *gref_tmp; + int first = 1; + char tmp_str[PATH_MAX] = {0}; + int rv = -1; + + pg_size = getpagesize(); + if (rte_is_power_of_2(pg_size) == 0) { + return -1; + } + pg_shift = rte_bsf32(pg_size); + if (pg_size % sizeof(struct grant_node_item)) { + RTE_LOG(ERR, PMD, "pg_size isn't a multiple of grant node item\n"); + return -1; + } + + entries_per_pg = pg_size / sizeof(struct grant_node_item); + count = (pg_num + entries_per_pg - 1 ) / entries_per_pg; + gref_tmp = malloc(count * sizeof(uint32_t)); + if (gref_tmp == NULL) + return -1; + ptr = gntalloc(pg_size * count, gref_tmp, &start_index); + if (ptr == NULL) { + RTE_LOG(ERR, PMD, "%s: gntalloc error of %d pages\n", __func__, count); + free(gref_tmp); + return -1; + } + + while (j < pg_num) { + if (first) { + rv = snprintf(val_str, str_size, "%u", gref_tmp[k]); + first = 0; + } else { + snprintf(tmp_str, PATH_MAX, "%s", val_str); + rv = snprintf(val_str, str_size, "%s,%u", tmp_str, gref_tmp[k]); + } + k++; + if (rv == -1) + break; + + for (i = 0; i < entries_per_pg && j < pg_num ; i++) { + ((struct grant_node_item *)ptr)->gref = gref_arr[j]; + ((struct grant_node_item *)ptr)->pfn = pa_arr[j] >> pg_shift; + ptr = RTE_PTR_ADD(ptr, sizeof(struct grant_node_item)); 
+ j++; + } + } + if (rv == -1) { + gntfree(ptr, pg_size * count, start_index); + } else + rv = 0; + free(gref_tmp); + return rv; +} + + +int +grant_gntalloc_mbuf_pool(struct rte_mempool *mpool, uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, int mempool_idx) +{ + char key_str[PATH_MAX] = {0}; + char val_str[PATH_MAX] = {0}; + + if (grant_node_create(pg_num, gref_arr, pa_arr, val_str, sizeof(val_str))) { + return -1; + } + + if (snprintf(key_str, sizeof(key_str), + DPDK_XENSTORE_PATH"%d"MEMPOOL_XENSTORE_STR, mempool_idx) == -1) + return -1; + if (xenstore_write(key_str, val_str) == -1) + return -1; + + if (snprintf(key_str, sizeof(key_str), + DPDK_XENSTORE_PATH"%d"MEMPOOL_VA_XENSTORE_STR, mempool_idx) == -1) + return -1; + if (snprintf(val_str, sizeof(val_str), "%"PRIxPTR, (uintptr_t)mpool->elt_va_start) == -1) + return -1; + if (xenstore_write(key_str, val_str) == -1) + return -1; + + return 0; +} diff --git a/drivers/net/xenvirt/rte_xen_lib.h b/drivers/net/xenvirt/rte_xen_lib.h new file mode 100644 index 0000000000..0ba7148af9 --- /dev/null +++ b/drivers/net/xenvirt/rte_xen_lib.h @@ -0,0 +1,113 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_XEN_DUMMY_PMD_H +#define _RTE_XEN_DUMMY_PMD_H + +#include + +#include +#include +#include + +#define PAGEMAP_FNAME "/proc/self/pagemap" +#define XEN_GNTALLOC_FNAME "/dev/xen/gntalloc" +#define DPDK_XENSTORE_PATH "/control/dpdk/" +#define DPDK_XENSTORE_NODE "/control/dpdk" +/*format 0_mempool_gref = "1537,1524,1533" */ +#define MEMPOOL_XENSTORE_STR "_mempool_gref" +/*format 0_mempool_va = 0x80340000 */ +#define MEMPOOL_VA_XENSTORE_STR "_mempool_va" +/*format 0_rx_vring_gref = "1537,1524,1533" */ +#define RXVRING_XENSTORE_STR "_rx_vring_gref" +/*format 0_tx_vring_gref = "1537,1524,1533" */ +#define TXVRING_XENSTORE_STR "_tx_vring_gref" +#define VRING_FLAG_STR "_vring_flag" +/*format: event_type_start_0 = 1*/ +#define EVENT_TYPE_START_STR "event_type_start_" + +#define DOM0_DOMID 0 +/* + * the pfn (page frame number) are bits 0-54 (see pagemap.txt in linux + * Documentation). 
+ */ +#define PAGEMAP_PFN_BITS 54 +#define PAGEMAP_PFN_MASK RTE_LEN2MASK(PAGEMAP_PFN_BITS, phys_addr_t) + +#define MAP_FLAG 0xA5 + +#define RTE_ETH_XENVIRT_PAIRS_DELIM ';' +#define RTE_ETH_XENVIRT_KEY_VALUE_DELIM '=' +#define RTE_ETH_XENVIRT_MAX_ARGS 1 +#define RTE_ETH_XENVIRT_MAC_PARAM "mac" +struct xenvirt_dict { + uint8_t addr_valid; + struct ether_addr addr; +}; + +extern int gntalloc_fd; + +int +gntalloc_open(void); + +void +gntalloc_close(void); + +void * +gntalloc(size_t sz, uint32_t *gref, uint64_t *start_index); + +void +gntfree(void *va, size_t sz, uint64_t start_index); + +int +xenstore_init(void); + +int +xenstore_write(const char *key_str, const char *val_str); + +int +get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num, uint32_t pg_sz); + +void * +get_xen_virtual(size_t size, size_t page_sz); + +int +grefwatch_from_alloc(uint32_t *gref, void **pptr); + + +int grant_node_create(uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, char *val_str, size_t str_size); + +int +grant_gntalloc_mbuf_pool(struct rte_mempool *mpool, uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, int mempool_idx); + +#endif diff --git a/drivers/net/xenvirt/virtio_logs.h b/drivers/net/xenvirt/virtio_logs.h new file mode 100644 index 0000000000..d6c33f7b38 --- /dev/null +++ b/drivers/net/xenvirt/virtio_logs.h @@ -0,0 +1,70 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_LOGS_H_ +#define _VIRTIO_LOGS_H_ + +#include + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_INIT +#define PMD_INIT_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) +#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>") +#else +#define PMD_INIT_LOG(level, fmt, args...) do { } while(0) +#define PMD_INIT_FUNC_TRACE() do { } while(0) +#endif + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_RX +#define PMD_RX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s() rx: " fmt , __func__, ## args) +#else +#define PMD_RX_LOG(level, fmt, args...) do { } while(0) +#endif + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_TX +#define PMD_TX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s() tx: " fmt , __func__, ## args) +#else +#define PMD_TX_LOG(level, fmt, args...) do { } while(0) +#endif + + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DRIVER +#define PMD_DRV_LOG(level, fmt, args...) 
\ + RTE_LOG(level, PMD, "%s(): " fmt , __func__, ## args) +#else +#define PMD_DRV_LOG(level, fmt, args...) do { } while(0) +#endif + +#endif /* _VIRTIO_LOGS_H_ */ diff --git a/drivers/net/xenvirt/virtqueue.h b/drivers/net/xenvirt/virtqueue.h new file mode 100644 index 0000000000..eff6208623 --- /dev/null +++ b/drivers/net/xenvirt/virtqueue.h @@ -0,0 +1,280 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _VIRTQUEUE_H_ +#define _VIRTQUEUE_H_ + +#include +#include +#include + +#include +#include +#include +#include + +#include "virtio_logs.h" + +struct rte_mbuf; + +/* The alignment to use between consumer and producer parts of vring. */ +#define VIRTIO_PCI_VRING_ALIGN 4096 + +/* + * Address translatio is between gva<->hva, + * rather than gpa<->hva in virito spec. + */ +#define RTE_MBUF_DATA_DMA_ADDR(mb) \ + rte_pktmbuf_mtod(mb, uint64_t) + +enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 }; + +/** + * The maximum virtqueue size is 2^15. Use that value as the end of + * descriptor chain terminator since it will never be a valid index + * in the descriptor table. This is used to verify we are correctly + * handling vq_free_cnt. + */ +#define VQ_RING_DESC_CHAIN_END 32768 + +#define VIRTQUEUE_MAX_NAME_SZ 32 + +struct pmd_internals { + struct rte_eth_stats eth_stats; + int port_id; + int virtio_idx; +}; + + +struct virtqueue { + char vq_name[VIRTQUEUE_MAX_NAME_SZ]; + struct rte_mempool *mpool; /**< mempool for mbuf allocation */ + uint16_t queue_id; /**< DPDK queue index. */ + uint16_t vq_queue_index; /**< PCI queue index */ + uint8_t port_id; /**< Device port identifier. */ + + void *vq_ring_virt_mem; /**< virtual address of vring*/ + int vq_alignment; + int vq_ring_size; + + struct vring vq_ring; /**< vring keeping desc, used and avail */ + struct pmd_internals *internals; /**< virtio device internal info. 
*/ + uint16_t vq_nentries; /**< vring desc numbers */ + uint16_t vq_desc_head_idx; + uint16_t vq_free_cnt; /**< num of desc available */ + uint16_t vq_used_cons_idx; /**< Last consumed desc in used table, trails vq_ring.used->idx*/ + + struct vq_desc_extra { + void *cookie; + uint16_t ndescs; + } vq_descx[0] __rte_cache_aligned; +}; + + +#ifdef RTE_LIBRTE_XENVIRT_DEBUG_DUMP +#define VIRTQUEUE_DUMP(vq) do { \ + uint16_t used_idx, nused; \ + used_idx = (vq)->vq_ring.used->idx; \ + nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \ + PMD_INIT_LOG(DEBUG, \ + "VQ: %s - size=%d; free=%d; used=%d; desc_head_idx=%d;" \ + " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \ + " avail.flags=0x%x; used.flags=0x%x\n", \ + (vq)->vq_name, (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \ + (vq)->vq_desc_head_idx, (vq)->vq_ring.avail->idx, \ + (vq)->vq_used_cons_idx, (vq)->vq_ring.used->idx, \ + (vq)->vq_ring.avail->flags, (vq)->vq_ring.used->flags); \ +} while (0) +#else +#define VIRTQUEUE_DUMP(vq) do { } while (0) +#endif + + +/** + * Dump virtqueue internal structures, for debug purpose only. + */ +void virtqueue_dump(struct virtqueue *vq); + +/** + * Get all mbufs to be freed. + */ +struct rte_mbuf * virtqueue_detatch_unused(struct virtqueue *vq); + +static inline int __attribute__((always_inline)) +virtqueue_full(const struct virtqueue *vq) +{ + return (vq->vq_free_cnt == 0); +} + +#define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx)) + +static inline void __attribute__((always_inline)) +vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx) +{ + uint16_t avail_idx; + /* + * Place the head of the descriptor chain into the next slot and make + * it usable to the host. The chain is made available now rather than + * deferring to virtqueue_notify() in the hopes that if the host is + * currently running on another CPU, we can keep it processing the new + * descriptor. 
+ */ + avail_idx = (uint16_t)(vq->vq_ring.avail->idx & (vq->vq_nentries - 1)); + vq->vq_ring.avail->ring[avail_idx] = desc_idx; + rte_compiler_barrier(); /* wmb , for IA memory model barrier is enough*/ + vq->vq_ring.avail->idx++; +} + +static inline void __attribute__((always_inline)) +vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) +{ + struct vring_desc *dp; + struct vq_desc_extra *dxp; + + dp = &vq->vq_ring.desc[desc_idx]; + dxp = &vq->vq_descx[desc_idx]; + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs); + while (dp->flags & VRING_DESC_F_NEXT) { + dp = &vq->vq_ring.desc[dp->next]; + } + dxp->ndescs = 0; + + /* + * We must append the existing free chain, if any, to the end of + * newly freed chain. If the virtqueue was completely used, then + * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above). + */ + dp->next = vq->vq_desc_head_idx; + vq->vq_desc_head_idx = desc_idx; +} + +static inline int __attribute__((always_inline)) +virtqueue_enqueue_recv_refill(struct virtqueue *rxvq, struct rte_mbuf *cookie) +{ + const uint16_t needed = 1; + const uint16_t head_idx = rxvq->vq_desc_head_idx; + struct vring_desc *start_dp = rxvq->vq_ring.desc; + struct vq_desc_extra *dxp; + + if (unlikely(rxvq->vq_free_cnt == 0)) + return -ENOSPC; + if (unlikely(rxvq->vq_free_cnt < needed)) + return -EMSGSIZE; + if (unlikely(head_idx >= rxvq->vq_nentries)) + return -EFAULT; + + dxp = &rxvq->vq_descx[head_idx]; + dxp->cookie = (void *)cookie; + dxp->ndescs = needed; + + start_dp[head_idx].addr = + (uint64_t) ((uint64_t)cookie->buf_addr + RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr)); + start_dp[head_idx].len = cookie->buf_len - RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr); + start_dp[head_idx].flags = VRING_DESC_F_WRITE; + rxvq->vq_desc_head_idx = start_dp[head_idx].next; + rxvq->vq_free_cnt = (uint16_t)(rxvq->vq_free_cnt - needed); + vq_ring_update_avail(rxvq, head_idx); + + return 0; +} + +static inline int __attribute__((always_inline)) 
+virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie) +{ + + const uint16_t needed = 2; + struct vring_desc *start_dp = txvq->vq_ring.desc; + uint16_t head_idx = txvq->vq_desc_head_idx; + uint16_t idx = head_idx; + struct vq_desc_extra *dxp; + + if (unlikely(txvq->vq_free_cnt == 0)) + return -ENOSPC; + if (unlikely(txvq->vq_free_cnt < needed)) + return -EMSGSIZE; + if (unlikely(head_idx >= txvq->vq_nentries)) + return -EFAULT; + + dxp = &txvq->vq_descx[idx]; + dxp->cookie = (void *)cookie; + dxp->ndescs = needed; + + start_dp = txvq->vq_ring.desc; + start_dp[idx].addr = 0; +/* + * TODO: save one desc here? + */ + start_dp[idx].len = sizeof(struct virtio_net_hdr); + start_dp[idx].flags = VRING_DESC_F_NEXT; + start_dp[idx].addr = (uintptr_t)NULL; + idx = start_dp[idx].next; + start_dp[idx].addr = RTE_MBUF_DATA_DMA_ADDR(cookie); + start_dp[idx].len = cookie->data_len; + start_dp[idx].flags = 0; + idx = start_dp[idx].next; + txvq->vq_desc_head_idx = idx; + txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed); + vq_ring_update_avail(txvq, head_idx); + + return 0; +} + +static inline uint16_t __attribute__((always_inline)) +virtqueue_dequeue_burst(struct virtqueue *vq, struct rte_mbuf **rx_pkts, uint32_t *len, uint16_t num) +{ + struct vring_used_elem *uep; + struct rte_mbuf *cookie; + uint16_t used_idx, desc_idx; + uint16_t i; + /* Caller does the check */ + for (i = 0; i < num ; i ++) { + used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + uep = &vq->vq_ring.used->ring[used_idx]; + desc_idx = (uint16_t) uep->id; + cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie; + if (unlikely(cookie == NULL)) { + PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n", + vq->vq_used_cons_idx); + RTE_LOG(ERR, PMD, "%s: inconsistent (%u, %u)\n", __func__, used_idx , desc_idx); + break; + } + len[i] = uep->len; + rx_pkts[i] = cookie; + vq->vq_used_cons_idx++; + vq_ring_free_chain(vq, desc_idx); + vq->vq_descx[desc_idx].cookie 
= NULL; + } + return i; +} + +#endif /* _VIRTQUEUE_H_ */ diff --git a/lib/Makefile b/lib/Makefile index 81cf0c1e20..5f480f9590 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -41,7 +41,6 @@ DIRS-$(CONFIG_RTE_LIBRTE_TIMER) += librte_timer DIRS-$(CONFIG_RTE_LIBRTE_CFGFILE) += librte_cfgfile DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether -DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index bc1e90d419..077ea99fef 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -50,7 +50,7 @@ CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem CFLAGS += -I$(RTE_SDK)/drivers/net/ring CFLAGS += -I$(RTE_SDK)/drivers/net/pcap CFLAGS += -I$(RTE_SDK)/drivers/net/af_packet -CFLAGS += -I$(RTE_SDK)/lib/librte_pmd_xenvirt +CFLAGS += -I$(RTE_SDK)/drivers/net/xenvirt CFLAGS += $(WERROR_FLAGS) -O3 # specific to linuxapp exec-env diff --git a/lib/librte_pmd_xenvirt/Makefile b/lib/librte_pmd_xenvirt/Makefile deleted file mode 100644 index f0c796cea6..0000000000 --- a/lib/librte_pmd_xenvirt/Makefile +++ /dev/null @@ -1,62 +0,0 @@ -# BSD LICENSE -# -# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. 
-# * Neither the name of Intel Corporation nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -include $(RTE_SDK)/mk/rte.vars.mk - -# -# library name -# -LIB = librte_pmd_xenvirt.a - -CFLAGS += -O3 -CFLAGS += $(WERROR_FLAGS) - -EXPORT_MAP := rte_eth_xenvirt_version.map - -LIBABIVER := 1 - -# -# all source are stored in SRCS-y -# -SRCS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += rte_eth_xenvirt.c rte_mempool_gntalloc.c rte_xen_lib.c - -# -# Export include files -# -SYMLINK-y-include += rte_eth_xenvirt.h - -# this lib depends upon: -DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += lib/librte_eal lib/librte_ether -DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += lib/librte_mempool lib/librte_mbuf -DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += lib/librte_net lib/librte_malloc -DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += lib/librte_cmdline - -include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_pmd_xenvirt/rte_eth_xenvirt.c b/lib/librte_pmd_xenvirt/rte_eth_xenvirt.c deleted file mode 100644 index 73e8bce02a..0000000000 --- a/lib/librte_pmd_xenvirt/rte_eth_xenvirt.c +++ /dev/null @@ -1,714 +0,0 @@ -/*- - * 
BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200 -#include -#else -#include -#endif -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "rte_xen_lib.h" -#include "virtqueue.h" -#include "rte_eth_xenvirt.h" - -#define VQ_DESC_NUM 256 -#define VIRTIO_MBUF_BURST_SZ 64 - -/* virtio_idx is increased after new device is created.*/ -static int virtio_idx = 0; - -static const char *drivername = "xen dummy virtio PMD"; - -static struct rte_eth_link pmd_link = { - .link_speed = 10000, - .link_duplex = ETH_LINK_FULL_DUPLEX, - .link_status = 0 -}; - -static inline struct rte_mbuf * -rte_rxmbuf_alloc(struct rte_mempool *mp) -{ - struct rte_mbuf *m; - - m = __rte_mbuf_raw_alloc(mp); - __rte_mbuf_sanity_check_raw(m, 0); - - return m; -} - - -static uint16_t -eth_xenvirt_rx(void *q, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) -{ - struct virtqueue *rxvq = q; - struct rte_mbuf *rxm, *new_mbuf; - uint16_t nb_used, num; - uint32_t len[VIRTIO_MBUF_BURST_SZ]; - uint32_t i; - struct pmd_internals *pi = rxvq->internals; - - nb_used = VIRTQUEUE_NUSED(rxvq); - - rte_compiler_barrier(); /* rmb */ - num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts); - num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? 
num : VIRTIO_MBUF_BURST_SZ); - if (unlikely(num == 0)) return 0; - - num = virtqueue_dequeue_burst(rxvq, rx_pkts, len, num); - PMD_RX_LOG(DEBUG, "used:%d dequeue:%d\n", nb_used, num); - for (i = 0; i < num ; i ++) { - rxm = rx_pkts[i]; - PMD_RX_LOG(DEBUG, "packet len:%d\n", len[i]); - rxm->next = NULL; - rxm->data_off = RTE_PKTMBUF_HEADROOM; - rxm->data_len = (uint16_t)(len[i] - sizeof(struct virtio_net_hdr)); - rxm->nb_segs = 1; - rxm->port = pi->port_id; - rxm->pkt_len = (uint32_t)(len[i] - sizeof(struct virtio_net_hdr)); - } - /* allocate new mbuf for the used descriptor */ - while (likely(!virtqueue_full(rxvq))) { - new_mbuf = rte_rxmbuf_alloc(rxvq->mpool); - if (unlikely(new_mbuf == NULL)) { - break; - } - if (unlikely(virtqueue_enqueue_recv_refill(rxvq, new_mbuf))) { - rte_pktmbuf_free_seg(new_mbuf); - break; - } - } - pi->eth_stats.ipackets += num; - return num; -} - -static uint16_t -eth_xenvirt_tx(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) -{ - struct virtqueue *txvq = tx_queue; - struct rte_mbuf *txm; - uint16_t nb_used, nb_tx, num, i; - int error; - uint32_t len[VIRTIO_MBUF_BURST_SZ]; - struct rte_mbuf *snd_pkts[VIRTIO_MBUF_BURST_SZ]; - struct pmd_internals *pi = txvq->internals; - - nb_tx = 0; - - if (unlikely(nb_pkts == 0)) - return 0; - - PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts); - nb_used = VIRTQUEUE_NUSED(txvq); - - rte_compiler_barrier(); /* rmb */ - - num = (uint16_t)(likely(nb_used <= VIRTIO_MBUF_BURST_SZ) ? 
nb_used : VIRTIO_MBUF_BURST_SZ); - num = virtqueue_dequeue_burst(txvq, snd_pkts, len, num); - - for (i = 0; i < num ; i ++) { - /* mergable not supported, one segment only */ - rte_pktmbuf_free_seg(snd_pkts[i]); - } - - while (nb_tx < nb_pkts) { - if (likely(!virtqueue_full(txvq))) { - /* TODO drop tx_pkts if it contains multiple segments */ - txm = tx_pkts[nb_tx]; - error = virtqueue_enqueue_xmit(txvq, txm); - if (unlikely(error)) { - if (error == ENOSPC) - PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0\n"); - else if (error == EMSGSIZE) - PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1\n"); - else - PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d\n", error); - break; - } - nb_tx++; - } else { - PMD_TX_LOG(ERR, "No free tx descriptors to transmit\n"); - /* virtqueue_notify not needed in our para-virt solution */ - break; - } - } - pi->eth_stats.opackets += nb_tx; - return nb_tx; -} - -static int -eth_dev_configure(struct rte_eth_dev *dev __rte_unused) -{ - RTE_LOG(ERR, PMD, "%s\n", __func__); - return 0; -} - -/* - * Create a shared page between guest and host. - * Host monitors this page if it is cleared on unmap, and then - * do necessary clean up. - */ -static void -gntalloc_vring_flag(int vtidx) -{ - char key_str[PATH_MAX]; - char val_str[PATH_MAX]; - uint32_t gref_tmp; - void *ptr; - - if (grefwatch_from_alloc(&gref_tmp, &ptr)) { - RTE_LOG(ERR, PMD, "grefwatch_from_alloc error\n"); - exit(0); - } - - *(uint8_t *)ptr = MAP_FLAG; - snprintf(val_str, sizeof(val_str), "%u", gref_tmp); - snprintf(key_str, sizeof(key_str), - DPDK_XENSTORE_PATH"%d"VRING_FLAG_STR, vtidx); - xenstore_write(key_str, val_str); -} - -/* - * Notify host this virtio device is started. - * Host could start polling this device. 
- */ -static void -dev_start_notify(int vtidx) -{ - char key_str[PATH_MAX]; - char val_str[PATH_MAX]; - - RTE_LOG(INFO, PMD, "%s: virtio %d is started\n", __func__, vtidx); - gntalloc_vring_flag(vtidx); - - snprintf(key_str, sizeof(key_str), "%s%s%d", - DPDK_XENSTORE_PATH, EVENT_TYPE_START_STR, - vtidx); - snprintf(val_str, sizeof(val_str), "1"); - xenstore_write(key_str, val_str); -} - -/* - * Notify host this virtio device is stopped. - * Host could stop polling this device. - */ -static void -dev_stop_notify(int vtidx) -{ - RTE_SET_USED(vtidx); -} - - -static int -update_mac_address(struct ether_addr *mac_addrs, int vtidx) -{ - char key_str[PATH_MAX]; - char val_str[PATH_MAX]; - int rv; - - if (mac_addrs == NULL) { - RTE_LOG(ERR, PMD, "%s: NULL pointer mac specified\n", __func__); - return -1; - } - rv = snprintf(key_str, sizeof(key_str), - DPDK_XENSTORE_PATH"%d_ether_addr", vtidx); - if (rv == -1) - return rv; - rv = snprintf(val_str, sizeof(val_str), "%02x:%02x:%02x:%02x:%02x:%02x", - mac_addrs->addr_bytes[0], - mac_addrs->addr_bytes[1], - mac_addrs->addr_bytes[2], - mac_addrs->addr_bytes[3], - mac_addrs->addr_bytes[4], - mac_addrs->addr_bytes[5]); - if (rv == -1) - return rv; - if (xenstore_write(key_str, val_str)) - return rv; - return 0; -} - - -static int -eth_dev_start(struct rte_eth_dev *dev) -{ - struct virtqueue *rxvq = dev->data->rx_queues[0]; - struct virtqueue *txvq = dev->data->tx_queues[0]; - struct rte_mbuf *m; - struct pmd_internals *pi = (struct pmd_internals *)dev->data->dev_private; - int rv; - - dev->data->dev_link.link_status = 1; - while (!virtqueue_full(rxvq)) { - m = rte_rxmbuf_alloc(rxvq->mpool); - if (m == NULL) - break; - /* Enqueue allocated buffers. 
*/ - if (virtqueue_enqueue_recv_refill(rxvq, m)) { - rte_pktmbuf_free_seg(m); - break; - } - } - - rxvq->internals = pi; - txvq->internals = pi; - - rv = update_mac_address(dev->data->mac_addrs, pi->virtio_idx); - if (rv) - return -1; - dev_start_notify(pi->virtio_idx); - - return 0; -} - -static void -eth_dev_stop(struct rte_eth_dev *dev) -{ - struct pmd_internals *pi = (struct pmd_internals *)dev->data->dev_private; - - dev->data->dev_link.link_status = 0; - dev_stop_notify(pi->virtio_idx); -} - -/* - * Notify host this virtio device is closed. - * Host could do necessary clean up to this device. - */ -static void -eth_dev_close(struct rte_eth_dev *dev) -{ - RTE_SET_USED(dev); -} - -static void -eth_dev_info(struct rte_eth_dev *dev, - struct rte_eth_dev_info *dev_info) -{ - struct pmd_internals *internals = dev->data->dev_private; - - RTE_SET_USED(internals); - dev_info->driver_name = drivername; - dev_info->max_mac_addrs = 1; - dev_info->max_rx_pktlen = (uint32_t)2048; - dev_info->max_rx_queues = (uint16_t)1; - dev_info->max_tx_queues = (uint16_t)1; - dev_info->min_rx_bufsize = 0; - dev_info->pci_dev = NULL; -} - -static void -eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) -{ - struct pmd_internals *internals = dev->data->dev_private; - if(stats) - rte_memcpy(stats, &internals->eth_stats, sizeof(*stats)); -} - -static void -eth_stats_reset(struct rte_eth_dev *dev) -{ - struct pmd_internals *internals = dev->data->dev_private; - /* Reset software totals */ - memset(&internals->eth_stats, 0, sizeof(internals->eth_stats)); -} - -static void -eth_queue_release(void *q __rte_unused) -{ -} - -static int -eth_link_update(struct rte_eth_dev *dev __rte_unused, - int wait_to_complete __rte_unused) -{ - return 0; -} - -/* - * Create shared vring between guest and host. - * Memory is allocated through grant alloc driver, so it is not physical continuous. 
- */ -static void * -gntalloc_vring_create(int queue_type, uint32_t size, int vtidx) -{ - char key_str[PATH_MAX] = {0}; - char val_str[PATH_MAX] = {0}; - void *va = NULL; - int pg_size; - uint32_t pg_num; - uint32_t *gref_arr = NULL; - phys_addr_t *pa_arr = NULL; - uint64_t start_index; - int rv; - - pg_size = getpagesize(); - size = RTE_ALIGN_CEIL(size, pg_size); - pg_num = size / pg_size; - - gref_arr = calloc(pg_num, sizeof(gref_arr[0])); - pa_arr = calloc(pg_num, sizeof(pa_arr[0])); - - if (gref_arr == NULL || pa_arr == NULL) { - RTE_LOG(ERR, PMD, "%s: calloc failed\n", __func__); - goto out; - } - - va = gntalloc(size, gref_arr, &start_index); - if (va == NULL) { - RTE_LOG(ERR, PMD, "%s: gntalloc failed\n", __func__); - goto out; - } - - if (get_phys_map(va, pa_arr, pg_num, pg_size)) - goto out; - - /* write in xenstore gref and pfn for each page of vring */ - if (grant_node_create(pg_num, gref_arr, pa_arr, val_str, sizeof(val_str))) { - gntfree(va, size, start_index); - va = NULL; - goto out; - } - - if (queue_type == VTNET_RQ) - rv = snprintf(key_str, sizeof(key_str), DPDK_XENSTORE_PATH"%d"RXVRING_XENSTORE_STR, vtidx); - else - rv = snprintf(key_str, sizeof(key_str), DPDK_XENSTORE_PATH"%d"TXVRING_XENSTORE_STR, vtidx); - if (rv == -1 || xenstore_write(key_str, val_str) == -1) { - gntfree(va, size, start_index); - va = NULL; - } -out: - if (pa_arr) - free(pa_arr); - if (gref_arr) - free(gref_arr); - - return va; -} - - - -static struct virtqueue * -virtio_queue_setup(struct rte_eth_dev *dev, int queue_type) -{ - struct virtqueue *vq = NULL; - uint16_t vq_size = VQ_DESC_NUM; - int i = 0; - char vq_name[VIRTQUEUE_MAX_NAME_SZ]; - size_t size; - struct vring *vr; - - /* Allocate memory for virtqueue. 
*/ - if (queue_type == VTNET_RQ) { - snprintf(vq_name, sizeof(vq_name), "port%d_rvq", - dev->data->port_id); - vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + - vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); - if (vq == NULL) { - RTE_LOG(ERR, PMD, "%s: unabled to allocate virtqueue\n", __func__); - return NULL; - } - memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name)); - } else if(queue_type == VTNET_TQ) { - snprintf(vq_name, sizeof(vq_name), "port%d_tvq", - dev->data->port_id); - vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + - vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); - if (vq == NULL) { - RTE_LOG(ERR, PMD, "%s: unabled to allocate virtqueue\n", __func__); - return NULL; - } - memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name)); - } - - memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name)); - - vq->vq_alignment = VIRTIO_PCI_VRING_ALIGN; - vq->vq_nentries = vq_size; - vq->vq_free_cnt = vq_size; - /* Calcuate vring size according to virtio spec */ - size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN); - vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN); - /* Allocate memory for virtio vring through gntalloc driver*/ - vq->vq_ring_virt_mem = gntalloc_vring_create(queue_type, vq->vq_ring_size, - ((struct pmd_internals *)dev->data->dev_private)->virtio_idx); - memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size); - vr = &vq->vq_ring; - vring_init(vr, vq_size, vq->vq_ring_virt_mem, vq->vq_alignment); - /* - * Locally maintained last consumed index, this idex trails - * vq_ring.used->idx. 
- */ - vq->vq_used_cons_idx = 0; - vq->vq_desc_head_idx = 0; - vq->vq_free_cnt = vq->vq_nentries; - memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries); - - /* Chain all the descriptors in the ring with an END */ - for (i = 0; i < vq_size - 1; i++) - vr->desc[i].next = (uint16_t)(i + 1); - vr->desc[i].next = VQ_RING_DESC_CHAIN_END; - - return vq; -} - -static int -eth_rx_queue_setup(struct rte_eth_dev *dev,uint16_t rx_queue_id, - uint16_t nb_rx_desc __rte_unused, - unsigned int socket_id __rte_unused, - const struct rte_eth_rxconf *rx_conf __rte_unused, - struct rte_mempool *mb_pool) -{ - struct virtqueue *vq; - vq = dev->data->rx_queues[rx_queue_id] = virtio_queue_setup(dev, VTNET_RQ); - vq->mpool = mb_pool; - return 0; -} - -static int -eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, - uint16_t nb_tx_desc __rte_unused, - unsigned int socket_id __rte_unused, - const struct rte_eth_txconf *tx_conf __rte_unused) -{ - dev->data->tx_queues[tx_queue_id] = virtio_queue_setup(dev, VTNET_TQ); - return 0; -} - - - -static const struct eth_dev_ops ops = { - .dev_start = eth_dev_start, - .dev_stop = eth_dev_stop, - .dev_close = eth_dev_close, - .dev_configure = eth_dev_configure, - .dev_infos_get = eth_dev_info, - .rx_queue_setup = eth_rx_queue_setup, - .tx_queue_setup = eth_tx_queue_setup, - .rx_queue_release = eth_queue_release, - .tx_queue_release = eth_queue_release, - .link_update = eth_link_update, - .stats_get = eth_stats_get, - .stats_reset = eth_stats_reset, -}; - - -static int -rte_eth_xenvirt_parse_args(struct xenvirt_dict *dict, - const char *name, const char *params) -{ - int i; - char *pairs[RTE_ETH_XENVIRT_MAX_ARGS]; - int num_of_pairs; - char *pair[2]; - char *args; - int ret = -1; - - if (params == NULL) - return 0; - - args = rte_zmalloc(NULL, strlen(params) + 1, RTE_CACHE_LINE_SIZE); - if (args == NULL) { - RTE_LOG(ERR, PMD, "Couldn't parse %s device \n", name); - return -1; - } - rte_memcpy(args, params, 
strlen(params)); - - num_of_pairs = rte_strsplit(args, strnlen(args, MAX_ARG_STRLEN), - pairs, - RTE_ETH_XENVIRT_MAX_ARGS , - RTE_ETH_XENVIRT_PAIRS_DELIM); - - for (i = 0; i < num_of_pairs; i++) { - pair[0] = NULL; - pair[1] = NULL; - rte_strsplit(pairs[i], strnlen(pairs[i], MAX_ARG_STRLEN), - pair, 2, - RTE_ETH_XENVIRT_KEY_VALUE_DELIM); - - if (pair[0] == NULL || pair[1] == NULL || pair[0][0] == 0 - || pair[1][0] == 0) { - RTE_LOG(ERR, PMD, - "Couldn't parse %s device," - "wrong key or value \n", name); - goto err; - } - - if (!strncmp(pair[0], RTE_ETH_XENVIRT_MAC_PARAM, - sizeof(RTE_ETH_XENVIRT_MAC_PARAM))) { - if (cmdline_parse_etheraddr(NULL, - pair[1], - &dict->addr, - sizeof(dict->addr)) < 0) { - RTE_LOG(ERR, PMD, - "Invalid %s device ether address\n", - name); - goto err; - } - - dict->addr_valid = 1; - } - } - - ret = 0; -err: - rte_free(args); - return ret; -} - -enum dev_action { - DEV_CREATE, - DEV_ATTACH -}; - - -static int -eth_dev_xenvirt_create(const char *name, const char *params, - const unsigned numa_node, - enum dev_action action) -{ - struct rte_eth_dev_data *data = NULL; - struct rte_pci_device *pci_dev = NULL; - struct pmd_internals *internals = NULL; - struct rte_eth_dev *eth_dev = NULL; - struct xenvirt_dict dict; - bzero(&dict, sizeof(struct xenvirt_dict)); - - RTE_LOG(INFO, PMD, "Creating virtio rings backed ethdev on numa socket %u\n", - numa_node); - RTE_SET_USED(action); - - if (rte_eth_xenvirt_parse_args(&dict, name, params) < 0) { - RTE_LOG(ERR, PMD, "%s: Failed to parse ethdev parameters\n", __func__); - return -1; - } - - /* now do all data allocation - for eth_dev structure, dummy pci driver - * and internal (private) data - */ - data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node); - if (data == NULL) - goto err; - - pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, numa_node); - if (pci_dev == NULL) - goto err; - - internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node); - if (internals == NULL) - 
goto err; - - /* reserve an ethdev entry */ - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); - if (eth_dev == NULL) - goto err; - - pci_dev->numa_node = numa_node; - - data->dev_private = internals; - data->port_id = eth_dev->data->port_id; - data->nb_rx_queues = (uint16_t)1; - data->nb_tx_queues = (uint16_t)1; - data->dev_link = pmd_link; - data->mac_addrs = rte_zmalloc("xen_virtio", ETHER_ADDR_LEN, 0); - - if(dict.addr_valid) - memcpy(&data->mac_addrs->addr_bytes, &dict.addr, sizeof(struct ether_addr)); - else - eth_random_addr(&data->mac_addrs->addr_bytes[0]); - - eth_dev->data = data; - eth_dev->dev_ops = &ops; - eth_dev->pci_dev = pci_dev; - - eth_dev->rx_pkt_burst = eth_xenvirt_rx; - eth_dev->tx_pkt_burst = eth_xenvirt_tx; - - internals->virtio_idx = virtio_idx++; - internals->port_id = eth_dev->data->port_id; - - return 0; - -err: - rte_free(data); - rte_free(pci_dev); - rte_free(internals); - - return -1; -} - - -/*TODO: Support multiple process model */ -static int -rte_pmd_xenvirt_devinit(const char *name, const char *params) -{ - if (virtio_idx == 0) { - if (xenstore_init() != 0) { - RTE_LOG(ERR, PMD, "%s: xenstore init failed\n", __func__); - return -1; - } - if (gntalloc_open() != 0) { - RTE_LOG(ERR, PMD, "%s: grant init failed\n", __func__); - return -1; - } - } - eth_dev_xenvirt_create(name, params, rte_socket_id(), DEV_CREATE); - return 0; -} - -static struct rte_driver pmd_xenvirt_drv = { - .name = "eth_xenvirt", - .type = PMD_VDEV, - .init = rte_pmd_xenvirt_devinit, -}; - -PMD_REGISTER_DRIVER(pmd_xenvirt_drv); diff --git a/lib/librte_pmd_xenvirt/rte_eth_xenvirt.h b/lib/librte_pmd_xenvirt/rte_eth_xenvirt.h deleted file mode 100644 index fc15a636a7..0000000000 --- a/lib/librte_pmd_xenvirt/rte_eth_xenvirt.h +++ /dev/null @@ -1,62 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_ETH_XENVIRT_H_ -#define _RTE_ETH_XENVIRT_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -/** - * Creates mempool for xen virtio PMD. - * This function uses memzone_reserve to allocate memory for meta data, - * and uses grant alloc driver to allocate memory for data area. - * The input parameters are exactly the same as rte_mempool_create. 
- */ -struct rte_mempool * -rte_mempool_gntalloc_create(const char *name, unsigned elt_num, unsigned elt_size, - unsigned cache_size, unsigned private_data_size, - rte_mempool_ctor_t *mp_init, void *mp_init_arg, - rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, - int socket_id, unsigned flags); - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/lib/librte_pmd_xenvirt/rte_eth_xenvirt_version.map b/lib/librte_pmd_xenvirt/rte_eth_xenvirt_version.map deleted file mode 100644 index dd636f72a8..0000000000 --- a/lib/librte_pmd_xenvirt/rte_eth_xenvirt_version.map +++ /dev/null @@ -1,7 +0,0 @@ -DPDK_2.0 { - global: - - rte_mempool_gntalloc_create; - - local: *; -}; diff --git a/lib/librte_pmd_xenvirt/rte_mempool_gntalloc.c b/lib/librte_pmd_xenvirt/rte_mempool_gntalloc.c deleted file mode 100644 index 3a650e8dc2..0000000000 --- a/lib/librte_pmd_xenvirt/rte_mempool_gntalloc.c +++ /dev/null @@ -1,298 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "rte_xen_lib.h" -#include "rte_eth_xenvirt.h" - -struct _gntarr { - uint32_t gref; - phys_addr_t pa; - uint64_t index; - void *va; -}; - -struct _mempool_gntalloc_info { - struct rte_mempool *mp; - uint32_t pg_num; - uint32_t *gref_arr; - phys_addr_t *pa_arr; - void *va; - uint32_t mempool_idx; - uint64_t start_index; -}; - - -static rte_atomic32_t global_xenvirt_mempool_idx = RTE_ATOMIC32_INIT(-1); - -static int -compare(const void *p1, const void *p2) -{ - return ((const struct _gntarr *)p1)->pa - ((const struct _gntarr *)p2)->pa; -} - - -static struct _mempool_gntalloc_info -_create_mempool(const char *name, unsigned elt_num, unsigned elt_size, - unsigned cache_size, unsigned private_data_size, - rte_mempool_ctor_t *mp_init, void *mp_init_arg, - rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, - int socket_id, unsigned flags) -{ - struct _mempool_gntalloc_info mgi; - struct rte_mempool *mp = NULL; - struct rte_mempool_objsz objsz; - uint32_t pg_num, rpg_num, pg_shift, pg_sz; - char *va, *orig_va, *uv; /* uv: from which, the pages could be 
freed */ - ssize_t sz, usz; /* usz: unused size */ - /* - * for each page allocated through xen_gntalloc driver, - * gref_arr:stores grant references, - * pa_arr: stores physical address, - * gnt_arr: stores all meta dat - */ - uint32_t *gref_arr = NULL; - phys_addr_t *pa_arr = NULL; - struct _gntarr *gnt_arr = NULL; - /* start index of the grant referances, used for dealloc*/ - uint64_t start_index; - uint32_t i, j; - int rv = 0; - struct ioctl_gntalloc_dealloc_gref arg; - - mgi.mp = NULL; - va = orig_va = uv = NULL; - pg_num = rpg_num = 0; - sz = 0; - - pg_sz = getpagesize(); - if (rte_is_power_of_2(pg_sz) == 0) { - goto out; - } - pg_shift = rte_bsf32(pg_sz); - - rte_mempool_calc_obj_size(elt_size, flags, &objsz); - sz = rte_mempool_xmem_size(elt_num, objsz.total_size, pg_shift); - pg_num = sz >> pg_shift; - - pa_arr = calloc(pg_num, sizeof(pa_arr[0])); - gref_arr = calloc(pg_num, sizeof(gref_arr[0])); - gnt_arr = calloc(pg_num, sizeof(gnt_arr[0])); - if ((gnt_arr == NULL) || (gref_arr == NULL) || (pa_arr == NULL)) - goto out; - - /* grant index is continuous in ascending order */ - orig_va = gntalloc(sz, gref_arr, &start_index); - if (orig_va == NULL) - goto out; - - get_phys_map(orig_va, pa_arr, pg_num, pg_sz); - for (i = 0; i < pg_num; i++) { - gnt_arr[i].index = start_index + i * pg_sz; - gnt_arr[i].gref = gref_arr[i]; - gnt_arr[i].pa = pa_arr[i]; - gnt_arr[i].va = RTE_PTR_ADD(orig_va, i * pg_sz); - } - qsort(gnt_arr, pg_num, sizeof(struct _gntarr), compare); - - va = get_xen_virtual(sz, pg_sz); - if (va == NULL) { - goto out; - } - - /* - * map one by one, as index isn't continuous now. - * pg_num VMAs, doesn't linux has a limitation on this? 
- */ - for (i = 0; i < pg_num; i++) { - /* update gref_arr and pa_arr after sort */ - gref_arr[i] = gnt_arr[i].gref; - pa_arr[i] = gnt_arr[i].pa; - gnt_arr[i].va = mmap(va + i * pg_sz, pg_sz, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, gntalloc_fd, gnt_arr[i].index); - if ((gnt_arr[i].va == MAP_FAILED) || (gnt_arr[i].va != (va + i * pg_sz))) { - RTE_LOG(ERR, PMD, "failed to map %d pages\n", i); - goto mmap_failed; - } - } - - /* - * Check that allocated size is big enough to hold elt_num - * objects and a calcualte how many bytes are actually required. - */ - usz = rte_mempool_xmem_usage(va, elt_num, objsz.total_size, pa_arr, pg_num, pg_shift); - if (usz < 0) { - mp = NULL; - i = pg_num; - goto mmap_failed; - } else { - /* unmap unused pages if any */ - uv = RTE_PTR_ADD(va, usz); - if ((usz = va + sz - uv) > 0) { - - RTE_LOG(ERR, PMD, - "%s(%s): unmap unused %zu of %zu " - "mmaped bytes @%p orig:%p\n", - __func__, name, usz, sz, uv, va); - munmap(uv, usz); - i = (sz - usz) / pg_sz; - for (; i < pg_num; i++) { - arg.count = 1; - arg.index = gnt_arr[i].index; - rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg); - if (rv) { - /* shouldn't fail here */ - RTE_LOG(ERR, PMD, "va=%p pa=%p index=%p %s\n", - gnt_arr[i].va, - (void *)gnt_arr[i].pa, - (void *)arg.index, strerror(errno)); - rte_panic("gntdealloc failed when freeing pages\n"); - } - } - - rpg_num = (sz - usz) >> pg_shift; - } else - rpg_num = pg_num; - - mp = rte_mempool_xmem_create(name, elt_num, elt_size, - cache_size, private_data_size, - mp_init, mp_init_arg, - obj_init, obj_init_arg, - socket_id, flags, va, pa_arr, rpg_num, pg_shift); - - RTE_VERIFY(elt_num == mp->size); - } - mgi.mp = mp; - mgi.pg_num = rpg_num; - mgi.gref_arr = gref_arr; - mgi.pa_arr = pa_arr; - if (mp) - mgi.mempool_idx = rte_atomic32_add_return(&global_xenvirt_mempool_idx, 1); - mgi.start_index = start_index; - mgi.va = va; - - if (mp == NULL) { - i = pg_num; - goto mmap_failed; - } - -/* - * unmap only, without 
deallocate grant reference. - * unused pages have already been unmaped, - * unmap twice will fail, but it is safe. - */ -mmap_failed: - for (j = 0; j < i; j++) { - if (gnt_arr[i].va) - munmap(gnt_arr[i].va, pg_sz); - } -out: - if (gnt_arr) - free(gnt_arr); - if (orig_va) - munmap(orig_va, sz); - if (mp == NULL) { - if (gref_arr) - free(gref_arr); - if (pa_arr) - free(pa_arr); - - /* some gref has already been de-allocated from the list in the driver, - * so dealloc one by one, and it is safe to deallocate twice - */ - if (orig_va) { - for (i = 0; i < pg_num; i++) { - arg.index = start_index + i * pg_sz; - rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, arg); - } - } - } - return mgi; -} - -struct rte_mempool * -rte_mempool_gntalloc_create(const char *name, unsigned elt_num, unsigned elt_size, - unsigned cache_size, unsigned private_data_size, - rte_mempool_ctor_t *mp_init, void *mp_init_arg, - rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, - int socket_id, unsigned flags) -{ - int rv; - uint32_t i; - struct _mempool_gntalloc_info mgi; - struct ioctl_gntalloc_dealloc_gref arg; - int pg_sz = getpagesize(); - - mgi = _create_mempool(name, elt_num, elt_size, - cache_size, private_data_size, - mp_init, mp_init_arg, - obj_init, obj_init_arg, - socket_id, flags); - if (mgi.mp) { - rv = grant_gntalloc_mbuf_pool(mgi.mp, - mgi.pg_num, - mgi.gref_arr, - mgi.pa_arr, - mgi.mempool_idx); - free(mgi.gref_arr); - free(mgi.pa_arr); - if (rv == 0) - return mgi.mp; - /* - * in _create_mempool, unused pages have already been unmapped, deallocagted - * unmap and dealloc the remained ones here. 
- */ - munmap(mgi.va, pg_sz * mgi.pg_num); - for (i = 0; i < mgi.pg_num; i++) { - arg.index = mgi.start_index + i * pg_sz; - rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, arg); - } - return NULL; - } - return NULL; - - - -} diff --git a/lib/librte_pmd_xenvirt/rte_xen_lib.c b/lib/librte_pmd_xenvirt/rte_xen_lib.c deleted file mode 100644 index b3932f0e06..0000000000 --- a/lib/librte_pmd_xenvirt/rte_xen_lib.c +++ /dev/null @@ -1,428 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200 -#include -#else -#include -#endif -#include - -#include -#include - -#include "rte_xen_lib.h" - -/* - * The grant node format in xenstore for vring/mpool is: - * 0_rx_vring_gref = "gref1#, gref2#, gref3#" - * 0_mempool_gref = "gref1#, gref2#, gref3#" - * each gref# is a grant reference for a shared page. - * In each shared page, we store the grant_node_item items. - */ -struct grant_node_item { - uint32_t gref; - uint32_t pfn; -} __attribute__((packed)); - -/* fd for xen_gntalloc driver, used to allocate grant pages*/ -int gntalloc_fd = -1; - -/* xenstore path for local domain, now it is '/local/domain/domid/' */ -static char *dompath = NULL; -/* handle to xenstore read/write operations */ -static struct xs_handle *xs = NULL; - -/* - * Reserve a virtual address space. - * On success, returns the pointer. On failure, returns NULL. 
- */ -void * -get_xen_virtual(size_t size, size_t page_sz) -{ - void *addr; - uintptr_t aligned_addr; - - addr = mmap(NULL, size + page_sz, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0); - if (addr == MAP_FAILED) { - RTE_LOG(ERR, PMD, "failed get a virtual area\n"); - return NULL; - } - - aligned_addr = RTE_ALIGN_CEIL((uintptr_t)addr, page_sz); - addr = (void *)(aligned_addr); - - return addr; -} - -/* - * Get the physical address for virtual memory starting at va. - */ -int -get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num, uint32_t pg_sz) -{ - int32_t fd, rc = 0; - uint32_t i, nb; - off_t ofs; - - ofs = (uintptr_t)va / pg_sz * sizeof(*pa); - nb = pg_num * sizeof(*pa); - - if ((fd = open(PAGEMAP_FNAME, O_RDONLY)) < 0 || - (rc = pread(fd, pa, nb, ofs)) < 0 || - (rc -= nb) != 0) { - RTE_LOG(ERR, PMD, "%s: failed read of %u bytes from \'%s\' " - "at offset %zu, error code: %d\n", - __func__, nb, PAGEMAP_FNAME, ofs, errno); - rc = ENOENT; - } - - close(fd); - for (i = 0; i != pg_num; i++) - pa[i] = (pa[i] & PAGEMAP_PFN_MASK) * pg_sz; - - return rc; -} - -int -gntalloc_open(void) -{ - gntalloc_fd = open(XEN_GNTALLOC_FNAME, O_RDWR); - return (gntalloc_fd != -1) ? 
0 : -1; -} - -void -gntalloc_close(void) -{ - if (gntalloc_fd != -1) - close(gntalloc_fd); - gntalloc_fd = -1; -} - -void * -gntalloc(size_t size, uint32_t *gref, uint64_t *start_index) -{ - int page_size = getpagesize(); - uint32_t i, pg_num; - void *va; - int rv; - struct ioctl_gntalloc_alloc_gref *arg; - struct ioctl_gntalloc_dealloc_gref arg_d; - - if (size % page_size) { - RTE_LOG(ERR, PMD, "%s: %zu isn't multiple of page size\n", - __func__, size); - return NULL; - } - - pg_num = size / page_size; - arg = malloc(sizeof(*arg) + (pg_num - 1) * sizeof(uint32_t)); - if (arg == NULL) - return NULL; - arg->domid = DOM0_DOMID; - arg->flags = GNTALLOC_FLAG_WRITABLE; - arg->count = pg_num; - - rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_ALLOC_GREF, arg); - if (rv) { - RTE_LOG(ERR, PMD, "%s: ioctl error\n", __func__); - free(arg); - return NULL; - } - - va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gntalloc_fd, arg->index); - if (va == MAP_FAILED) { - RTE_LOG(ERR, PMD, "%s: mmap failed\n", __func__); - arg_d.count = pg_num; - arg_d.index = arg->index; - ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, arg_d); - free(arg); - return NULL; - } - - if (gref) { - for (i = 0; i < pg_num; i++) { - gref[i] = arg->gref_ids[i]; - } - } - if (start_index) - *start_index = arg->index; - - free(arg); - - return va; -} - -int -grefwatch_from_alloc(uint32_t *gref, void **pptr) -{ - int rv; - void *ptr; - int pg_size = getpagesize(); - struct ioctl_gntalloc_alloc_gref arg = { - .domid = DOM0_DOMID, - .flags = GNTALLOC_FLAG_WRITABLE, - .count = 1 - }; - struct ioctl_gntalloc_dealloc_gref arg_d; - struct ioctl_gntalloc_unmap_notify notify = { - .action = UNMAP_NOTIFY_CLEAR_BYTE - }; - - rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_ALLOC_GREF, &arg); - if (rv) { - RTE_LOG(ERR, PMD, "%s: ioctl error\n", __func__); - return -1; - } - - ptr = (void *)mmap(NULL, pg_size, PROT_READ|PROT_WRITE, MAP_SHARED, gntalloc_fd, arg.index); - arg_d.index = arg.index; - arg_d.count = 1; - if (ptr == 
MAP_FAILED) { - RTE_LOG(ERR, PMD, "%s: mmap failed\n", __func__); - ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg_d); - return -1; - } - if (pptr) - *pptr = ptr; - if (gref) - *gref = arg.gref_ids[0]; - - notify.index = arg.index; - rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_SET_UNMAP_NOTIFY, &notify); - if (rv) { - RTE_LOG(ERR, PMD, "%s: unmap notify failed\n", __func__); - munmap(ptr, pg_size); - ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg_d); - return -1; - } - - return 0; -} - -void -gntfree(void *va, size_t sz, uint64_t start_index) -{ - struct ioctl_gntalloc_dealloc_gref arg_d; - - if (va && sz) { - munmap(va, sz); - arg_d.count = sz / getpagesize(); - arg_d.index = start_index; - ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg_d); - } -} - -static int -xenstore_cleanup(void) -{ - char store_path[PATH_MAX] = {0}; - - if (snprintf(store_path, sizeof(store_path), - "%s%s", dompath, DPDK_XENSTORE_NODE) == -1) - return -1; - - if (xs_rm(xs, XBT_NULL, store_path) == false) { - RTE_LOG(ERR, PMD, "%s: failed cleanup node\n", __func__); - return -1; - } - - return 0; -} - -int -xenstore_init(void) -{ - unsigned int len, domid; - char *buf; - static int cleanup = 0; - char *end; - - xs = xs_domain_open(); - if (xs == NULL) { - RTE_LOG(ERR, PMD,"%s: xs_domain_open failed\n", __func__); - return -1; - } - buf = xs_read(xs, XBT_NULL, "domid", &len); - if (buf == NULL) { - RTE_LOG(ERR, PMD, "%s: failed read domid\n", __func__); - return -1; - } - errno = 0; - domid = strtoul(buf, &end, 0); - if (errno != 0 || end == NULL || end == buf || domid == 0) - return -1; - - RTE_LOG(INFO, PMD, "retrieved dom ID = %d\n", domid); - - dompath = xs_get_domain_path(xs, domid); - if (dompath == NULL) - return -1; - - xs_transaction_start(xs); /* When to stop transaction */ - - if (cleanup == 0) { - if (xenstore_cleanup()) - return -1; - cleanup = 1; - } - - return 0; -} - -int -xenstore_write(const char *key_str, const char *val_str) -{ - char grant_path[PATH_MAX]; - int 
rv, len; - - if (xs == NULL) { - RTE_LOG(ERR, PMD, "%s: xenstore init failed\n", __func__); - return -1; - } - rv = snprintf(grant_path, sizeof(grant_path), "%s%s", dompath, key_str); - if (rv == -1) { - RTE_LOG(ERR, PMD, "%s: snprintf %s %s failed\n", - __func__, dompath, key_str); - return -1; - } - len = strnlen(val_str, PATH_MAX); - - if (xs_write(xs, XBT_NULL, grant_path, val_str, len) == false) { - RTE_LOG(ERR, PMD, "%s: xs_write failed\n", __func__); - return -1; - } - - return 0; -} - -int -grant_node_create(uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, char *val_str, size_t str_size) -{ - uint64_t start_index; - int pg_size; - uint32_t pg_shift; - void *ptr = NULL; - uint32_t count, entries_per_pg; - uint32_t i, j = 0, k = 0;; - uint32_t *gref_tmp; - int first = 1; - char tmp_str[PATH_MAX] = {0}; - int rv = -1; - - pg_size = getpagesize(); - if (rte_is_power_of_2(pg_size) == 0) { - return -1; - } - pg_shift = rte_bsf32(pg_size); - if (pg_size % sizeof(struct grant_node_item)) { - RTE_LOG(ERR, PMD, "pg_size isn't a multiple of grant node item\n"); - return -1; - } - - entries_per_pg = pg_size / sizeof(struct grant_node_item); - count = (pg_num + entries_per_pg - 1 ) / entries_per_pg; - gref_tmp = malloc(count * sizeof(uint32_t)); - if (gref_tmp == NULL) - return -1; - ptr = gntalloc(pg_size * count, gref_tmp, &start_index); - if (ptr == NULL) { - RTE_LOG(ERR, PMD, "%s: gntalloc error of %d pages\n", __func__, count); - free(gref_tmp); - return -1; - } - - while (j < pg_num) { - if (first) { - rv = snprintf(val_str, str_size, "%u", gref_tmp[k]); - first = 0; - } else { - snprintf(tmp_str, PATH_MAX, "%s", val_str); - rv = snprintf(val_str, str_size, "%s,%u", tmp_str, gref_tmp[k]); - } - k++; - if (rv == -1) - break; - - for (i = 0; i < entries_per_pg && j < pg_num ; i++) { - ((struct grant_node_item *)ptr)->gref = gref_arr[j]; - ((struct grant_node_item *)ptr)->pfn = pa_arr[j] >> pg_shift; - ptr = RTE_PTR_ADD(ptr, sizeof(struct grant_node_item)); 
- j++; - } - } - if (rv == -1) { - gntfree(ptr, pg_size * count, start_index); - } else - rv = 0; - free(gref_tmp); - return rv; -} - - -int -grant_gntalloc_mbuf_pool(struct rte_mempool *mpool, uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, int mempool_idx) -{ - char key_str[PATH_MAX] = {0}; - char val_str[PATH_MAX] = {0}; - - if (grant_node_create(pg_num, gref_arr, pa_arr, val_str, sizeof(val_str))) { - return -1; - } - - if (snprintf(key_str, sizeof(key_str), - DPDK_XENSTORE_PATH"%d"MEMPOOL_XENSTORE_STR, mempool_idx) == -1) - return -1; - if (xenstore_write(key_str, val_str) == -1) - return -1; - - if (snprintf(key_str, sizeof(key_str), - DPDK_XENSTORE_PATH"%d"MEMPOOL_VA_XENSTORE_STR, mempool_idx) == -1) - return -1; - if (snprintf(val_str, sizeof(val_str), "%"PRIxPTR, (uintptr_t)mpool->elt_va_start) == -1) - return -1; - if (xenstore_write(key_str, val_str) == -1) - return -1; - - return 0; -} diff --git a/lib/librte_pmd_xenvirt/rte_xen_lib.h b/lib/librte_pmd_xenvirt/rte_xen_lib.h deleted file mode 100644 index 0ba7148af9..0000000000 --- a/lib/librte_pmd_xenvirt/rte_xen_lib.h +++ /dev/null @@ -1,113 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_XEN_DUMMY_PMD_H -#define _RTE_XEN_DUMMY_PMD_H - -#include - -#include -#include -#include - -#define PAGEMAP_FNAME "/proc/self/pagemap" -#define XEN_GNTALLOC_FNAME "/dev/xen/gntalloc" -#define DPDK_XENSTORE_PATH "/control/dpdk/" -#define DPDK_XENSTORE_NODE "/control/dpdk" -/*format 0_mempool_gref = "1537,1524,1533" */ -#define MEMPOOL_XENSTORE_STR "_mempool_gref" -/*format 0_mempool_va = 0x80340000 */ -#define MEMPOOL_VA_XENSTORE_STR "_mempool_va" -/*format 0_rx_vring_gref = "1537,1524,1533" */ -#define RXVRING_XENSTORE_STR "_rx_vring_gref" -/*format 0_tx_vring_gref = "1537,1524,1533" */ -#define TXVRING_XENSTORE_STR "_tx_vring_gref" -#define VRING_FLAG_STR "_vring_flag" -/*format: event_type_start_0 = 1*/ -#define EVENT_TYPE_START_STR "event_type_start_" - -#define DOM0_DOMID 0 -/* - * the pfn (page frame number) are bits 0-54 (see pagemap.txt in linux - * Documentation). 
- */ -#define PAGEMAP_PFN_BITS 54 -#define PAGEMAP_PFN_MASK RTE_LEN2MASK(PAGEMAP_PFN_BITS, phys_addr_t) - -#define MAP_FLAG 0xA5 - -#define RTE_ETH_XENVIRT_PAIRS_DELIM ';' -#define RTE_ETH_XENVIRT_KEY_VALUE_DELIM '=' -#define RTE_ETH_XENVIRT_MAX_ARGS 1 -#define RTE_ETH_XENVIRT_MAC_PARAM "mac" -struct xenvirt_dict { - uint8_t addr_valid; - struct ether_addr addr; -}; - -extern int gntalloc_fd; - -int -gntalloc_open(void); - -void -gntalloc_close(void); - -void * -gntalloc(size_t sz, uint32_t *gref, uint64_t *start_index); - -void -gntfree(void *va, size_t sz, uint64_t start_index); - -int -xenstore_init(void); - -int -xenstore_write(const char *key_str, const char *val_str); - -int -get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num, uint32_t pg_sz); - -void * -get_xen_virtual(size_t size, size_t page_sz); - -int -grefwatch_from_alloc(uint32_t *gref, void **pptr); - - -int grant_node_create(uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, char *val_str, size_t str_size); - -int -grant_gntalloc_mbuf_pool(struct rte_mempool *mpool, uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, int mempool_idx); - -#endif diff --git a/lib/librte_pmd_xenvirt/virtio_logs.h b/lib/librte_pmd_xenvirt/virtio_logs.h deleted file mode 100644 index d6c33f7b38..0000000000 --- a/lib/librte_pmd_xenvirt/virtio_logs.h +++ /dev/null @@ -1,70 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _VIRTIO_LOGS_H_ -#define _VIRTIO_LOGS_H_ - -#include - -#ifdef RTE_LIBRTE_VIRTIO_DEBUG_INIT -#define PMD_INIT_LOG(level, fmt, args...) \ - RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) -#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>") -#else -#define PMD_INIT_LOG(level, fmt, args...) do { } while(0) -#define PMD_INIT_FUNC_TRACE() do { } while(0) -#endif - -#ifdef RTE_LIBRTE_VIRTIO_DEBUG_RX -#define PMD_RX_LOG(level, fmt, args...) \ - RTE_LOG(level, PMD, "%s() rx: " fmt , __func__, ## args) -#else -#define PMD_RX_LOG(level, fmt, args...) do { } while(0) -#endif - -#ifdef RTE_LIBRTE_VIRTIO_DEBUG_TX -#define PMD_TX_LOG(level, fmt, args...) \ - RTE_LOG(level, PMD, "%s() tx: " fmt , __func__, ## args) -#else -#define PMD_TX_LOG(level, fmt, args...) do { } while(0) -#endif - - -#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DRIVER -#define PMD_DRV_LOG(level, fmt, args...) 
\ - RTE_LOG(level, PMD, "%s(): " fmt , __func__, ## args) -#else -#define PMD_DRV_LOG(level, fmt, args...) do { } while(0) -#endif - -#endif /* _VIRTIO_LOGS_H_ */ diff --git a/lib/librte_pmd_xenvirt/virtqueue.h b/lib/librte_pmd_xenvirt/virtqueue.h deleted file mode 100644 index eff6208623..0000000000 --- a/lib/librte_pmd_xenvirt/virtqueue.h +++ /dev/null @@ -1,280 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _VIRTQUEUE_H_ -#define _VIRTQUEUE_H_ - -#include -#include -#include - -#include -#include -#include -#include - -#include "virtio_logs.h" - -struct rte_mbuf; - -/* The alignment to use between consumer and producer parts of vring. */ -#define VIRTIO_PCI_VRING_ALIGN 4096 - -/* - * Address translatio is between gva<->hva, - * rather than gpa<->hva in virito spec. - */ -#define RTE_MBUF_DATA_DMA_ADDR(mb) \ - rte_pktmbuf_mtod(mb, uint64_t) - -enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 }; - -/** - * The maximum virtqueue size is 2^15. Use that value as the end of - * descriptor chain terminator since it will never be a valid index - * in the descriptor table. This is used to verify we are correctly - * handling vq_free_cnt. - */ -#define VQ_RING_DESC_CHAIN_END 32768 - -#define VIRTQUEUE_MAX_NAME_SZ 32 - -struct pmd_internals { - struct rte_eth_stats eth_stats; - int port_id; - int virtio_idx; -}; - - -struct virtqueue { - char vq_name[VIRTQUEUE_MAX_NAME_SZ]; - struct rte_mempool *mpool; /**< mempool for mbuf allocation */ - uint16_t queue_id; /**< DPDK queue index. */ - uint16_t vq_queue_index; /**< PCI queue index */ - uint8_t port_id; /**< Device port identifier. */ - - void *vq_ring_virt_mem; /**< virtual address of vring*/ - int vq_alignment; - int vq_ring_size; - - struct vring vq_ring; /**< vring keeping desc, used and avail */ - struct pmd_internals *internals; /**< virtio device internal info. 
*/ - uint16_t vq_nentries; /**< vring desc numbers */ - uint16_t vq_desc_head_idx; - uint16_t vq_free_cnt; /**< num of desc available */ - uint16_t vq_used_cons_idx; /**< Last consumed desc in used table, trails vq_ring.used->idx*/ - - struct vq_desc_extra { - void *cookie; - uint16_t ndescs; - } vq_descx[0] __rte_cache_aligned; -}; - - -#ifdef RTE_LIBRTE_XENVIRT_DEBUG_DUMP -#define VIRTQUEUE_DUMP(vq) do { \ - uint16_t used_idx, nused; \ - used_idx = (vq)->vq_ring.used->idx; \ - nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \ - PMD_INIT_LOG(DEBUG, \ - "VQ: %s - size=%d; free=%d; used=%d; desc_head_idx=%d;" \ - " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \ - " avail.flags=0x%x; used.flags=0x%x\n", \ - (vq)->vq_name, (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \ - (vq)->vq_desc_head_idx, (vq)->vq_ring.avail->idx, \ - (vq)->vq_used_cons_idx, (vq)->vq_ring.used->idx, \ - (vq)->vq_ring.avail->flags, (vq)->vq_ring.used->flags); \ -} while (0) -#else -#define VIRTQUEUE_DUMP(vq) do { } while (0) -#endif - - -/** - * Dump virtqueue internal structures, for debug purpose only. - */ -void virtqueue_dump(struct virtqueue *vq); - -/** - * Get all mbufs to be freed. - */ -struct rte_mbuf * virtqueue_detatch_unused(struct virtqueue *vq); - -static inline int __attribute__((always_inline)) -virtqueue_full(const struct virtqueue *vq) -{ - return (vq->vq_free_cnt == 0); -} - -#define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx)) - -static inline void __attribute__((always_inline)) -vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx) -{ - uint16_t avail_idx; - /* - * Place the head of the descriptor chain into the next slot and make - * it usable to the host. The chain is made available now rather than - * deferring to virtqueue_notify() in the hopes that if the host is - * currently running on another CPU, we can keep it processing the new - * descriptor. 
- */ - avail_idx = (uint16_t)(vq->vq_ring.avail->idx & (vq->vq_nentries - 1)); - vq->vq_ring.avail->ring[avail_idx] = desc_idx; - rte_compiler_barrier(); /* wmb , for IA memory model barrier is enough*/ - vq->vq_ring.avail->idx++; -} - -static inline void __attribute__((always_inline)) -vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) -{ - struct vring_desc *dp; - struct vq_desc_extra *dxp; - - dp = &vq->vq_ring.desc[desc_idx]; - dxp = &vq->vq_descx[desc_idx]; - vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs); - while (dp->flags & VRING_DESC_F_NEXT) { - dp = &vq->vq_ring.desc[dp->next]; - } - dxp->ndescs = 0; - - /* - * We must append the existing free chain, if any, to the end of - * newly freed chain. If the virtqueue was completely used, then - * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above). - */ - dp->next = vq->vq_desc_head_idx; - vq->vq_desc_head_idx = desc_idx; -} - -static inline int __attribute__((always_inline)) -virtqueue_enqueue_recv_refill(struct virtqueue *rxvq, struct rte_mbuf *cookie) -{ - const uint16_t needed = 1; - const uint16_t head_idx = rxvq->vq_desc_head_idx; - struct vring_desc *start_dp = rxvq->vq_ring.desc; - struct vq_desc_extra *dxp; - - if (unlikely(rxvq->vq_free_cnt == 0)) - return -ENOSPC; - if (unlikely(rxvq->vq_free_cnt < needed)) - return -EMSGSIZE; - if (unlikely(head_idx >= rxvq->vq_nentries)) - return -EFAULT; - - dxp = &rxvq->vq_descx[head_idx]; - dxp->cookie = (void *)cookie; - dxp->ndescs = needed; - - start_dp[head_idx].addr = - (uint64_t) ((uint64_t)cookie->buf_addr + RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr)); - start_dp[head_idx].len = cookie->buf_len - RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr); - start_dp[head_idx].flags = VRING_DESC_F_WRITE; - rxvq->vq_desc_head_idx = start_dp[head_idx].next; - rxvq->vq_free_cnt = (uint16_t)(rxvq->vq_free_cnt - needed); - vq_ring_update_avail(rxvq, head_idx); - - return 0; -} - -static inline int __attribute__((always_inline)) 
-virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie) -{ - - const uint16_t needed = 2; - struct vring_desc *start_dp = txvq->vq_ring.desc; - uint16_t head_idx = txvq->vq_desc_head_idx; - uint16_t idx = head_idx; - struct vq_desc_extra *dxp; - - if (unlikely(txvq->vq_free_cnt == 0)) - return -ENOSPC; - if (unlikely(txvq->vq_free_cnt < needed)) - return -EMSGSIZE; - if (unlikely(head_idx >= txvq->vq_nentries)) - return -EFAULT; - - dxp = &txvq->vq_descx[idx]; - dxp->cookie = (void *)cookie; - dxp->ndescs = needed; - - start_dp = txvq->vq_ring.desc; - start_dp[idx].addr = 0; -/* - * TODO: save one desc here? - */ - start_dp[idx].len = sizeof(struct virtio_net_hdr); - start_dp[idx].flags = VRING_DESC_F_NEXT; - start_dp[idx].addr = (uintptr_t)NULL; - idx = start_dp[idx].next; - start_dp[idx].addr = RTE_MBUF_DATA_DMA_ADDR(cookie); - start_dp[idx].len = cookie->data_len; - start_dp[idx].flags = 0; - idx = start_dp[idx].next; - txvq->vq_desc_head_idx = idx; - txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed); - vq_ring_update_avail(txvq, head_idx); - - return 0; -} - -static inline uint16_t __attribute__((always_inline)) -virtqueue_dequeue_burst(struct virtqueue *vq, struct rte_mbuf **rx_pkts, uint32_t *len, uint16_t num) -{ - struct vring_used_elem *uep; - struct rte_mbuf *cookie; - uint16_t used_idx, desc_idx; - uint16_t i; - /* Caller does the check */ - for (i = 0; i < num ; i ++) { - used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); - uep = &vq->vq_ring.used->ring[used_idx]; - desc_idx = (uint16_t) uep->id; - cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie; - if (unlikely(cookie == NULL)) { - PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n", - vq->vq_used_cons_idx); - RTE_LOG(ERR, PMD, "%s: inconsistent (%u, %u)\n", __func__, used_idx , desc_idx); - break; - } - len[i] = uep->len; - rx_pkts[i] = cookie; - vq->vq_used_cons_idx++; - vq_ring_free_chain(vq, desc_idx); - vq->vq_descx[desc_idx].cookie 
= NULL; - } - return i; -} - -#endif /* _VIRTQUEUE_H_ */