#include <rte_tcp.h>
#include <rte_pause.h>
+#include "ioat.h"
#include "main.h"
#ifndef MAX_QUEUES
#define INVALID_PORT_ID 0xFF
-/* Max number of devices. Limited by vmdq. */
-#define MAX_DEVICES 64
-
-/* Size of buffers used for snprintfs. */
-#define MAX_PRINT_BUFF 6072
-
/* Maximum long option length for option parsing. */
#define MAX_LONG_OPT_SZ 64
static uint32_t enable_tso;
static int client_mode;
-static int dequeue_zero_copy;
static int builtin_net_driver;
+static int async_vhost_driver;
+
+static char dma_type[MAX_LONG_OPT_SZ];
+
/* Specify timeout (in microseconds) between retries on RX. */
static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
/* Specify the number of retries on RX. */
.rxmode = {
.mq_mode = ETH_MQ_RX_VMDQ_ONLY,
.split_hdr_size = 0,
- .ignore_offload_bitfield = 1,
/*
* VLAN strip is necessary for 1G NICs such as I350;
* it fixes a bug where ipv4 forwarding in the guest cannot
* forward packets from one virtio dev to another virtio dev.
*/
- .offloads = (DEV_RX_OFFLOAD_CRC_STRIP |
- DEV_RX_OFFLOAD_VLAN_STRIP),
+ .offloads = DEV_RX_OFFLOAD_VLAN_STRIP,
},
.txmode = {
};
/* ethernet addresses of ports */
-static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
+static struct rte_ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
static struct vhost_dev_tailq_list vhost_dev_list =
TAILQ_HEAD_INITIALIZER(vhost_dev_list);
/ US_PER_S * BURST_TX_DRAIN_US)
#define VLAN_HLEN 4
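+ /*
+ * Dispatch the --dmas argument to the selected DMA backend. Only the
+ * "ioat" backend is supported here, so --dma-type must be given on the
+ * command line before --dmas for this call to succeed.
+ */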
+static inline int
+open_dma(const char *value)
+{
+ if (strncmp(dma_type, "ioat", 4) == 0)
+ return open_ioat(value);
+
+ return -1;
+}
+
/*
* Builds up the correct configuration for VMDQ VLAN pool map
* according to the pool & queue limits.
return 0;
}
-/*
- * Validate the device number according to the max pool number gotten form
- * dev_info. If the device number is invalid, give the error message and
- * return -1. Each device must have its own pool.
- */
-static inline int
-validate_num_devices(uint32_t max_nb_devices)
-{
- if (num_devices > max_nb_devices) {
- RTE_LOG(ERR, VHOST_PORT, "invalid number of devices\n");
- return -1;
- }
- return 0;
-}
-
/*
* Initialises a given port using global settings and with the rx buffers
* coming from the mbuf_pool passed as parameter
uint16_t q;
/* The max pool number from dev_info is used to set the number of supported vhost devices. */
- rte_eth_dev_info_get (port, &dev_info);
+ retval = rte_eth_dev_info_get(port, &dev_info);
+ if (retval != 0) {
+ RTE_LOG(ERR, VHOST_PORT,
+ "Error during getting device (port %u) info: %s\n",
+ port, strerror(-retval));
+
+ return retval;
+ }
rxconf = &dev_info.default_rxconf;
txconf = &dev_info.default_txconf;
rxconf->rx_drop_en = 1;
- txconf->txq_flags = ETH_TXQ_FLAGS_IGNORE;
/* Configure the number of supported virtio devices based on VMDQ limits. */
num_devices = dev_info.max_vmdq_pools;
rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
- /*
- * When dequeue zero copy is enabled, guest Tx used vring will be
- * updated only when corresponding mbuf is freed. Thus, the nb_tx_desc
- * (tx_ring_size here) must be small enough so that the driver will
- * hit the free threshold easily and free mbufs timely. Otherwise,
- * guest Tx vring would be starved.
- */
- if (dequeue_zero_copy)
- tx_ring_size = 64;
-
tx_rings = (uint16_t)rte_lcore_count();
- retval = validate_num_devices(MAX_DEVICES);
- if (retval < 0)
- return retval;
-
/* Get port configuration. */
retval = get_eth_conf(&port_conf, num_devices);
if (retval < 0)
printf("pf queue num: %u, configured vmdq pool num: %u, each vmdq pool has %u queues\n",
num_pf_queues, num_devices, queues_per_pool);
- if (port >= rte_eth_dev_count()) return -1;
+ if (!rte_eth_dev_is_valid_port(port))
+ return -1;
rx_rings = (uint16_t)dev_info.max_rx_queues;
if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
return retval;
}
- if (promiscuous)
- rte_eth_promiscuous_enable(port);
+ if (promiscuous) {
+ retval = rte_eth_promiscuous_enable(port);
+ if (retval != 0) {
+ RTE_LOG(ERR, VHOST_PORT,
+ "Failed to enable promiscuous mode on port %u: %s\n",
+ port, rte_strerror(-retval));
+ return retval;
+ }
+ }
+
+ retval = rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
+ if (retval < 0) {
+ RTE_LOG(ERR, VHOST_PORT,
+ "Failed to get MAC address on port %u: %s\n",
+ port, rte_strerror(-retval));
+ return retval;
+ }
- rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices);
RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
static int
us_vhost_parse_socket_path(const char *q_arg)
{
+ char *old;
+
/* parse number string */
if (strnlen(q_arg, PATH_MAX) == PATH_MAX)
return -1;
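+ /* Keep the old pointer so it can be freed if realloc() fails. */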
+ old = socket_files;
socket_files = realloc(socket_files, PATH_MAX * (nb_sockets + 1));
- snprintf(socket_files + nb_sockets * PATH_MAX, PATH_MAX, "%s", q_arg);
+ if (socket_files == NULL) {
+ free(old);
+ return -1;
+ }
+
+ strlcpy(socket_files + nb_sockets * PATH_MAX, q_arg, PATH_MAX);
nb_sockets++;
return 0;
/* parse hexadecimal string */
pm = strtoul(portmask, &end, 16);
if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
- return -1;
-
- if (pm == 0)
- return -1;
+ return 0;
return pm;
" --tx-csum [0|1] disable/enable TX checksum offload.\n"
" --tso [0|1] disable/enable TCP segment offload.\n"
" --client register a vhost-user socket as client mode.\n"
- " --dequeue-zero-copy enables dequeue zero copy\n",
+ " --dma-type register DMA type for the vhost async driver, e.g. \"ioat\" (the only type supported for now).\n"
+ " --dmas register a DMA channel for a specific vhost device.\n",
prgname);
}
{"tx-csum", required_argument, NULL, 0},
{"tso", required_argument, NULL, 0},
{"client", no_argument, &client_mode, 1},
- {"dequeue-zero-copy", no_argument, &dequeue_zero_copy, 1},
{"builtin-net-driver", no_argument, &builtin_net_driver, 1},
+ {"dma-type", required_argument, NULL, 0},
+ {"dmas", required_argument, NULL, 0},
{NULL, 0, 0, 0},
};
}
}
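+ /* --dma-type: name of the DMA backend used by the async data path. */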
+ if (!strncmp(long_option[option_index].name,
+ "dma-type", MAX_LONG_OPT_SZ)) {
+ if (strlen(optarg) >= MAX_LONG_OPT_SZ) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Wrong DMA type\n");
+ us_vhost_usage(prgname);
+ return -1;
+ }
+ strcpy(dma_type, optarg);
+ }
+
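+ /*
+ * --dmas: bind DMA channels to vhost devices and switch the example
+ * to the async data path.
+ */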
+ if (!strncmp(long_option[option_index].name,
+ "dmas", MAX_LONG_OPT_SZ)) {
+ if (open_dma(optarg) == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Wrong DMA args\n");
+ us_vhost_usage(prgname);
+ return -1;
+ }
+ async_vhost_driver = 1;
+ }
+
break;
/* Invalid option - print options. */
}
for (portid = 0; portid < num_ports; portid ++) {
- if (ports[portid] >= nb_ports) {
- RTE_LOG(INFO, VHOST_PORT, "\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
- ports[portid], (nb_ports - 1));
+ if (!rte_eth_dev_is_valid_port(ports[portid])) {
+ RTE_LOG(INFO, VHOST_PORT,
+ "\nSpecified port ID(%u) is not valid\n",
+ ports[portid]);
ports[portid] = INVALID_PORT_ID;
valid_num_ports--;
}
}
static __rte_always_inline struct vhost_dev *
-find_vhost_dev(struct ether_addr *mac)
+find_vhost_dev(struct rte_ether_addr *mac)
{
struct vhost_dev *vdev;
TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
if (vdev->ready == DEVICE_RX &&
- is_same_ether_addr(mac, &vdev->mac_address))
+ rte_is_same_ether_addr(mac, &vdev->mac_address))
return vdev;
}
static int
link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m)
{
- struct ether_hdr *pkt_hdr;
+ struct rte_ether_hdr *pkt_hdr;
int i, ret;
/* Learn MAC address of guest device from packet */
- pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
if (find_vhost_dev(&pkt_hdr->s_addr)) {
RTE_LOG(ERR, VHOST_DATA,
return -1;
}
- for (i = 0; i < ETHER_ADDR_LEN; i++)
+ for (i = 0; i < RTE_ETHER_ADDR_LEN; i++)
vdev->mac_address.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i];
/* vlan_tag currently uses the device_id. */
struct rte_mbuf *m)
{
uint16_t ret;
+ struct rte_mbuf *m_cpl[1], *comp_pkt;
+ uint32_t nr_comp = 0;
if (builtin_net_driver) {
ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
+ } else if (async_vhost_driver) {
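+ /*
+ * Async path: submit the copy to the DMA channel, then poll until all
+ * in-flight copies for this device have completed before returning.
+ */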
+ ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
+ &m, 1, &comp_pkt, &nr_comp);
+ if (nr_comp == 1)
+ goto done;
+
+ if (likely(ret))
+ dst_vdev->nr_async_pkts++;
+
+ while (likely(dst_vdev->nr_async_pkts)) {
+ if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
+ VIRTIO_RXQ, m_cpl, 1))
+ dst_vdev->nr_async_pkts--;
+ }
} else {
ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
}
+done:
if (enable_stats) {
rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
static __rte_always_inline int
virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
{
- struct ether_hdr *pkt_hdr;
+ struct rte_ether_hdr *pkt_hdr;
struct vhost_dev *dst_vdev;
- pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
if (!dst_vdev)
uint32_t *offset, uint16_t *vlan_tag)
{
struct vhost_dev *dst_vdev;
- struct ether_hdr *pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ struct rte_ether_hdr *pkt_hdr =
+ rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
if (!dst_vdev)
{
if (ol_flags & PKT_TX_IPV4)
return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
- else /* assume ethertype == ETHER_TYPE_IPv6 */
+ else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
}
static void virtio_tx_offload(struct rte_mbuf *m)
{
void *l3_hdr;
- struct ipv4_hdr *ipv4_hdr = NULL;
- struct tcp_hdr *tcp_hdr = NULL;
- struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ struct rte_ipv4_hdr *ipv4_hdr = NULL;
+ struct rte_tcp_hdr *tcp_hdr = NULL;
+ struct rte_ether_hdr *eth_hdr =
+ rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
l3_hdr = (char *)eth_hdr + m->l2_len;
m->ol_flags |= PKT_TX_IP_CKSUM;
}
- tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + m->l3_len);
+ tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + m->l3_len);
tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
}
struct mbuf_table *tx_q;
unsigned offset = 0;
const uint16_t lcore_id = rte_lcore_id();
- struct ether_hdr *nh;
+ struct rte_ether_hdr *nh;
- nh = rte_pktmbuf_mtod(m, struct ether_hdr *);
- if (unlikely(is_broadcast_ether_addr(&nh->d_addr))) {
+ nh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+ if (unlikely(rte_is_broadcast_ether_addr(&nh->d_addr))) {
struct vhost_dev *vdev2;
TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
/* Add packet to the port tx queue. */
tx_q = &lcore_tx_queue[lcore_id];
- nh = rte_pktmbuf_mtod(m, struct ether_hdr *);
- if (unlikely(nh->ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN))) {
+ nh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+ if (unlikely(nh->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN))) {
/* Guest has inserted the vlan tag. */
- struct vlan_hdr *vh = (struct vlan_hdr *) (nh + 1);
+ struct rte_vlan_hdr *vh = (struct rte_vlan_hdr *) (nh + 1);
uint16_t vlan_tag_be = rte_cpu_to_be_16(vlan_tag);
if ((vm2vm_mode == VM2VM_HARDWARE) &&
(vh->vlan_tci != vlan_tag_be))
}
}
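+ /*
+ * Poll the async channel for enqueue copies that have completed and
+ * free the corresponding source mbufs.
+ */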
+static __rte_always_inline void
+complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
+{
+ struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+ uint16_t complete_count;
+
+ complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+ qid, p_cpl, MAX_PKT_BURST);
+ vdev->nr_async_pkts -= complete_count;
+ if (complete_count)
+ free_pkts(p_cpl, complete_count);
+}
+
static __rte_always_inline void
drain_eth_rx(struct vhost_dev *vdev)
{
uint16_t rx_count, enqueue_count;
- struct rte_mbuf *pkts[MAX_PKT_BURST];
+ struct rte_mbuf *pkts[MAX_PKT_BURST], *comp_pkts[MAX_PKT_BURST];
+ uint32_t nr_comp = 0;
rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
pkts, MAX_PKT_BURST);
+
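+ /* Wait for outstanding async copies to finish before submitting more. */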
+ while (likely(vdev->nr_async_pkts))
+ complete_async_pkts(vdev, VIRTIO_RXQ);
+
if (!rx_count)
return;
if (builtin_net_driver) {
enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
pkts, rx_count);
+ } else if (async_vhost_driver) {
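+ /*
+ * Copies that completed immediately are returned in comp_pkts and freed
+ * here; the rest are freed by complete_async_pkts() once the DMA engine
+ * is done with them.
+ */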
+ enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
+ VIRTIO_RXQ, pkts, rx_count, comp_pkts,
+ &nr_comp);
+ if (nr_comp > 0) {
+ free_pkts(comp_pkts, nr_comp);
+ enqueue_count -= nr_comp;
+ }
+ vdev->nr_async_pkts += enqueue_count;
} else {
enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
pkts, rx_count);
}
+
if (enable_stats) {
rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
}
- free_pkts(pkts, rx_count);
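+ /* In async mode the mbufs are freed when their DMA copies complete. */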
+ if (!async_vhost_driver)
+ free_pkts(pkts, rx_count);
}
static __rte_always_inline void
/* Set the dev_removal_flag on each lcore. */
- RTE_LCORE_FOREACH_SLAVE(lcore)
+ RTE_LCORE_FOREACH_WORKER(lcore)
lcore_info[lcore].dev_removal_flag = REQUEST_DEV_REMOVAL;
/*
* we can be sure that they can no longer access the device removed
* from the linked lists and that the devices are no longer in use.
*/
- RTE_LCORE_FOREACH_SLAVE(lcore) {
+ RTE_LCORE_FOREACH_WORKER(lcore) {
while (lcore_info[lcore].dev_removal_flag != ACK_DEV_REMOVAL)
rte_pause();
}
"(%d) device has been removed from data core\n",
vdev->vid);
+ if (async_vhost_driver)
+ rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+
rte_free(vdev);
}
/*
* A new device is added to a data core. First the device is added to the main linked list
- * and the allocated to a specific data core.
+ * and then allocated to a specific data core.
*/
static int
new_device(int vid)
int lcore, core_add = 0;
uint32_t device_num_min = num_devices;
struct vhost_dev *vdev;
-
vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
if (vdev == NULL) {
RTE_LOG(INFO, VHOST_DATA,
vdev->remove = 0;
/* Find a suitable lcore to add the device. */
- RTE_LCORE_FOREACH_SLAVE(lcore) {
+ RTE_LCORE_FOREACH_WORKER(lcore) {
if (lcore_info[lcore].device_num < device_num_min) {
device_num_min = lcore_info[lcore].device_num;
core_add = lcore;
"(%d) device has been added to data core %d\n",
vid, vdev->coreid);
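+ /*
+ * For the async data path, register this device's RX queue with the
+ * ioat callbacks. async_threshold is the packet length below which the
+ * CPU copy path is still used; f.intval is the packed form of the
+ * feature bits set below.
+ */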
+ if (async_vhost_driver) {
+ struct rte_vhost_async_features f;
+ struct rte_vhost_async_channel_ops channel_ops;
+ if (strncmp(dma_type, "ioat", 4) == 0) {
+ channel_ops.transfer_data = ioat_transfer_data_cb;
+ channel_ops.check_completed_copies =
+ ioat_check_completed_copies_cb;
+ f.async_inorder = 1;
+ f.async_threshold = 256;
+ return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
+ f.intval, &channel_ops);
+ }
+ }
+
return 0;
}
* This is a thread that wakes up periodically to print stats if the user
* has enabled them.
*/
-static void
-print_stats(void)
+static void *
+print_stats(__rte_unused void *arg)
{
struct vhost_dev *vdev;
uint64_t tx_dropped, rx_dropped;
}
printf("===================================================\n");
+
+ fflush(stdout);
}
+
+ return NULL;
}
static void
int ret, i;
uint16_t portid;
static pthread_t tid;
- char thread_name[RTE_MAX_THREAD_NAME_LEN];
uint64_t flags = 0;
signal(SIGINT, sigint_handler);
rte_exit(EXIT_FAILURE,"Not enough cores\n");
/* Get the number of physical ports. */
- nb_ports = rte_eth_dev_count();
+ nb_ports = rte_eth_dev_count_avail();
/*
* Update the global var NUM_PORTS and global array PORTS
/* Enable stats if the user option is set. */
if (enable_stats) {
- ret = pthread_create(&tid, NULL, (void *)print_stats, NULL);
- if (ret != 0)
+ ret = rte_ctrl_thread_create(&tid, "print-stats", NULL,
+ print_stats, NULL);
+ if (ret < 0)
rte_exit(EXIT_FAILURE,
"Cannot create print-stats thread\n");
-
- /* Set thread_name for aid in debugging. */
- snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-stats");
- ret = rte_thread_setname(tid, thread_name);
- if (ret != 0)
- RTE_LOG(DEBUG, VHOST_CONFIG,
- "Cannot set print-stats name\n");
}
/* Launch all data cores. */
- RTE_LCORE_FOREACH_SLAVE(lcore_id)
+ RTE_LCORE_FOREACH_WORKER(lcore_id)
rte_eal_remote_launch(switch_worker, NULL, lcore_id);
if (client_mode)
flags |= RTE_VHOST_USER_CLIENT;
- if (dequeue_zero_copy)
- flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
-
/* Register vhost user driver to handle vhost messages. */
for (i = 0; i < nb_sockets; i++) {
char *file = socket_files + i * PATH_MAX;
+ if (async_vhost_driver)
+ flags = flags | RTE_VHOST_USER_ASYNC_COPY;
+
ret = rte_vhost_driver_register(file, flags);
if (ret != 0) {
unregister_drivers(i);
}
}
- RTE_LCORE_FOREACH_SLAVE(lcore_id)
+ RTE_LCORE_FOREACH_WORKER(lcore_id)
rte_eal_wait_lcore(lcore_id);
return 0;