/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/binfmts.h>
#include <xen/xen-compat.h>
#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
#include <xs.h>
#else
#include <xenstore.h>
#endif
#include <linux/virtio_ring.h>

#include <rte_mbuf.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_dev.h>
#include <cmdline_parse.h>
#include <cmdline_parse_etheraddr.h>

#include "rte_xen_lib.h"
#include "virtqueue.h"
#include "rte_eth_xenvirt.h"
#define VQ_DESC_NUM 256
#define VIRTIO_MBUF_BURST_SZ 64
/* virtio_idx is increased after a new device is created. */
static int virtio_idx = 0;

static const char *drivername = "xen virtio PMD";
static struct rte_eth_link pmd_link = {
	.link_speed = 10000,
	.link_duplex = ETH_LINK_FULL_DUPLEX,
	.link_status = 0
};
static void
eth_xenvirt_free_queues(struct rte_eth_dev *dev);
static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
	struct rte_mbuf *m;

	m = __rte_mbuf_raw_alloc(mp);
	__rte_mbuf_sanity_check_raw(m, 0);

	return m;
}
static uint16_t
eth_xenvirt_rx(void *q, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct virtqueue *rxvq = q;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	uint32_t i;
	struct pmd_internals *pi = rxvq->internals;

	nb_used = VIRTQUEUE_NUSED(rxvq);

	rte_compiler_barrier(); /* rmb */
	num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
	if (unlikely(num == 0))
		return 0;

	num = virtqueue_dequeue_burst(rxvq, rx_pkts, len, num);
	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d\n", nb_used, num);
	for (i = 0; i < num; i++) {
		rxm = rx_pkts[i];
		PMD_RX_LOG(DEBUG, "packet len:%d\n", len[i]);
		rxm->next = NULL;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->data_len = (uint16_t)(len[i] - sizeof(struct virtio_net_hdr));
		rxm->nb_segs = 1;
		rxm->port = pi->port_id;
		rxm->pkt_len = (uint32_t)(len[i] - sizeof(struct virtio_net_hdr));
	}
	/* Allocate new mbufs for the used descriptors. */
	while (likely(!virtqueue_full(rxvq))) {
		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL))
			break;
		if (unlikely(virtqueue_enqueue_recv_refill(rxvq, new_mbuf))) {
			rte_pktmbuf_free_seg(new_mbuf);
			break;
		}
	}
	pi->eth_stats.ipackets += num;

	return num;
}
static uint16_t
eth_xenvirt_tx(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct virtqueue *txvq = tx_queue;
	struct rte_mbuf *txm;
	uint16_t nb_used, nb_tx, num, i;
	int error;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *snd_pkts[VIRTIO_MBUF_BURST_SZ];
	struct pmd_internals *pi = txvq->internals;

	nb_tx = 0;

	if (unlikely(nb_pkts == 0))
		return 0;

	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
	nb_used = VIRTQUEUE_NUSED(txvq);

	rte_compiler_barrier(); /* rmb */

	/* Free the mbufs of packets the host has already consumed. */
	num = (uint16_t)(likely(nb_used <= VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ);
	num = virtqueue_dequeue_burst(txvq, snd_pkts, len, num);

	for (i = 0; i < num; i++) {
		/* Mergeable buffers are not supported; one segment only. */
		rte_pktmbuf_free_seg(snd_pkts[i]);
	}

	while (nb_tx < nb_pkts) {
		if (likely(!virtqueue_full(txvq))) {
			/* TODO: drop tx_pkts if it contains multiple segments */
			txm = tx_pkts[nb_tx];
			error = virtqueue_enqueue_xmit(txvq, txm);
			if (unlikely(error)) {
				if (error == ENOSPC)
					PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0\n");
				else if (error == EMSGSIZE)
					PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1\n");
				else
					PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d\n", error);
				break;
			}
			nb_tx++;
		} else {
			PMD_TX_LOG(ERR, "No free tx descriptors to transmit\n");
			/* virtqueue_notify is not needed in this para-virtual solution */
			break;
		}
	}
	pi->eth_stats.opackets += nb_tx;

	return nb_tx;
}
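/*
 * Illustrative only (not part of the driver): once the port is started,
 * an application drives the two handlers above through the standard
 * ethdev burst API. The port id and queue id below are hypothetical.
 *
 *	struct rte_mbuf *pkts[VIRTIO_MBUF_BURST_SZ];
 *	uint16_t nb_rx = rte_eth_rx_burst(0, 0, pkts, VIRTIO_MBUF_BURST_SZ);
 *	uint16_t nb_tx = rte_eth_tx_burst(0, 0, pkts, nb_rx);
 */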
static int
eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
{
	RTE_LOG(ERR, PMD, "%s\n", __func__);
	return 0;
}
/*
 * Create a shared page between guest and host.
 * The host monitors this page; when it is cleared on unmap,
 * the host performs the necessary cleanup.
 */
static void
gntalloc_vring_flag(int vtidx)
{
	char key_str[PATH_MAX];
	char val_str[PATH_MAX];
	uint32_t gref_tmp;
	void *ptr;

	if (grefwatch_from_alloc(&gref_tmp, &ptr)) {
		RTE_LOG(ERR, PMD, "grefwatch_from_alloc error\n");
		exit(0);
	}

	*(uint8_t *)ptr = MAP_FLAG;
	snprintf(val_str, sizeof(val_str), "%u", gref_tmp);
	snprintf(key_str, sizeof(key_str),
		DPDK_XENSTORE_PATH"%d"VRING_FLAG_STR, vtidx);
	xenstore_write(key_str, val_str);
}
/*
 * Notify the host that this virtio device has started,
 * so the host can start polling it.
 */
static void
dev_start_notify(int vtidx)
{
	char key_str[PATH_MAX];
	char val_str[PATH_MAX];

	RTE_LOG(INFO, PMD, "%s: virtio %d is started\n", __func__, vtidx);
	gntalloc_vring_flag(vtidx);

	snprintf(key_str, sizeof(key_str), "%s%s%d",
		DPDK_XENSTORE_PATH, EVENT_TYPE_START_STR,
		vtidx);
	snprintf(val_str, sizeof(val_str), "1");
	xenstore_write(key_str, val_str);
}
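/*
 * Sketch of the xenstore entries written by the two helpers above for
 * virtio index 0. The exact prefixes come from DPDK_XENSTORE_PATH,
 * VRING_FLAG_STR and EVENT_TYPE_START_STR in rte_xen_lib.h; the grant
 * reference value shown is hypothetical:
 *
 *	<DPDK_XENSTORE_PATH>0<VRING_FLAG_STR>       = "2047"
 *	<DPDK_XENSTORE_PATH><EVENT_TYPE_START_STR>0 = "1"
 *
 * The host-side backend watches these keys to find the flag page and to
 * begin polling the device.
 */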
/*
 * Notify the host that this virtio device has stopped,
 * so the host can stop polling it.
 */
static void
dev_stop_notify(int vtidx)
{
	RTE_SET_USED(vtidx);
}
static int
update_mac_address(struct ether_addr *mac_addrs, int vtidx)
{
	char key_str[PATH_MAX];
	char val_str[PATH_MAX];
	int rv;

	if (mac_addrs == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer mac specified\n", __func__);
		return -1;
	}
	rv = snprintf(key_str, sizeof(key_str),
			DPDK_XENSTORE_PATH"%d_ether_addr", vtidx);
	if (rv == -1)
		return rv;
	rv = snprintf(val_str, sizeof(val_str), "%02x:%02x:%02x:%02x:%02x:%02x",
			mac_addrs->addr_bytes[0],
			mac_addrs->addr_bytes[1],
			mac_addrs->addr_bytes[2],
			mac_addrs->addr_bytes[3],
			mac_addrs->addr_bytes[4],
			mac_addrs->addr_bytes[5]);
	if (rv == -1)
		return rv;
	if (xenstore_write(key_str, val_str))
		return -1;

	return 0;
}
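/*
 * Example of the resulting entry for virtio index 0 and MAC
 * 00:11:22:33:44:55, derived directly from the format strings above
 * (the path prefix depends on DPDK_XENSTORE_PATH):
 *
 *	<DPDK_XENSTORE_PATH>0_ether_addr = "00:11:22:33:44:55"
 */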
static int
eth_dev_start(struct rte_eth_dev *dev)
{
	struct virtqueue *rxvq = dev->data->rx_queues[0];
	struct virtqueue *txvq = dev->data->tx_queues[0];
	struct rte_mbuf *m;
	struct pmd_internals *pi = (struct pmd_internals *)dev->data->dev_private;
	int rv;

	dev->data->dev_link.link_status = 1;
	while (!virtqueue_full(rxvq)) {
		m = rte_rxmbuf_alloc(rxvq->mpool);
		if (m == NULL)
			break;
		/* Enqueue allocated buffers. */
		if (virtqueue_enqueue_recv_refill(rxvq, m)) {
			rte_pktmbuf_free_seg(m);
			break;
		}
	}

	rxvq->internals = pi;
	txvq->internals = pi;

	rv = update_mac_address(dev->data->mac_addrs, pi->virtio_idx);
	if (rv)
		return -1;
	dev_start_notify(pi->virtio_idx);

	return 0;
}
static void
eth_dev_stop(struct rte_eth_dev *dev)
{
	struct pmd_internals *pi = (struct pmd_internals *)dev->data->dev_private;

	dev->data->dev_link.link_status = 0;
	dev_stop_notify(pi->virtio_idx);
}
/*
 * Notify the host that this virtio device is closed,
 * so the host can perform the necessary cleanup.
 */
static void
eth_dev_close(struct rte_eth_dev *dev)
{
	eth_xenvirt_free_queues(dev);
}
static void
eth_dev_info(struct rte_eth_dev *dev,
		struct rte_eth_dev_info *dev_info)
{
	struct pmd_internals *internals = dev->data->dev_private;

	RTE_SET_USED(internals);
	dev_info->driver_name = drivername;
	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_pktlen = (uint32_t)2048;
	dev_info->max_rx_queues = (uint16_t)1;
	dev_info->max_tx_queues = (uint16_t)1;
	dev_info->min_rx_bufsize = 0;
	dev_info->pci_dev = NULL;
}
static void
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	struct pmd_internals *internals = dev->data->dev_private;

	rte_memcpy(stats, &internals->eth_stats, sizeof(*stats));
}
static void
eth_stats_reset(struct rte_eth_dev *dev)
{
	struct pmd_internals *internals = dev->data->dev_private;

	/* Reset software totals. */
	memset(&internals->eth_stats, 0, sizeof(internals->eth_stats));
}
static void
eth_queue_release(void *q)
{
	rte_free(q);
}
static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
		int wait_to_complete __rte_unused)
{
	return 0;
}
/*
 * Create a shared vring between guest and host.
 * Memory is allocated through the grant alloc driver,
 * so it is not physically contiguous.
 */
static void *
gntalloc_vring_create(int queue_type, uint32_t size, int vtidx)
{
	char key_str[PATH_MAX] = {0};
	char val_str[PATH_MAX] = {0};
	void *va = NULL;
	int pg_size;
	uint32_t pg_num;
	uint32_t *gref_arr = NULL;
	phys_addr_t *pa_arr = NULL;
	uint64_t start_index;
	int rv;

	pg_size = getpagesize();
	size = RTE_ALIGN_CEIL(size, pg_size);
	pg_num = size / pg_size;

	gref_arr = calloc(pg_num, sizeof(gref_arr[0]));
	pa_arr = calloc(pg_num, sizeof(pa_arr[0]));
	if (gref_arr == NULL || pa_arr == NULL) {
		RTE_LOG(ERR, PMD, "%s: calloc failed\n", __func__);
		goto out;
	}

	va = gntalloc(size, gref_arr, &start_index);
	if (va == NULL) {
		RTE_LOG(ERR, PMD, "%s: gntalloc failed\n", __func__);
		goto out;
	}

	if (get_phys_map(va, pa_arr, pg_num, pg_size)) {
		gntfree(va, size, start_index);
		va = NULL;
		goto out;
	}

	/* Write the gref and pfn of each vring page into xenstore. */
	if (grant_node_create(pg_num, gref_arr, pa_arr, val_str, sizeof(val_str))) {
		gntfree(va, size, start_index);
		va = NULL;
		goto out;
	}

	if (queue_type == VTNET_RQ)
		rv = snprintf(key_str, sizeof(key_str), DPDK_XENSTORE_PATH"%d"RXVRING_XENSTORE_STR, vtidx);
	else
		rv = snprintf(key_str, sizeof(key_str), DPDK_XENSTORE_PATH"%d"TXVRING_XENSTORE_STR, vtidx);
	if (rv == -1 || xenstore_write(key_str, val_str) == -1) {
		gntfree(va, size, start_index);
		va = NULL;
	}
out:
	free(pa_arr);
	free(gref_arr);

	return va;
}
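/*
 * Worked example, assuming a 4 KB page size: for a 12288-byte vring the
 * size is already page aligned, so pg_num = 12288 / 4096 = 3. gntalloc()
 * then fills gref_arr with three grant references, and grant_node_create()
 * publishes one gref/pfn pair per page so the host can map the ring.
 */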
static struct virtqueue *
virtio_queue_setup(struct rte_eth_dev *dev, int queue_type)
{
	struct virtqueue *vq = NULL;
	uint16_t vq_size = VQ_DESC_NUM;
	int i = 0;
	char vq_name[VIRTQUEUE_MAX_NAME_SZ];
	size_t size;
	struct vring *vr;

	/* Allocate memory for the virtqueue. */
	if (queue_type == VTNET_RQ) {
		snprintf(vq_name, sizeof(vq_name), "port%d_rvq",
				dev->data->port_id);
		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
			vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
		if (vq == NULL) {
			RTE_LOG(ERR, PMD, "%s: unable to allocate virtqueue\n", __func__);
			return NULL;
		}
		memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
	} else if (queue_type == VTNET_TQ) {
		snprintf(vq_name, sizeof(vq_name), "port%d_tvq",
				dev->data->port_id);
		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
			vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
		if (vq == NULL) {
			RTE_LOG(ERR, PMD, "%s: unable to allocate virtqueue\n", __func__);
			return NULL;
		}
		memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
	}
	vq->vq_alignment = VIRTIO_PCI_VRING_ALIGN;
	vq->vq_nentries = vq_size;
	vq->vq_free_cnt = vq_size;
	/* Calculate the vring size according to the virtio spec. */
	size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
	vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
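	/*
	 * Worked example for vq_size = 256 and 4096-byte alignment,
	 * assuming the standard virtio 0.9.5 vring layout: descriptors
	 * take 256 * 16 = 4096 B, the avail ring 6 + 2 * 256 = 518 B
	 * (padded together to 8192 B), and the used ring
	 * 6 + 8 * 256 = 2054 B, so vring_size() returns 10246 and
	 * vq_ring_size rounds up to 12288 (three 4 KB pages).
	 */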
	/* Allocate memory for the virtio vring through the gntalloc driver. */
	vq->vq_ring_virt_mem = gntalloc_vring_create(queue_type, vq->vq_ring_size,
		((struct pmd_internals *)dev->data->dev_private)->virtio_idx);
	if (vq->vq_ring_virt_mem == NULL) {
		RTE_LOG(ERR, PMD, "%s: vring allocation failed\n", __func__);
		rte_free(vq);
		return NULL;
	}
	memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
	vr = &vq->vq_ring;
	vring_init(vr, vq_size, vq->vq_ring_virt_mem, vq->vq_alignment);

	/*
	 * Locally maintained last consumed index; this index trails
	 * vq_ring.used->idx.
	 */
	vq->vq_used_cons_idx = 0;
	vq->vq_desc_head_idx = 0;
	vq->vq_free_cnt = vq->vq_nentries;
	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);

	/* Chain all the descriptors in the ring with an END. */
	for (i = 0; i < vq_size - 1; i++)
		vr->desc[i].next = (uint16_t)(i + 1);
	vr->desc[i].next = VQ_RING_DESC_CHAIN_END;

	return vq;
}
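/*
 * Illustrative layout of the free-descriptor chain that
 * virtio_queue_setup() leaves behind, for vq_size = 256:
 *
 *	desc[0].next = 1, desc[1].next = 2, ..., desc[254].next = 255,
 *	desc[255].next = VQ_RING_DESC_CHAIN_END
 */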
static int
eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
			uint16_t nb_rx_desc __rte_unused,
			unsigned int socket_id __rte_unused,
			const struct rte_eth_rxconf *rx_conf __rte_unused,
			struct rte_mempool *mb_pool)
{
	struct virtqueue *vq;

	vq = dev->data->rx_queues[rx_queue_id] = virtio_queue_setup(dev, VTNET_RQ);
	vq->mpool = mb_pool;

	return 0;
}
static int
eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
			uint16_t nb_tx_desc __rte_unused,
			unsigned int socket_id __rte_unused,
			const struct rte_eth_txconf *tx_conf __rte_unused)
{
	dev->data->tx_queues[tx_queue_id] = virtio_queue_setup(dev, VTNET_TQ);

	return 0;
}
static void
eth_xenvirt_free_queues(struct rte_eth_dev *dev)
{
	int i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		eth_queue_release(dev->data->rx_queues[i]);
		dev->data->rx_queues[i] = NULL;
	}
	dev->data->nb_rx_queues = 0;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		eth_queue_release(dev->data->tx_queues[i]);
		dev->data->tx_queues[i] = NULL;
	}
	dev->data->nb_tx_queues = 0;
}
static const struct eth_dev_ops ops = {
	.dev_start = eth_dev_start,
	.dev_stop = eth_dev_stop,
	.dev_close = eth_dev_close,
	.dev_configure = eth_dev_configure,
	.dev_infos_get = eth_dev_info,
	.rx_queue_setup = eth_rx_queue_setup,
	.tx_queue_setup = eth_tx_queue_setup,
	.rx_queue_release = eth_queue_release,
	.tx_queue_release = eth_queue_release,
	.link_update = eth_link_update,
	.stats_get = eth_stats_get,
	.stats_reset = eth_stats_reset,
};
static int
rte_eth_xenvirt_parse_args(struct xenvirt_dict *dict,
			const char *name, const char *params)
{
	int i;
	char *pairs[RTE_ETH_XENVIRT_MAX_ARGS];
	int num_of_pairs;
	char *pair[2];
	char *args;
	int ret = -1;

	if (params == NULL)
		return 0;

	args = rte_zmalloc(NULL, strlen(params) + 1, RTE_CACHE_LINE_SIZE);
	if (args == NULL) {
		RTE_LOG(ERR, PMD, "Couldn't parse %s device\n", name);
		return -1;
	}
	rte_memcpy(args, params, strlen(params));

	num_of_pairs = rte_strsplit(args, strnlen(args, MAX_ARG_STRLEN),
			pairs,
			RTE_ETH_XENVIRT_MAX_ARGS,
			RTE_ETH_XENVIRT_PAIRS_DELIM);

	for (i = 0; i < num_of_pairs; i++) {
		pair[0] = NULL;
		pair[1] = NULL;
		rte_strsplit(pairs[i], strnlen(pairs[i], MAX_ARG_STRLEN),
				pair, 2,
				RTE_ETH_XENVIRT_KEY_VALUE_DELIM);

		if (pair[0] == NULL || pair[1] == NULL || pair[0][0] == 0 ||
				pair[1][0] == 0) {
			RTE_LOG(ERR, PMD,
				"Couldn't parse %s device, wrong key or value\n",
				name);
			goto err;
		}

		if (!strncmp(pair[0], RTE_ETH_XENVIRT_MAC_PARAM,
				sizeof(RTE_ETH_XENVIRT_MAC_PARAM))) {
			if (cmdline_parse_etheraddr(NULL, pair[1],
					&dict->addr,
					sizeof(dict->addr)) < 0) {
				RTE_LOG(ERR, PMD,
					"Invalid %s device ether address\n",
					name);
				goto err;
			}

			dict->addr_valid = 1;
		}
	}

	ret = 0;
err:
	rte_free(args);
	return ret;
}
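/*
 * Example of the accepted parameter string, assuming the delimiters in
 * rte_eth_xenvirt.h separate pairs with ';' and key/value with '=':
 *
 *	"mac=00:11:22:33:44:55"
 *
 * "mac" is the only key handled here; a valid address sets
 * dict->addr_valid so that eth_dev_xenvirt_create() skips random MAC
 * generation.
 */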
enum dev_action {
	DEV_CREATE,
	DEV_ATTACH
};

static int
eth_dev_xenvirt_create(const char *name, const char *params,
		const unsigned numa_node,
		enum dev_action action)
{
	struct rte_eth_dev_data *data = NULL;
	struct rte_pci_device *pci_dev = NULL;
	struct pmd_internals *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	struct xenvirt_dict dict;

	bzero(&dict, sizeof(struct xenvirt_dict));

	RTE_LOG(INFO, PMD, "Creating virtio rings backed ethdev on numa socket %u\n",
			numa_node);
	RTE_SET_USED(action);

	if (rte_eth_xenvirt_parse_args(&dict, name, params) < 0) {
		RTE_LOG(ERR, PMD, "%s: Failed to parse ethdev parameters\n", __func__);
		return -1;
	}

	/* now do all data allocation - for eth_dev structure, dummy pci driver
	 * and internal (private) data
	 */
	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
	if (data == NULL)
		goto err;

	pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, numa_node);
	if (pci_dev == NULL)
		goto err;

	internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node);
	if (internals == NULL)
		goto err;

	/* reserve an ethdev entry */
	eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
	if (eth_dev == NULL)
		goto err;

	pci_dev->numa_node = numa_node;

	data->dev_private = internals;
	data->port_id = eth_dev->data->port_id;
	data->nb_rx_queues = (uint16_t)1;
	data->nb_tx_queues = (uint16_t)1;
	data->dev_link = pmd_link;
	data->mac_addrs = rte_zmalloc("xen_virtio", ETHER_ADDR_LEN, 0);

	if (dict.addr_valid)
		memcpy(&data->mac_addrs->addr_bytes, &dict.addr, sizeof(struct ether_addr));
	else
		eth_random_addr(&data->mac_addrs->addr_bytes[0]);

	eth_dev->data = data;
	eth_dev->dev_ops = &ops;

	eth_dev->data->dev_flags = RTE_PCI_DRV_DETACHABLE;
	eth_dev->pci_dev = pci_dev;
	eth_dev->data->kdrv = RTE_KDRV_NONE;
	eth_dev->data->drv_name = drivername;
	eth_dev->driver = NULL;
	eth_dev->data->numa_node = numa_node;

	eth_dev->rx_pkt_burst = eth_xenvirt_rx;
	eth_dev->tx_pkt_burst = eth_xenvirt_tx;

	internals->virtio_idx = virtio_idx++;
	internals->port_id = eth_dev->data->port_id;

	return 0;

err:
	rte_free(data);
	rte_free(pci_dev);
	rte_free(internals);

	return -1;
}
static int
eth_dev_xenvirt_free(const char *name, const unsigned numa_node)
{
	struct rte_eth_dev *eth_dev = NULL;

	RTE_LOG(DEBUG, PMD,
		"Free virtio rings backed ethdev on numa socket %u\n",
		numa_node);

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return -1;

	if (eth_dev->data->dev_started == 1) {
		eth_dev_stop(eth_dev);
		eth_dev_close(eth_dev);
	}

	eth_dev->rx_pkt_burst = NULL;
	eth_dev->tx_pkt_burst = NULL;
	eth_dev->dev_ops = NULL;

	/* Free the private data and MAC array before the data struct that owns them. */
	rte_free(eth_dev->data->dev_private);
	rte_free(eth_dev->data->mac_addrs);
	rte_free(eth_dev->data);

	virtio_idx--;

	return 0;
}
/* TODO: Support the multiple-process model. */
static int
rte_pmd_xenvirt_devinit(const char *name, const char *params)
{
	if (virtio_idx == 0) {
		if (xenstore_init() != 0) {
			RTE_LOG(ERR, PMD, "%s: xenstore init failed\n", __func__);
			return -1;
		}
		if (gntalloc_open() != 0) {
			RTE_LOG(ERR, PMD, "%s: grant init failed\n", __func__);
			return -1;
		}
	}
	eth_dev_xenvirt_create(name, params, rte_socket_id(), DEV_CREATE);

	return 0;
}
static int
rte_pmd_xenvirt_devuninit(const char *name)
{
	eth_dev_xenvirt_free(name, rte_socket_id());

	if (virtio_idx == 0) {
		if (xenstore_uninit() != 0)
			RTE_LOG(ERR, PMD, "%s: xenstore uninit failed\n", __func__);
		gntalloc_close();
	}
	return 0;
}
static struct rte_driver pmd_xenvirt_drv = {
	.name = "eth_xenvirt",
	.type = PMD_VDEV,
	.init = rte_pmd_xenvirt_devinit,
	.uninit = rte_pmd_xenvirt_devuninit,
};

PMD_REGISTER_DRIVER(pmd_xenvirt_drv);
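/*
 * Usage sketch (not part of the driver): the PMD is instantiated from the
 * EAL command line as a virtual device, e.g.
 *
 *	testpmd --vdev='eth_xenvirt0,mac=00:11:22:33:44:55' ...
 *
 * The application name and trailing arguments above are hypothetical; the
 * "eth_xenvirt" device name and the "mac" parameter come from this file
 * and rte_eth_xenvirt.h.
 */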