609824b6e2f99f5424d9771fd5e702090178b275
[dpdk.git] / drivers / net / xenvirt / rte_eth_xenvirt.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdint.h>
35 #include <unistd.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <sys/types.h>
39 #include <sys/mman.h>
40 #include <errno.h>
41 #include <sys/user.h>
42 #ifndef PAGE_SIZE
43 #define PAGE_SIZE sysconf(_SC_PAGE_SIZE)
44 #endif
45 #include <linux/binfmts.h>
46 #include <xen/xen-compat.h>
47 #if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
48 #include <xs.h>
49 #else
50 #include <xenstore.h>
51 #endif
52 #include <linux/virtio_ring.h>
53
54 #include <rte_mbuf.h>
55 #include <rte_ethdev.h>
56 #include <rte_malloc.h>
57 #include <rte_memcpy.h>
58 #include <rte_string_fns.h>
59 #include <rte_vdev.h>
60 #include <cmdline_parse.h>
61 #include <cmdline_parse_etheraddr.h>
62
63 #include "rte_xen_lib.h"
64 #include "virtqueue.h"
65 #include "rte_eth_xenvirt.h"
66
67 #define VQ_DESC_NUM 256
68 #define VIRTIO_MBUF_BURST_SZ 64
69
70 /* virtio_idx is increased after new device is created.*/
71 static int virtio_idx = 0;
72
73 static const char *drivername = "xen virtio PMD";
74
75 static struct rte_eth_link pmd_link = {
76                 .link_speed = ETH_SPEED_NUM_10G,
77                 .link_duplex = ETH_LINK_FULL_DUPLEX,
78                 .link_status = ETH_LINK_DOWN,
79                 .link_autoneg = ETH_LINK_SPEED_FIXED
80 };
81
82 static void
83 eth_xenvirt_free_queues(struct rte_eth_dev *dev);
84
85 static uint16_t
86 eth_xenvirt_rx(void *q, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
87 {
88         struct virtqueue *rxvq = q;
89         struct rte_mbuf *rxm, *new_mbuf;
90         uint16_t nb_used, num;
91         uint32_t len[VIRTIO_MBUF_BURST_SZ];
92         uint32_t i;
93         struct pmd_internals *pi = rxvq->internals;
94
95         nb_used = VIRTQUEUE_NUSED(rxvq);
96
97         rte_smp_rmb();
98         num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
99         num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
100         if (unlikely(num == 0)) return 0;
101
102         num = virtqueue_dequeue_burst(rxvq, rx_pkts, len, num);
103         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d\n", nb_used, num);
104         for (i = 0; i < num ; i ++) {
105                 rxm = rx_pkts[i];
106                 PMD_RX_LOG(DEBUG, "packet len:%d\n", len[i]);
107                 rxm->next = NULL;
108                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
109                 rxm->data_len = (uint16_t)(len[i] - sizeof(struct virtio_net_hdr));
110                 rxm->nb_segs = 1;
111                 rxm->port = pi->port_id;
112                 rxm->pkt_len  = (uint32_t)(len[i] - sizeof(struct virtio_net_hdr));
113         }
114         /* allocate new mbuf for the used descriptor */
115         while (likely(!virtqueue_full(rxvq))) {
116                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
117                 if (unlikely(new_mbuf == NULL)) {
118                         break;
119                 }
120                 if (unlikely(virtqueue_enqueue_recv_refill(rxvq, new_mbuf))) {
121                         rte_pktmbuf_free_seg(new_mbuf);
122                         break;
123                 }
124         }
125         pi->eth_stats.ipackets += num;
126         return num;
127 }
128
129 static uint16_t
130 eth_xenvirt_tx(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
131 {
132         struct virtqueue *txvq = tx_queue;
133         struct rte_mbuf *txm;
134         uint16_t nb_used, nb_tx, num, i;
135         int error;
136         uint32_t len[VIRTIO_MBUF_BURST_SZ];
137         struct rte_mbuf *snd_pkts[VIRTIO_MBUF_BURST_SZ];
138         struct pmd_internals *pi = txvq->internals;
139
140         nb_tx = 0;
141
142         if (unlikely(nb_pkts == 0))
143                 return 0;
144
145         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
146         nb_used = VIRTQUEUE_NUSED(txvq);
147
148         rte_smp_rmb();
149
150         num = (uint16_t)(likely(nb_used <= VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ);
151         num = virtqueue_dequeue_burst(txvq, snd_pkts, len, num);
152
153         for (i = 0; i < num ; i ++) {
154                 /* mergable not supported, one segment only */
155                 rte_pktmbuf_free_seg(snd_pkts[i]);
156         }
157
158         while (nb_tx < nb_pkts) {
159                 if (likely(!virtqueue_full(txvq))) {
160                 /* TODO drop tx_pkts if it contains multiple segments */
161                         txm = tx_pkts[nb_tx];
162                         error = virtqueue_enqueue_xmit(txvq, txm);
163                         if (unlikely(error)) {
164                                 if (error == ENOSPC)
165                                         PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0\n");
166                                 else if (error == EMSGSIZE)
167                                         PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1\n");
168                                 else
169                                         PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d\n", error);
170                                 break;
171                         }
172                         nb_tx++;
173                 } else {
174                         PMD_TX_LOG(ERR, "No free tx descriptors to transmit\n");
175                         /* virtqueue_notify not needed in our para-virt solution */
176                         break;
177                 }
178         }
179         pi->eth_stats.opackets += nb_tx;
180         return nb_tx;
181 }
182
183 static int
184 eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
185 {
186         RTE_LOG(ERR, PMD, "%s\n", __func__);
187         return 0;
188 }
189
190 /*
191  * Create a shared page between guest and host.
192  * Host monitors this page if it is cleared on unmap, and then
193  * do necessary clean up.
194  */
195 static void
196 gntalloc_vring_flag(int vtidx)
197 {
198         char key_str[PATH_MAX];
199         char val_str[PATH_MAX];
200         uint32_t gref_tmp;
201         void *ptr;
202
203         if (grefwatch_from_alloc(&gref_tmp, &ptr)) {
204                 RTE_LOG(ERR, PMD, "grefwatch_from_alloc error\n");
205                 exit(0);
206         }
207
208         *(uint8_t *)ptr = MAP_FLAG;
209         snprintf(val_str, sizeof(val_str), "%u", gref_tmp);
210         snprintf(key_str, sizeof(key_str),
211                 DPDK_XENSTORE_PATH"%d"VRING_FLAG_STR, vtidx);
212         xenstore_write(key_str, val_str);
213 }
214
215 /*
216  * Notify host this virtio device is started.
217  * Host could start polling this device.
218  */
219 static void
220 dev_start_notify(int vtidx)
221 {
222         char key_str[PATH_MAX];
223         char val_str[PATH_MAX];
224
225         RTE_LOG(INFO, PMD, "%s: virtio %d is started\n", __func__, vtidx);
226         gntalloc_vring_flag(vtidx);
227
228         snprintf(key_str, sizeof(key_str), "%s%s%d",
229                 DPDK_XENSTORE_PATH, EVENT_TYPE_START_STR,
230                         vtidx);
231         snprintf(val_str, sizeof(val_str), "1");
232         xenstore_write(key_str, val_str);
233 }
234
/*
 * Hook for telling the host that this virtio device is stopped so it
 * could stop polling. Currently a no-op: nothing is sent to the host and
 * the device index is ignored.
 */
static void
dev_stop_notify(int vtidx)
{
	(void)vtidx;
}
244
245
246 static int
247 update_mac_address(struct ether_addr *mac_addrs, int vtidx)
248 {
249         char key_str[PATH_MAX];
250         char val_str[PATH_MAX];
251         int rv;
252
253         if (mac_addrs == NULL) {
254                 RTE_LOG(ERR, PMD, "%s: NULL pointer mac specified\n", __func__);
255                 return -1;
256         }
257         rv = snprintf(key_str, sizeof(key_str),
258                         DPDK_XENSTORE_PATH"%d_ether_addr", vtidx);
259         if (rv == -1)
260                 return rv;
261         rv = snprintf(val_str, sizeof(val_str), "%02x:%02x:%02x:%02x:%02x:%02x",
262                         mac_addrs->addr_bytes[0],
263                         mac_addrs->addr_bytes[1],
264                         mac_addrs->addr_bytes[2],
265                         mac_addrs->addr_bytes[3],
266                         mac_addrs->addr_bytes[4],
267                         mac_addrs->addr_bytes[5]);
268         if (rv == -1)
269                 return rv;
270         if (xenstore_write(key_str, val_str))
271                 return rv;
272         return 0;
273 }
274
275
276 static int
277 eth_dev_start(struct rte_eth_dev *dev)
278 {
279         struct virtqueue *rxvq = dev->data->rx_queues[0];
280         struct virtqueue *txvq = dev->data->tx_queues[0];
281         struct rte_mbuf *m;
282         struct pmd_internals *pi = (struct pmd_internals *)dev->data->dev_private;
283         int rv;
284
285         dev->data->dev_link.link_status = ETH_LINK_UP;
286         while (!virtqueue_full(rxvq)) {
287                 m = rte_mbuf_raw_alloc(rxvq->mpool);
288                 if (m == NULL)
289                         break;
290                 /* Enqueue allocated buffers. */
291                 if (virtqueue_enqueue_recv_refill(rxvq, m)) {
292                         rte_pktmbuf_free_seg(m);
293                         break;
294                 }
295         }
296
297         rxvq->internals = pi;
298         txvq->internals = pi;
299
300         rv = update_mac_address(dev->data->mac_addrs, pi->virtio_idx);
301         if (rv)
302                 return -1;
303         dev_start_notify(pi->virtio_idx);
304
305         return 0;
306 }
307
308 static void
309 eth_dev_stop(struct rte_eth_dev *dev)
310 {
311         struct pmd_internals *pi = (struct pmd_internals *)dev->data->dev_private;
312
313         dev->data->dev_link.link_status = ETH_LINK_DOWN;
314         dev_stop_notify(pi->virtio_idx);
315 }
316
/*
 * dev_close callback: releases every rx and tx queue of the device.
 * No notification is sent to the host from here.
 */
static void
eth_dev_close(struct rte_eth_dev *dev)
{
	eth_xenvirt_free_queues(dev);
}
326
327 static void
328 eth_dev_info(struct rte_eth_dev *dev,
329                 struct rte_eth_dev_info *dev_info)
330 {
331         struct pmd_internals *internals = dev->data->dev_private;
332
333         RTE_SET_USED(internals);
334         dev_info->driver_name = drivername;
335         dev_info->max_mac_addrs = 1;
336         dev_info->max_rx_pktlen = (uint32_t)2048;
337         dev_info->max_rx_queues = (uint16_t)1;
338         dev_info->max_tx_queues = (uint16_t)1;
339         dev_info->min_rx_bufsize = 0;
340 }
341
342 static void
343 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
344 {
345         struct pmd_internals *internals = dev->data->dev_private;
346         if(stats)
347                 rte_memcpy(stats, &internals->eth_stats, sizeof(*stats));
348 }
349
350 static void
351 eth_stats_reset(struct rte_eth_dev *dev)
352 {
353         struct pmd_internals *internals = dev->data->dev_private;
354         /* Reset software totals */
355         memset(&internals->eth_stats, 0, sizeof(internals->eth_stats));
356 }
357
/*
 * rx/tx queue release callback: returns the queue structure to the
 * rte_malloc heap.
 *
 * NOTE(review): only the structure itself is freed here; the ring memory
 * a queue obtained through the grant allocator does not appear to be
 * released by this function - confirm where that happens.
 */
static void
eth_queue_release(void *q)
{
	rte_free(q);
}
363
364 static int
365 eth_link_update(struct rte_eth_dev *dev __rte_unused,
366                 int wait_to_complete __rte_unused)
367 {
368         return 0;
369 }
370
371 /*
372  * Create shared vring between guest and host.
373  * Memory is allocated through grant alloc driver, so it is not physical continuous.
374  */
375 static void *
376 gntalloc_vring_create(int queue_type, uint32_t size, int vtidx)
377 {
378         char key_str[PATH_MAX] = {0};
379         char val_str[PATH_MAX] = {0};
380         void *va = NULL;
381         int pg_size;
382         uint32_t pg_num;
383         uint32_t *gref_arr = NULL;
384         phys_addr_t *pa_arr = NULL;
385         uint64_t start_index;
386         int rv;
387
388         pg_size = getpagesize();
389         size    = RTE_ALIGN_CEIL(size, pg_size);
390         pg_num  = size / pg_size;
391
392         gref_arr = calloc(pg_num, sizeof(gref_arr[0]));
393         pa_arr  = calloc(pg_num, sizeof(pa_arr[0]));
394
395         if (gref_arr == NULL || pa_arr == NULL) {
396                 RTE_LOG(ERR, PMD, "%s: calloc failed\n", __func__);
397                 goto out;
398         }
399
400         va  = gntalloc(size, gref_arr, &start_index);
401         if (va == NULL) {
402                 RTE_LOG(ERR, PMD, "%s: gntalloc failed\n", __func__);
403                 goto out;
404         }
405
406         if (get_phys_map(va, pa_arr, pg_num, pg_size))
407                 goto out;
408
409         /* write in xenstore gref and pfn for each page of vring */
410         if (grant_node_create(pg_num, gref_arr, pa_arr, val_str, sizeof(val_str))) {
411                 gntfree(va, size, start_index);
412                 va = NULL;
413                 goto out;
414         }
415
416         if (queue_type == VTNET_RQ)
417                 rv = snprintf(key_str, sizeof(key_str), DPDK_XENSTORE_PATH"%d"RXVRING_XENSTORE_STR, vtidx);
418         else
419                 rv = snprintf(key_str, sizeof(key_str), DPDK_XENSTORE_PATH"%d"TXVRING_XENSTORE_STR, vtidx);
420         if (rv == -1 || xenstore_write(key_str, val_str) == -1) {
421                 gntfree(va, size, start_index);
422                 va = NULL;
423         }
424 out:
425         free(pa_arr);
426         free(gref_arr);
427
428         return va;
429 }
430
431
432
433 static struct virtqueue *
434 virtio_queue_setup(struct rte_eth_dev *dev, int queue_type)
435 {
436         struct virtqueue *vq = NULL;
437         uint16_t vq_size = VQ_DESC_NUM;
438         int i = 0;
439         char vq_name[VIRTQUEUE_MAX_NAME_SZ];
440         size_t size;
441         struct vring *vr;
442
443         /* Allocate memory for virtqueue. */
444         if (queue_type == VTNET_RQ) {
445                 snprintf(vq_name, sizeof(vq_name), "port%d_rvq",
446                                 dev->data->port_id);
447                 vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
448                         vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
449                 if (vq == NULL) {
450                         RTE_LOG(ERR, PMD, "%s: unabled to allocate virtqueue\n", __func__);
451                         return NULL;
452                 }
453                 memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
454         } else if(queue_type == VTNET_TQ) {
455                 snprintf(vq_name, sizeof(vq_name), "port%d_tvq",
456                         dev->data->port_id);
457                 vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
458                         vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
459                 if (vq == NULL) {
460                         RTE_LOG(ERR, PMD, "%s: unabled to allocate virtqueue\n", __func__);
461                         return NULL;
462                 }
463                 memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
464         }
465
466         memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
467
468         vq->vq_alignment = VIRTIO_PCI_VRING_ALIGN;
469         vq->vq_nentries = vq_size;
470         vq->vq_free_cnt = vq_size;
471         /* Calcuate vring size according to virtio spec */
472         size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
473         vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
474         /* Allocate memory for virtio vring through gntalloc driver*/
475         vq->vq_ring_virt_mem = gntalloc_vring_create(queue_type, vq->vq_ring_size,
476                 ((struct pmd_internals *)dev->data->dev_private)->virtio_idx);
477         memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
478         vr = &vq->vq_ring;
479         vring_init(vr, vq_size, vq->vq_ring_virt_mem, vq->vq_alignment);
480         /*
481          * Locally maintained last consumed index, this idex trails
482          * vq_ring.used->idx.
483          */
484         vq->vq_used_cons_idx = 0;
485         vq->vq_desc_head_idx = 0;
486         vq->vq_free_cnt = vq->vq_nentries;
487         memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
488
489         /* Chain all the descriptors in the ring with an END */
490         for (i = 0; i < vq_size - 1; i++)
491                 vr->desc[i].next = (uint16_t)(i + 1);
492         vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
493
494         return vq;
495 }
496
497 static int
498 eth_rx_queue_setup(struct rte_eth_dev *dev,uint16_t rx_queue_id,
499                                 uint16_t nb_rx_desc __rte_unused,
500                                 unsigned int socket_id __rte_unused,
501                                 const struct rte_eth_rxconf *rx_conf __rte_unused,
502                                 struct rte_mempool *mb_pool)
503 {
504         struct virtqueue *vq;
505         vq = dev->data->rx_queues[rx_queue_id] = virtio_queue_setup(dev, VTNET_RQ);
506         vq->mpool = mb_pool;
507         return 0;
508 }
509
510 static int
511 eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
512                                 uint16_t nb_tx_desc __rte_unused,
513                                 unsigned int socket_id __rte_unused,
514                                 const struct rte_eth_txconf *tx_conf __rte_unused)
515 {
516         dev->data->tx_queues[tx_queue_id] = virtio_queue_setup(dev, VTNET_TQ);
517         return 0;
518 }
519
520 static void
521 eth_xenvirt_free_queues(struct rte_eth_dev *dev)
522 {
523         int i;
524
525         for (i = 0; i < dev->data->nb_rx_queues; i++) {
526                 eth_queue_release(dev->data->rx_queues[i]);
527                 dev->data->rx_queues[i] = NULL;
528         }
529         dev->data->nb_rx_queues = 0;
530
531         for (i = 0; i < dev->data->nb_tx_queues; i++) {
532                 eth_queue_release(dev->data->tx_queues[i]);
533                 dev->data->tx_queues[i] = NULL;
534         }
535         dev->data->nb_tx_queues = 0;
536 }
537
538 static const struct eth_dev_ops ops = {
539         .dev_start = eth_dev_start,
540         .dev_stop = eth_dev_stop,
541         .dev_close = eth_dev_close,
542         .dev_configure = eth_dev_configure,
543         .dev_infos_get = eth_dev_info,
544         .rx_queue_setup = eth_rx_queue_setup,
545         .tx_queue_setup = eth_tx_queue_setup,
546         .rx_queue_release = eth_queue_release,
547         .tx_queue_release = eth_queue_release,
548         .link_update = eth_link_update,
549         .stats_get = eth_stats_get,
550         .stats_reset = eth_stats_reset,
551 };
552
553
554 static int
555 rte_eth_xenvirt_parse_args(struct xenvirt_dict *dict,
556                         const char *name, const char *params)
557 {
558         int i;
559         char *pairs[RTE_ETH_XENVIRT_MAX_ARGS];
560         int num_of_pairs;
561         char *pair[2];
562         char *args;
563         int ret = -1;
564
565         if (params == NULL)
566                 return 0;
567
568         args = rte_zmalloc(NULL, strlen(params) + 1, RTE_CACHE_LINE_SIZE);
569         if (args == NULL) {
570                 RTE_LOG(ERR, PMD, "Couldn't parse %s device \n", name);
571                 return -1;
572         }
573         rte_memcpy(args, params, strlen(params));
574
575         num_of_pairs = rte_strsplit(args, strnlen(args, MAX_ARG_STRLEN),
576                                         pairs,
577                                         RTE_ETH_XENVIRT_MAX_ARGS ,
578                                         RTE_ETH_XENVIRT_PAIRS_DELIM);
579
580         for (i = 0; i < num_of_pairs; i++) {
581                 pair[0] = NULL;
582                 pair[1] = NULL;
583                 rte_strsplit(pairs[i], strnlen(pairs[i], MAX_ARG_STRLEN),
584                                         pair, 2,
585                                         RTE_ETH_XENVIRT_KEY_VALUE_DELIM);
586
587                 if (pair[0] == NULL || pair[1] == NULL || pair[0][0] == 0
588                         || pair[1][0] == 0) {
589                         RTE_LOG(ERR, PMD,
590                                 "Couldn't parse %s device,"
591                                 "wrong key or value \n", name);
592                         goto err;
593                 }
594
595                 if (!strncmp(pair[0], RTE_ETH_XENVIRT_MAC_PARAM,
596                                 sizeof(RTE_ETH_XENVIRT_MAC_PARAM))) {
597                         if (cmdline_parse_etheraddr(NULL,
598                                                     pair[1],
599                                                     &dict->addr,
600                                                     sizeof(dict->addr)) < 0) {
601                                 RTE_LOG(ERR, PMD,
602                                         "Invalid %s device ether address\n",
603                                         name);
604                                 goto err;
605                         }
606
607                         dict->addr_valid = 1;
608                 }
609         }
610
611         ret = 0;
612 err:
613         rte_free(args);
614         return ret;
615 }
616
/* Action requested when bringing up a xenvirt device. */
enum dev_action {
	DEV_CREATE = 0,
	DEV_ATTACH = 1
};
621
622
623 static int
624 eth_dev_xenvirt_create(const char *name, const char *params,
625                 const unsigned numa_node,
626                 enum dev_action action)
627 {
628         struct rte_eth_dev_data *data = NULL;
629         struct pmd_internals *internals = NULL;
630         struct rte_eth_dev *eth_dev = NULL;
631         struct xenvirt_dict dict;
632
633         memset(&dict, 0, sizeof(struct xenvirt_dict));
634
635         RTE_LOG(INFO, PMD, "Creating virtio rings backed ethdev on numa socket %u\n",
636                         numa_node);
637         RTE_SET_USED(action);
638
639         if (rte_eth_xenvirt_parse_args(&dict, name, params) < 0) {
640                 RTE_LOG(ERR, PMD, "%s: Failed to parse ethdev parameters\n", __func__);
641                 return -1;
642         }
643
644         /* now do all data allocation - for eth_dev structure, dummy pci driver
645          * and internal (private) data
646          */
647         data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
648         if (data == NULL)
649                 goto err;
650
651         internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node);
652         if (internals == NULL)
653                 goto err;
654
655         /* reserve an ethdev entry */
656         eth_dev = rte_eth_dev_allocate(name);
657         if (eth_dev == NULL)
658                 goto err;
659
660         data->dev_private = internals;
661         data->port_id = eth_dev->data->port_id;
662         data->nb_rx_queues = (uint16_t)1;
663         data->nb_tx_queues = (uint16_t)1;
664         data->dev_link = pmd_link;
665         data->mac_addrs = rte_zmalloc("xen_virtio", ETHER_ADDR_LEN, 0);
666
667         if(dict.addr_valid)
668                 memcpy(&data->mac_addrs->addr_bytes, &dict.addr, sizeof(struct ether_addr));
669         else
670                 eth_random_addr(&data->mac_addrs->addr_bytes[0]);
671
672         eth_dev->data = data;
673         eth_dev->dev_ops = &ops;
674
675         eth_dev->data->dev_flags = RTE_PCI_DRV_DETACHABLE;
676         eth_dev->data->kdrv = RTE_KDRV_NONE;
677         eth_dev->data->drv_name = drivername;
678         eth_dev->driver = NULL;
679         eth_dev->data->numa_node = numa_node;
680
681         eth_dev->rx_pkt_burst = eth_xenvirt_rx;
682         eth_dev->tx_pkt_burst = eth_xenvirt_tx;
683
684         internals->virtio_idx = virtio_idx++;
685         internals->port_id = eth_dev->data->port_id;
686
687         return 0;
688
689 err:
690         rte_free(data);
691         rte_free(internals);
692
693         return -1;
694 }
695
696
697 static int
698 eth_dev_xenvirt_free(const char *name, const unsigned numa_node)
699 {
700         struct rte_eth_dev *eth_dev = NULL;
701
702         RTE_LOG(DEBUG, PMD,
703                 "Free virtio rings backed ethdev on numa socket %u\n",
704                 numa_node);
705
706         /* find an ethdev entry */
707         eth_dev = rte_eth_dev_allocated(name);
708         if (eth_dev == NULL)
709                 return -1;
710
711         if (eth_dev->data->dev_started == 1) {
712                 eth_dev_stop(eth_dev);
713                 eth_dev_close(eth_dev);
714         }
715
716         eth_dev->rx_pkt_burst = NULL;
717         eth_dev->tx_pkt_burst = NULL;
718         eth_dev->dev_ops = NULL;
719
720         rte_free(eth_dev->data);
721         rte_free(eth_dev->data->dev_private);
722         rte_free(eth_dev->data->mac_addrs);
723
724         virtio_idx--;
725
726         return 0;
727 }
728
729 /*TODO: Support multiple process model */
730 static int
731 rte_pmd_xenvirt_probe(const char *name, const char *params)
732 {
733         if (virtio_idx == 0) {
734                 if (xenstore_init() != 0) {
735                         RTE_LOG(ERR, PMD, "%s: xenstore init failed\n", __func__);
736                         return -1;
737                 }
738                 if (gntalloc_open() != 0) {
739                         RTE_LOG(ERR, PMD, "%s: grant init failed\n", __func__);
740                         return -1;
741                 }
742         }
743         eth_dev_xenvirt_create(name, params, rte_socket_id(), DEV_CREATE);
744         return 0;
745 }
746
747 static int
748 rte_pmd_xenvirt_remove(const char *name)
749 {
750         eth_dev_xenvirt_free(name, rte_socket_id());
751
752         if (virtio_idx == 0) {
753                 if (xenstore_uninit() != 0)
754                         RTE_LOG(ERR, PMD, "%s: xenstore uninit failed\n", __func__);
755
756                 gntalloc_close();
757         }
758         return 0;
759 }
760
761 static struct rte_vdev_driver pmd_xenvirt_drv = {
762         .probe = rte_pmd_xenvirt_probe,
763         .remove = rte_pmd_xenvirt_remove,
764 };
765
766 RTE_PMD_REGISTER_VDEV(net_xenvirt, pmd_xenvirt_drv);
767 RTE_PMD_REGISTER_ALIAS(net_xenvirt, eth_xenvirt);
768 RTE_PMD_REGISTER_PARAM_STRING(net_xenvirt,
769         "mac=<mac addr>");