drivers/net/virtio/virtio_ethdev.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>

#include <rte_ethdev.h>
#include <rte_ethdev_pci.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_ether.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_cpuflags.h>

#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_dev.h>
#include <rte_cycles.h>

#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtio_logs.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"

static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
static int  virtio_dev_configure(struct rte_eth_dev *dev);
static int  virtio_dev_start(struct rte_eth_dev *dev);
static void virtio_dev_stop(struct rte_eth_dev *dev);
static void virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
static void virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
static void virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
static void virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
static void virtio_dev_info_get(struct rte_eth_dev *dev,
                                struct rte_eth_dev_info *dev_info);
static int virtio_dev_link_update(struct rte_eth_dev *dev,
        int wait_to_complete);
static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);

static void virtio_set_hwaddr(struct virtio_hw *hw);
static void virtio_get_hwaddr(struct virtio_hw *hw);

static int virtio_dev_stats_get(struct rte_eth_dev *dev,
                                 struct rte_eth_stats *stats);
static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
                                 struct rte_eth_xstat *xstats, unsigned n);
static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
                                       struct rte_eth_xstat_name *xstats_names,
                                       unsigned limit);
static void virtio_dev_stats_reset(struct rte_eth_dev *dev);
static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
                                uint16_t vlan_id, int on);
static int virtio_mac_addr_add(struct rte_eth_dev *dev,
                                struct ether_addr *mac_addr,
                                uint32_t index, uint32_t vmdq);
static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
static void virtio_mac_addr_set(struct rte_eth_dev *dev,
                                struct ether_addr *mac_addr);

static int virtio_intr_enable(struct rte_eth_dev *dev);
static int virtio_intr_disable(struct rte_eth_dev *dev);

static int virtio_dev_queue_stats_mapping_set(
        struct rte_eth_dev *eth_dev,
        uint16_t queue_id,
        uint8_t stat_idx,
        uint8_t is_rx);

int virtio_logtype_init;
int virtio_logtype_driver;

/*
 * The set of PCI devices this driver supports
 */
static const struct rte_pci_id pci_id_virtio_map[] = {
        { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_LEGACY_DEVICEID_NET) },
        { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_MODERN_DEVICEID_NET) },
        { .vendor_id = 0, /* sentinel */ },
};

struct rte_virtio_xstats_name_off {
        char name[RTE_ETH_XSTATS_NAME_SIZE];
        unsigned offset;
};

/* [rt]x_qX_ is prepended to the name string here */
static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
        {"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
        {"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
        {"errors",                 offsetof(struct virtnet_rx, stats.errors)},
        {"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
        {"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
        {"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
        {"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
        {"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
        {"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
        {"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
        {"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
        {"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
        {"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
};

/* [rt]x_qX_ is prepended to the name string here */
static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
        {"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
        {"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
        {"errors",                 offsetof(struct virtnet_tx, stats.errors)},
        {"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
        {"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
        {"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
        {"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
        {"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
        {"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
        {"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
        {"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
        {"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
        {"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
};

#define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
                            sizeof(rte_virtio_rxq_stat_strings[0]))
#define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
                            sizeof(rte_virtio_txq_stat_strings[0]))

struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];

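/*
 * Send a command to the device over the control virtqueue. The command
 * is staged in the cvq header memzone and described to the device as a
 * descriptor chain: one read-only descriptor for the command header,
 * one per data argument, and a final write-only descriptor into which
 * the device stores the ack status.
 */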
static int
virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
                int *dlen, int pkt_num)
{
        uint32_t head, i;
        int k, sum = 0;
        virtio_net_ctrl_ack status = ~0;
        struct virtio_pmd_ctrl *result;
        struct virtqueue *vq;

        ctrl->status = status;

        if (!cvq || !cvq->vq) {
                PMD_INIT_LOG(ERR, "Control queue is not supported.");
                return -1;
        }

        rte_spinlock_lock(&cvq->lock);
        vq = cvq->vq;
        head = vq->vq_desc_head_idx;

        PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
                "vq->hw->cvq = %p vq = %p",
                vq->vq_desc_head_idx, status, vq->hw->cvq, vq);

        if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
                rte_spinlock_unlock(&cvq->lock);
                return -1;
        }

        memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
                sizeof(struct virtio_pmd_ctrl));

        /*
         * Format is enforced in qemu code:
         * One TX packet for header;
         * At least one TX packet per argument;
         * One RX packet for ACK.
         */
        vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT;
        vq->vq_ring.desc[head].addr = cvq->virtio_net_hdr_mem;
        vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
        vq->vq_free_cnt--;
        i = vq->vq_ring.desc[head].next;

        for (k = 0; k < pkt_num; k++) {
                vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT;
                vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem
                        + sizeof(struct virtio_net_ctrl_hdr)
                        + sizeof(ctrl->status) + sizeof(uint8_t)*sum;
                vq->vq_ring.desc[i].len = dlen[k];
                sum += dlen[k];
                vq->vq_free_cnt--;
                i = vq->vq_ring.desc[i].next;
        }

        vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
        vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem
                        + sizeof(struct virtio_net_ctrl_hdr);
        vq->vq_ring.desc[i].len = sizeof(ctrl->status);
        vq->vq_free_cnt--;

        vq->vq_desc_head_idx = vq->vq_ring.desc[i].next;

        vq_update_avail_ring(vq, head);
        vq_update_avail_idx(vq);

        PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);

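        /*
         * Kick the device, then busy-poll the used ring for completion;
         * control-queue commands are handled synchronously, without
         * interrupts.
         */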
        virtqueue_notify(vq);

        rte_rmb();
        while (VIRTQUEUE_NUSED(vq) == 0) {
                rte_rmb();
                usleep(100);
        }

        while (VIRTQUEUE_NUSED(vq)) {
                uint32_t idx, desc_idx, used_idx;
                struct vring_used_elem *uep;

                used_idx = (uint32_t)(vq->vq_used_cons_idx
                                & (vq->vq_nentries - 1));
                uep = &vq->vq_ring.used->ring[used_idx];
                idx = (uint32_t) uep->id;
                desc_idx = idx;

                while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) {
                        desc_idx = vq->vq_ring.desc[desc_idx].next;
                        vq->vq_free_cnt++;
                }

                vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx;
                vq->vq_desc_head_idx = idx;

                vq->vq_used_cons_idx++;
                vq->vq_free_cnt++;
        }

        PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
                        vq->vq_free_cnt, vq->vq_desc_head_idx);

        result = cvq->virtio_net_hdr_mz->addr;

        rte_spinlock_unlock(&cvq->lock);
        return result->status;
}

static int
virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
        memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));

        dlen[0] = sizeof(uint16_t);

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret) {
                PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
                          "failed, too late to recover now");
                return -EINVAL;
        }

        return 0;
}

static void
virtio_dev_queue_release(void *queue __rte_unused)
{
        /* do nothing */
}

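/*
 * Virtqueues are laid out in queue-pair order: the RX and TX queues of
 * pair N are virtqueues 2N and 2N + 1, and the control queue, when
 * present, is the last one (index max_queue_pairs * 2).
 */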
static int
virtio_get_queue_type(struct virtio_hw *hw, uint16_t vtpci_queue_idx)
{
        if (vtpci_queue_idx == hw->max_queue_pairs * 2)
                return VTNET_CQ;
        else if (vtpci_queue_idx % 2 == 0)
                return VTNET_RQ;
        else
                return VTNET_TQ;
}

static uint16_t
virtio_get_nr_vq(struct virtio_hw *hw)
{
        uint16_t nr_vq = hw->max_queue_pairs * 2;

        if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
                nr_vq += 1;

        return nr_vq;
}

static void
virtio_init_vring(struct virtqueue *vq)
{
        int size = vq->vq_nentries;
        struct vring *vr = &vq->vq_ring;
        uint8_t *ring_mem = vq->vq_ring_virt_mem;

        PMD_INIT_FUNC_TRACE();

        /*
         * Reinitialise since virtio port might have been stopped and restarted
         */
        memset(ring_mem, 0, vq->vq_ring_size);
        vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
        vq->vq_used_cons_idx = 0;
        vq->vq_desc_head_idx = 0;
        vq->vq_avail_idx = 0;
        vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
        vq->vq_free_cnt = vq->vq_nentries;
        memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);

        vring_desc_init(vr->desc, size);

        /*
         * Disable device (host) interrupts to the guest
         */
        virtqueue_disable_intr(vq);
}

static int
virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
{
        char vq_name[VIRTQUEUE_MAX_NAME_SZ];
        char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
        const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
        unsigned int vq_size, size;
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtnet_rx *rxvq = NULL;
        struct virtnet_tx *txvq = NULL;
        struct virtnet_ctl *cvq = NULL;
        struct virtqueue *vq;
        size_t sz_hdr_mz = 0;
        void *sw_ring = NULL;
        int queue_type = virtio_get_queue_type(hw, vtpci_queue_idx);
        int ret;

        PMD_INIT_LOG(DEBUG, "setting up queue: %u", vtpci_queue_idx);

        /*
         * Read the virtqueue size from the Queue Size field.
         * It is always a power of 2, and a value of 0 means the
         * virtqueue does not exist.
         */
        vq_size = VTPCI_OPS(hw)->get_queue_num(hw, vtpci_queue_idx);
        PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
        if (vq_size == 0) {
                PMD_INIT_LOG(ERR, "virtqueue does not exist");
                return -EINVAL;
        }

        if (!rte_is_power_of_2(vq_size)) {
                PMD_INIT_LOG(ERR, "virtqueue size is not power of 2");
                return -EINVAL;
        }

        snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
                 dev->data->port_id, vtpci_queue_idx);

        size = RTE_ALIGN_CEIL(sizeof(*vq) +
                                vq_size * sizeof(struct vq_desc_extra),
                                RTE_CACHE_LINE_SIZE);
        if (queue_type == VTNET_TQ) {
                /*
                 * For each xmit packet, allocate a virtio_net_hdr
                 * and indirect ring elements
                 */
                sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
        } else if (queue_type == VTNET_CQ) {
                /* Allocate a page for control vq command, data and status */
                sz_hdr_mz = PAGE_SIZE;
        }

        vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
                                SOCKET_ID_ANY);
        if (vq == NULL) {
                PMD_INIT_LOG(ERR, "can not allocate vq");
                return -ENOMEM;
        }
        hw->vqs[vtpci_queue_idx] = vq;

        vq->hw = hw;
        vq->vq_queue_index = vtpci_queue_idx;
        vq->vq_nentries = vq_size;

        /*
         * Reserve a memzone for vring elements
         */
        size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
        vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
        PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
                     size, vq->vq_ring_size);

        mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
                                         SOCKET_ID_ANY,
                                         0, VIRTIO_PCI_VRING_ALIGN);
        if (mz == NULL) {
                if (rte_errno == EEXIST)
                        mz = rte_memzone_lookup(vq_name);
                if (mz == NULL) {
                        ret = -ENOMEM;
                        goto fail_q_alloc;
                }
        }

        memset(mz->addr, 0, mz->len);

        vq->vq_ring_mem = mz->iova;
        vq->vq_ring_virt_mem = mz->addr;
        PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem:      0x%" PRIx64,
                     (uint64_t)mz->iova);
        PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64,
                     (uint64_t)(uintptr_t)mz->addr);

        virtio_init_vring(vq);

        if (sz_hdr_mz) {
                snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
                         dev->data->port_id, vtpci_queue_idx);
                hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
                                                     SOCKET_ID_ANY, 0,
                                                     RTE_CACHE_LINE_SIZE);
                if (hdr_mz == NULL) {
                        if (rte_errno == EEXIST)
                                hdr_mz = rte_memzone_lookup(vq_hdr_name);
                        if (hdr_mz == NULL) {
                                ret = -ENOMEM;
                                goto fail_q_alloc;
                        }
                }
        }

        if (queue_type == VTNET_RQ) {
                size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
                               sizeof(vq->sw_ring[0]);

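                /*
                 * The sw_ring shadows the descriptor ring with mbuf
                 * pointers; the extra RTE_PMD_VIRTIO_RX_MAX_BURST
                 * entries are presumably headroom for the simple
                 * (vectorized) Rx path's burst handling.
                 */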
                sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
                                RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
                if (!sw_ring) {
                        PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
                        ret = -ENOMEM;
                        goto fail_q_alloc;
                }

                vq->sw_ring = sw_ring;
                rxvq = &vq->rxq;
                rxvq->vq = vq;
                rxvq->port_id = dev->data->port_id;
                rxvq->mz = mz;
        } else if (queue_type == VTNET_TQ) {
                txvq = &vq->txq;
                txvq->vq = vq;
                txvq->port_id = dev->data->port_id;
                txvq->mz = mz;
                txvq->virtio_net_hdr_mz = hdr_mz;
                txvq->virtio_net_hdr_mem = hdr_mz->iova;
        } else if (queue_type == VTNET_CQ) {
                cvq = &vq->cq;
                cvq->vq = vq;
                cvq->mz = mz;
                cvq->virtio_net_hdr_mz = hdr_mz;
                cvq->virtio_net_hdr_mem = hdr_mz->iova;
                memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);

                hw->cvq = cvq;
        }

        /* For the virtio_user case (that is, when hw->dev is NULL), we use
         * virtual addresses, and _offset_ needs to be set accordingly; see
         * VIRTIO_MBUF_DATA_DMA_ADDR in virtqueue.h for more information.
         */
        if (!hw->virtio_user_dev)
                vq->offset = offsetof(struct rte_mbuf, buf_iova);
        else {
                vq->vq_ring_mem = (uintptr_t)mz->addr;
                vq->offset = offsetof(struct rte_mbuf, buf_addr);
                if (queue_type == VTNET_TQ)
                        txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
                else if (queue_type == VTNET_CQ)
                        cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
        }

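        /*
         * For Tx queues, pre-build one indirect descriptor table per
         * ring slot; its first entry points at the slot's virtio_net_hdr
         * in the same region, so the Tx hot path only has to chain the
         * packet data descriptors behind it.
         */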
        if (queue_type == VTNET_TQ) {
                struct virtio_tx_region *txr;
                unsigned int i;

                txr = hdr_mz->addr;
                memset(txr, 0, vq_size * sizeof(*txr));
                for (i = 0; i < vq_size; i++) {
                        struct vring_desc *start_dp = txr[i].tx_indir;

                        vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir));

                        /* first indirect descriptor is always the tx header */
                        start_dp->addr = txvq->virtio_net_hdr_mem
                                + i * sizeof(*txr)
                                + offsetof(struct virtio_tx_region, tx_hdr);

                        start_dp->len = hw->vtnet_hdr_size;
                        start_dp->flags = VRING_DESC_F_NEXT;
                }
        }

        if (VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
                PMD_INIT_LOG(ERR, "setup_queue failed");
                return -EINVAL;
        }

        return 0;

fail_q_alloc:
        rte_free(sw_ring);
        rte_memzone_free(hdr_mz);
        rte_memzone_free(mz);
        rte_free(vq);

        return ret;
}

static void
virtio_free_queues(struct virtio_hw *hw)
{
        uint16_t nr_vq = virtio_get_nr_vq(hw);
        struct virtqueue *vq;
        int queue_type;
        uint16_t i;

        if (hw->vqs == NULL)
                return;

        for (i = 0; i < nr_vq; i++) {
                vq = hw->vqs[i];
                if (!vq)
                        continue;

                queue_type = virtio_get_queue_type(hw, i);
                if (queue_type == VTNET_RQ) {
                        rte_free(vq->sw_ring);
                        rte_memzone_free(vq->rxq.mz);
                } else if (queue_type == VTNET_TQ) {
                        rte_memzone_free(vq->txq.mz);
                        rte_memzone_free(vq->txq.virtio_net_hdr_mz);
                } else {
                        rte_memzone_free(vq->cq.mz);
                        rte_memzone_free(vq->cq.virtio_net_hdr_mz);
                }

                rte_free(vq);
                hw->vqs[i] = NULL;
        }

        rte_free(hw->vqs);
        hw->vqs = NULL;
}

static int
virtio_alloc_queues(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        uint16_t nr_vq = virtio_get_nr_vq(hw);
        uint16_t i;
        int ret;

        hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
        if (!hw->vqs) {
                PMD_INIT_LOG(ERR, "failed to allocate vqs");
                return -ENOMEM;
        }

        for (i = 0; i < nr_vq; i++) {
                ret = virtio_init_queue(dev, i);
                if (ret < 0) {
                        virtio_free_queues(hw);
                        return ret;
                }
        }

        return 0;
}

static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);

static void
virtio_dev_close(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;

        PMD_INIT_LOG(DEBUG, "virtio_dev_close");

        /* reset the NIC */
        if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
                VTPCI_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
        if (intr_conf->rxq)
                virtio_queues_unbind_intr(dev);

        if (intr_conf->lsc || intr_conf->rxq) {
                virtio_intr_disable(dev);
                rte_intr_efd_disable(dev->intr_handle);
                rte_free(dev->intr_handle->intr_vec);
                dev->intr_handle->intr_vec = NULL;
        }

        vtpci_reset(hw);
        virtio_dev_free_mbufs(dev);
        virtio_free_queues(hw);
}

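/*
 * The rx-mode controls below (promiscuous and allmulticast) are each a
 * single on/off byte sent as a VIRTIO_NET_CTRL_RX class command on the
 * control queue, and therefore depend on VIRTIO_NET_F_CTRL_RX.
 */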
static void
virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
        ctrl.data[0] = 1;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret)
                PMD_INIT_LOG(ERR, "Failed to enable promisc");
}

static void
virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
        ctrl.data[0] = 0;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret)
                PMD_INIT_LOG(ERR, "Failed to disable promisc");
}

static void
virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
        ctrl.data[0] = 1;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret)
                PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
}

static void
virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
        ctrl.data[0] = 0;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret)
                PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
}

#define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
static int
virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
        struct virtio_hw *hw = dev->data->dev_private;
        uint32_t ether_hdr_len = ETHER_HDR_LEN + VLAN_TAG_LEN +
                                 hw->vtnet_hdr_size;
        uint32_t frame_size = mtu + ether_hdr_len;
        uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;

        max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);

        if (mtu < ETHER_MIN_MTU || frame_size > max_frame_size) {
                PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
                        ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
                return -EINVAL;
        }
        return 0;
}

static int
virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
{
        struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
        struct virtqueue *vq = rxvq->vq;

        virtqueue_enable_intr(vq);
        return 0;
}

static int
virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
{
        struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
        struct virtqueue *vq = rxvq->vq;

        virtqueue_disable_intr(vq);
        return 0;
}

/*
 * dev_ops for virtio, bare necessities for basic operation
 */
static const struct eth_dev_ops virtio_eth_dev_ops = {
        .dev_configure           = virtio_dev_configure,
        .dev_start               = virtio_dev_start,
        .dev_stop                = virtio_dev_stop,
        .dev_close               = virtio_dev_close,
        .promiscuous_enable      = virtio_dev_promiscuous_enable,
        .promiscuous_disable     = virtio_dev_promiscuous_disable,
        .allmulticast_enable     = virtio_dev_allmulticast_enable,
        .allmulticast_disable    = virtio_dev_allmulticast_disable,
        .mtu_set                 = virtio_mtu_set,
        .dev_infos_get           = virtio_dev_info_get,
        .stats_get               = virtio_dev_stats_get,
        .xstats_get              = virtio_dev_xstats_get,
        .xstats_get_names        = virtio_dev_xstats_get_names,
        .stats_reset             = virtio_dev_stats_reset,
        .xstats_reset            = virtio_dev_stats_reset,
        .link_update             = virtio_dev_link_update,
        .vlan_offload_set        = virtio_dev_vlan_offload_set,
        .rx_queue_setup          = virtio_dev_rx_queue_setup,
        .rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
        .rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
        .rx_queue_release        = virtio_dev_queue_release,
        .rx_descriptor_done      = virtio_dev_rx_queue_done,
        .tx_queue_setup          = virtio_dev_tx_queue_setup,
        .tx_queue_release        = virtio_dev_queue_release,
        /* collect stats per queue */
        .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
        .vlan_filter_set         = virtio_vlan_filter_set,
        .mac_addr_add            = virtio_mac_addr_add,
        .mac_addr_remove         = virtio_mac_addr_remove,
        .mac_addr_set            = virtio_mac_addr_set,
};

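/*
 * struct rte_eth_link fits in 64 bits here, so a 64-bit compare-and-set
 * over the whole structure gives an atomic snapshot or update of the
 * link status.
 */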
static inline int
virtio_dev_atomic_read_link_status(struct rte_eth_dev *dev,
                                struct rte_eth_link *link)
{
        struct rte_eth_link *dst = link;
        struct rte_eth_link *src = &(dev->data->dev_link);

        if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
                        *(uint64_t *)src) == 0)
                return -1;

        return 0;
}

/**
 * Atomically writes the link status information into global
 * structure rte_eth_dev.
 *
 * @param dev
 *   Pointer to the structure rte_eth_dev to write to.
 * @param link
 *   Pointer to the link status to be written.
 *
 * @return
 *   - On success, zero.
 *   - On failure, negative value.
 */
static inline int
virtio_dev_atomic_write_link_status(struct rte_eth_dev *dev,
                struct rte_eth_link *link)
{
        struct rte_eth_link *dst = &(dev->data->dev_link);
        struct rte_eth_link *src = link;

        if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
                                        *(uint64_t *)src) == 0)
                return -1;

        return 0;
}

static void
virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        unsigned i;

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                const struct virtnet_tx *txvq = dev->data->tx_queues[i];
                if (txvq == NULL)
                        continue;

                stats->opackets += txvq->stats.packets;
                stats->obytes += txvq->stats.bytes;
                stats->oerrors += txvq->stats.errors;

                if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
                        stats->q_opackets[i] = txvq->stats.packets;
                        stats->q_obytes[i] = txvq->stats.bytes;
                }
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
                if (rxvq == NULL)
                        continue;

                stats->ipackets += rxvq->stats.packets;
                stats->ibytes += rxvq->stats.bytes;
                stats->ierrors += rxvq->stats.errors;

                if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
                        stats->q_ipackets[i] = rxvq->stats.packets;
                        stats->q_ibytes[i] = rxvq->stats.bytes;
                }
        }

        stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
}

static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
                                       struct rte_eth_xstat_name *xstats_names,
                                       __rte_unused unsigned limit)
{
        unsigned i;
        unsigned count = 0;
        unsigned t;

        unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
                dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;

        if (xstats_names != NULL) {
                /* Note: limit checked in rte_eth_xstats_names() */

                for (i = 0; i < dev->data->nb_rx_queues; i++) {
                        struct virtnet_rx *rxvq = dev->data->rx_queues[i];
                        if (rxvq == NULL)
                                continue;
                        for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
                                snprintf(xstats_names[count].name,
                                        sizeof(xstats_names[count].name),
                                        "rx_q%u_%s", i,
                                        rte_virtio_rxq_stat_strings[t].name);
                                count++;
                        }
                }

                for (i = 0; i < dev->data->nb_tx_queues; i++) {
                        struct virtnet_tx *txvq = dev->data->tx_queues[i];
                        if (txvq == NULL)
                                continue;
                        for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
                                snprintf(xstats_names[count].name,
                                        sizeof(xstats_names[count].name),
                                        "tx_q%u_%s", i,
                                        rte_virtio_txq_stat_strings[t].name);
                                count++;
                        }
                }
                return count;
        }
        return nstats;
}

static int
virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
                      unsigned n)
{
        unsigned i;
        unsigned count = 0;

        unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
                dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;

        if (n < nstats)
                return nstats;

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct virtnet_rx *rxvq = dev->data->rx_queues[i];

                if (rxvq == NULL)
                        continue;

                unsigned t;

                for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
                        xstats[count].value = *(uint64_t *)(((char *)rxvq) +
                                rte_virtio_rxq_stat_strings[t].offset);
                        xstats[count].id = count;
                        count++;
                }
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct virtnet_tx *txvq = dev->data->tx_queues[i];

                if (txvq == NULL)
                        continue;

                unsigned t;

                for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
                        xstats[count].value = *(uint64_t *)(((char *)txvq) +
                                rte_virtio_txq_stat_strings[t].offset);
                        xstats[count].id = count;
                        count++;
                }
        }

        return count;
}

static int
virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        virtio_update_stats(dev, stats);

        return 0;
}

static void
virtio_dev_stats_reset(struct rte_eth_dev *dev)
{
        unsigned int i;

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct virtnet_tx *txvq = dev->data->tx_queues[i];
                if (txvq == NULL)
                        continue;

                txvq->stats.packets = 0;
                txvq->stats.bytes = 0;
                txvq->stats.errors = 0;
                txvq->stats.multicast = 0;
                txvq->stats.broadcast = 0;
                memset(txvq->stats.size_bins, 0,
                       sizeof(txvq->stats.size_bins[0]) * 8);
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct virtnet_rx *rxvq = dev->data->rx_queues[i];
                if (rxvq == NULL)
                        continue;

                rxvq->stats.packets = 0;
                rxvq->stats.bytes = 0;
                rxvq->stats.errors = 0;
                rxvq->stats.multicast = 0;
                rxvq->stats.broadcast = 0;
                memset(rxvq->stats.size_bins, 0,
                       sizeof(rxvq->stats.size_bins[0]) * 8);
        }
}

static void
virtio_set_hwaddr(struct virtio_hw *hw)
{
        vtpci_write_dev_config(hw,
                        offsetof(struct virtio_net_config, mac),
                        &hw->mac_addr, ETHER_ADDR_LEN);
}

static void
virtio_get_hwaddr(struct virtio_hw *hw)
{
        if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) {
                vtpci_read_dev_config(hw,
                        offsetof(struct virtio_net_config, mac),
                        &hw->mac_addr, ETHER_ADDR_LEN);
        } else {
                eth_random_addr(&hw->mac_addr[0]);
                virtio_set_hwaddr(hw);
        }
}

static int
virtio_mac_table_set(struct virtio_hw *hw,
                     const struct virtio_net_ctrl_mac *uc,
                     const struct virtio_net_ctrl_mac *mc)
{
        struct virtio_pmd_ctrl ctrl;
        int err, len[2];

        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
                PMD_DRV_LOG(INFO, "host does not support mac table");
                return -1;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;

        len[0] = uc->entries * ETHER_ADDR_LEN + sizeof(uc->entries);
        memcpy(ctrl.data, uc, len[0]);

        len[1] = mc->entries * ETHER_ADDR_LEN + sizeof(mc->entries);
        memcpy(ctrl.data + len[0], mc, len[1]);

        err = virtio_send_command(hw->cvq, &ctrl, len, 2);
        if (err != 0)
                PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
        return err;
}

static int
virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
                    uint32_t index, uint32_t vmdq __rte_unused)
{
        struct virtio_hw *hw = dev->data->dev_private;
        const struct ether_addr *addrs = dev->data->mac_addrs;
        unsigned int i;
        struct virtio_net_ctrl_mac *uc, *mc;

        if (index >= VIRTIO_MAX_MAC_ADDRS) {
                PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
                return -EINVAL;
        }

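        /*
         * Rebuild the unicast and multicast filter tables from scratch,
         * substituting the new address at slot 'index', and push both
         * tables to the device in a single command.
         */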
        uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
        uc->entries = 0;
        mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
        mc->entries = 0;

        for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
                const struct ether_addr *addr
                        = (i == index) ? mac_addr : addrs + i;
                struct virtio_net_ctrl_mac *tbl
                        = is_multicast_ether_addr(addr) ? mc : uc;

                memcpy(&tbl->macs[tbl->entries++], addr, ETHER_ADDR_LEN);
        }

        return virtio_mac_table_set(hw, uc, mc);
}

static void
virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct ether_addr *addrs = dev->data->mac_addrs;
        struct virtio_net_ctrl_mac *uc, *mc;
        unsigned int i;

        if (index >= VIRTIO_MAX_MAC_ADDRS) {
                PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
                return;
        }

        uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
        uc->entries = 0;
        mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
        mc->entries = 0;

        for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
                struct virtio_net_ctrl_mac *tbl;

                if (i == index || is_zero_ether_addr(addrs + i))
                        continue;

                tbl = is_multicast_ether_addr(addrs + i) ? mc : uc;
                memcpy(&tbl->macs[tbl->entries++], addrs + i, ETHER_ADDR_LEN);
        }

        virtio_mac_table_set(hw, uc, mc);
}

static void
virtio_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
{
        struct virtio_hw *hw = dev->data->dev_private;

        memcpy(hw->mac_addr, mac_addr, ETHER_ADDR_LEN);

        /* Use atomic update if available */
        if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
                struct virtio_pmd_ctrl ctrl;
                int len = ETHER_ADDR_LEN;

                ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
                ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;

                memcpy(ctrl.data, mac_addr, ETHER_ADDR_LEN);
                virtio_send_command(hw->cvq, &ctrl, &len, 1);
        } else if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC))
                virtio_set_hwaddr(hw);
}

static int
virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int len;

        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
                return -ENOTSUP;

        ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
        ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
        memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
        len = sizeof(vlan_id);

        return virtio_send_command(hw->cvq, &ctrl, &len, 1);
}

static int
virtio_intr_enable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;

        if (rte_intr_enable(dev->intr_handle) < 0)
                return -1;

        if (!hw->virtio_user_dev)
                hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev));

        return 0;
}

static int
virtio_intr_disable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;

        if (rte_intr_disable(dev->intr_handle) < 0)
                return -1;

        if (!hw->virtio_user_dev)
                hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev));

        return 0;
}

static int
virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
{
        uint64_t host_features;

        /* Prepare guest_features: the features the driver wants to support */
        PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
                req_features);

        /* Read device (host) feature bits */
        host_features = VTPCI_OPS(hw)->get_features(hw);
        PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
                host_features);

        /* If supported, ensure MTU value is valid before acknowledging it. */
        if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
                struct virtio_net_config config;

                vtpci_read_dev_config(hw,
                        offsetof(struct virtio_net_config, mtu),
                        &config.mtu, sizeof(config.mtu));

                if (config.mtu < ETHER_MIN_MTU)
                        req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
        }

        /*
         * Negotiate features: a subset of the device feature bits is
         * written back as the guest feature bits.
         */
        hw->guest_features = req_features;
        hw->guest_features = vtpci_negotiate_features(hw, host_features);
        PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
                hw->guest_features);

        if (hw->modern) {
                if (!vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) {
                        PMD_INIT_LOG(ERR,
                                "VIRTIO_F_VERSION_1 feature is not enabled.");
                        return -1;
                }
                vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
                if (!(vtpci_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
                        PMD_INIT_LOG(ERR,
                                "failed to set FEATURES_OK status!");
                        return -1;
                }
        }

        hw->req_guest_features = req_features;

        return 0;
}

int
virtio_dev_pause(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;

        rte_spinlock_lock(&hw->state_lock);

        if (hw->started == 0) {
                /* Device is just stopped. */
                rte_spinlock_unlock(&hw->state_lock);
                return -1;
        }
        hw->started = 0;
        /*
         * Prevent the worker threads from touching queues to avoid contention,
         * 1 ms should be enough for the ongoing Tx function to finish.
         */
        rte_delay_ms(1);
        return 0;
}

/*
 * Recover hw state to let the worker threads continue.
 */
void
virtio_dev_resume(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;

        hw->started = 1;
        rte_spinlock_unlock(&hw->state_lock);
}

/*
 * Should be called only after device is paused.
 */
int
virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
                int nb_pkts)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtnet_tx *txvq = dev->data->tx_queues[0];
        int ret;

        hw->inject_pkts = tx_pkts;
        ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
        hw->inject_pkts = NULL;

        return ret;
}

/*
 * Process Virtio Config changed interrupt and call the callback
 * if link state changed.
 */
void
virtio_interrupt_handler(void *param)
{
        struct rte_eth_dev *dev = param;
        struct virtio_hw *hw = dev->data->dev_private;
        uint8_t isr;

        /* Read interrupt status which clears interrupt */
        isr = vtpci_isr(hw);
        PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);

        if (virtio_intr_enable(dev) < 0)
                PMD_DRV_LOG(ERR, "interrupt enable failed");

        if (isr & VIRTIO_PCI_ISR_CONFIG) {
                if (virtio_dev_link_update(dev, 0) == 0)
                        _rte_eth_dev_callback_process(dev,
                                                      RTE_ETH_EVENT_INTR_LSC,
                                                      NULL);
        }
}

/* set rx and tx handlers according to what is supported */
static void
set_rxtx_funcs(struct rte_eth_dev *eth_dev)
{
        struct virtio_hw *hw = eth_dev->data->dev_private;

        if (hw->use_simple_rx) {
                PMD_INIT_LOG(INFO, "virtio: using simple Rx path on port %u",
                        eth_dev->data->port_id);
                eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
        } else if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
                PMD_INIT_LOG(INFO,
                        "virtio: using mergeable buffer Rx path on port %u",
                        eth_dev->data->port_id);
                eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
        } else {
                PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
                        eth_dev->data->port_id);
                eth_dev->rx_pkt_burst = &virtio_recv_pkts;
        }

        if (hw->use_simple_tx) {
                PMD_INIT_LOG(INFO, "virtio: using simple Tx path on port %u",
                        eth_dev->data->port_id);
                eth_dev->tx_pkt_burst = virtio_xmit_pkts_simple;
        } else {
                PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
                        eth_dev->data->port_id);
                eth_dev->tx_pkt_burst = virtio_xmit_pkts;
        }
}

/* Only support 1:1 queue/interrupt mapping so far.
 * TODO: support n:1 queue/interrupt mapping when there are limited number of
 * interrupt vectors (<N+1).
 */
static int
virtio_queues_bind_intr(struct rte_eth_dev *dev)
{
        uint32_t i;
        struct virtio_hw *hw = dev->data->dev_private;

        PMD_INIT_LOG(INFO, "queue/interrupt binding");
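        /* Queue vectors start at 1; vector 0 is left for the config-change
         * interrupt.
         */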
        for (i = 0; i < dev->data->nb_rx_queues; ++i) {
                dev->intr_handle->intr_vec[i] = i + 1;
                if (VTPCI_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
                                                 VIRTIO_MSI_NO_VECTOR) {
                        PMD_DRV_LOG(ERR, "failed to set queue vector");
                        return -EBUSY;
                }
        }

        return 0;
}

static void
virtio_queues_unbind_intr(struct rte_eth_dev *dev)
{
        uint32_t i;
        struct virtio_hw *hw = dev->data->dev_private;

        PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
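        /* VTNET_CQ equals 2, so i * VTNET_CQ addresses the RX virtqueue of
         * pair i, mirroring the bind loop above.
         */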
        for (i = 0; i < dev->data->nb_rx_queues; ++i)
                VTPCI_OPS(hw)->set_queue_irq(hw,
                                             hw->vqs[i * VTNET_CQ],
                                             VIRTIO_MSI_NO_VECTOR);
}

static int
virtio_configure_intr(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;

        if (!rte_intr_cap_multiple(dev->intr_handle)) {
                PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
                return -ENOTSUP;
        }

        if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
                PMD_INIT_LOG(ERR, "Fail to create eventfd");
                return -1;
        }

        if (!dev->intr_handle->intr_vec) {
                dev->intr_handle->intr_vec =
                        rte_zmalloc("intr_vec",
                                    hw->max_queue_pairs * sizeof(int), 0);
                if (!dev->intr_handle->intr_vec) {
                        PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
                                     hw->max_queue_pairs);
                        return -ENOMEM;
                }
        }

        /* Re-register callback to update max_intr */
        rte_intr_callback_unregister(dev->intr_handle,
                                     virtio_interrupt_handler,
                                     dev);
        rte_intr_callback_register(dev->intr_handle,
                                   virtio_interrupt_handler,
                                   dev);

        /* DO NOT try to remove this! This function will enable msix, or QEMU
         * will encounter SIGSEGV when DRIVER_OK is sent.
         * And for legacy devices, this should be done before queue/vec binding
         * to change the config size from 20 to 24, or VIRTIO_MSI_QUEUE_VECTOR
         * (22) will be ignored.
         */
        if (virtio_intr_enable(dev) < 0) {
                PMD_DRV_LOG(ERR, "interrupt enable failed");
                return -1;
        }

        if (virtio_queues_bind_intr(dev) < 0) {
                PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
                return -1;
        }

        return 0;
}

/* reset device and renegotiate features if needed */
static int
virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
{
        struct virtio_hw *hw = eth_dev->data->dev_private;
        struct virtio_net_config *config;
        struct virtio_net_config local_config;
        struct rte_pci_device *pci_dev = NULL;
        int ret;

        /* Reset the device although not necessary at startup */
        vtpci_reset(hw);

        /* Tell the host we've noticed this device. */
        vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);

        /* Tell the host we know how to drive the device. */
        vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
        if (virtio_negotiate_features(hw, req_features) < 0)
                return -1;

        if (!hw->virtio_user_dev) {
                pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
                rte_eth_copy_pci_info(eth_dev, pci_dev);
        }

        /* Enable LSC only if the host supports both link status and MSI-X */
1453         if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS) &&
1454             hw->use_msix != VIRTIO_MSIX_NONE)
1455                 eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
1456         else
1457                 eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
1458
1459         /* Setting up rx_header size for the device */
1460         if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
1461             vtpci_with_feature(hw, VIRTIO_F_VERSION_1))
1462                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1463         else
1464                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
1465
1466         /* Copy the permanent MAC address into virtio_hw */
1467         virtio_get_hwaddr(hw);
1468         ether_addr_copy((struct ether_addr *) hw->mac_addr,
1469                         &eth_dev->data->mac_addrs[0]);
1470         PMD_INIT_LOG(DEBUG,
1471                      "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
1472                      hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
1473                      hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
1474
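             /* The device config space is read only when a control queue was
              * negotiated; each optional field below is guarded by its
              * feature bit.
              */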
1475         if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
1476                 config = &local_config;
1477
1478                 vtpci_read_dev_config(hw,
1479                         offsetof(struct virtio_net_config, mac),
1480                         &config->mac, sizeof(config->mac));
1481
1482                 if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1483                         vtpci_read_dev_config(hw,
1484                                 offsetof(struct virtio_net_config, status),
1485                                 &config->status, sizeof(config->status));
1486                 } else {
1487                         PMD_INIT_LOG(DEBUG,
1488                                      "VIRTIO_NET_F_STATUS is not supported");
1489                         config->status = 0;
1490                 }
1491
1492                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
1493                         vtpci_read_dev_config(hw,
1494                                 offsetof(struct virtio_net_config, max_virtqueue_pairs),
1495                                 &config->max_virtqueue_pairs,
1496                                 sizeof(config->max_virtqueue_pairs));
1497                 } else {
1498                         PMD_INIT_LOG(DEBUG,
1499                                      "VIRTIO_NET_F_MQ is not supported");
1500                         config->max_virtqueue_pairs = 1;
1501                 }
1502
1503                 hw->max_queue_pairs = config->max_virtqueue_pairs;
1504
1505                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MTU)) {
1506                         vtpci_read_dev_config(hw,
1507                                 offsetof(struct virtio_net_config, mtu),
1508                                 &config->mtu,
1509                                 sizeof(config->mtu));
1510
1511                         /*
1512                          * MTU value has already been checked at negotiation
1513                          * time, but check again in case it has changed since
1514                          * then, which should not happen.
1515                          */
1516                         if (config->mtu < ETHER_MIN_MTU) {
1517                                 PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
1518                                                 config->mtu);
1519                                 return -1;
1520                         }
1521
1522                         hw->max_mtu = config->mtu;
1523                         /* Set the initial MTU to the maximum supported by vhost */
1524                         eth_dev->data->mtu = config->mtu;
1525
1526                 } else {
1527                         hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN -
1528                                 VLAN_TAG_LEN - hw->vtnet_hdr_size;
1529                 }
1530
1531                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
1532                                 config->max_virtqueue_pairs);
1533                 PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
1534                 PMD_INIT_LOG(DEBUG,
1535                                 "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
1536                                 config->mac[0], config->mac[1],
1537                                 config->mac[2], config->mac[3],
1538                                 config->mac[4], config->mac[5]);
1539         } else {
1540                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
1541                 hw->max_queue_pairs = 1;
1542                 hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN -
1543                         VLAN_TAG_LEN - hw->vtnet_hdr_size;
1544         }
1545
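             /* Allocate all virtqueues (one Rx/Tx pair per queue pair, plus
              * the control queue if one was negotiated).
              */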
1546         ret = virtio_alloc_queues(eth_dev);
1547         if (ret < 0)
1548                 return ret;
1549
1550         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1551                 if (virtio_configure_intr(eth_dev) < 0) {
1552                         PMD_INIT_LOG(ERR, "failed to configure interrupt");
1553                         return -1;
1554                 }
1555         }
1556
1557         vtpci_reinit_complete(hw);
1558
1559         if (pci_dev)
1560                 PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
1561                         eth_dev->data->port_id, pci_dev->id.vendor_id,
1562                         pci_dev->id.device_id);
1563
1564         return 0;
1565 }
1566
1567 /*
1568  * Remap the PCI device (IO port map for a legacy device, memory map
1569  * for a modern device) so that the secondary process has the PCI
1570  * device initialized correctly.
1571  */
1572 static int
1573 virtio_remap_pci(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
1574 {
1575         if (hw->modern) {
1576                 /*
1577                  * We don't have to re-parse the PCI config space, since
1578                  * rte_pci_map_device() makes sure the address mapped in
1579                  * the secondary process equals the one mapped in the
1580                  * primary process: an error is returned if that
1581                  * requirement is not met.
1582                  *
1583                  * That means we can simply reuse all capability pointers
1584                  * (such as dev_cfg, common_cfg, etc.) parsed by the
1585                  * primary process, which are stored in shared memory.
1586                  */
1587                 if (rte_pci_map_device(pci_dev)) {
1588                         PMD_INIT_LOG(DEBUG, "failed to map pci device!");
1589                         return -1;
1590                 }
1591         } else {
1592                 if (rte_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0)
1593                         return -1;
1594         }
1595
1596         return 0;
1597 }
1598
1599 static void
1600 virtio_set_vtpci_ops(struct virtio_hw *hw)
1601 {
1602 #ifdef RTE_VIRTIO_USER
1603         if (hw->virtio_user_dev)
1604                 VTPCI_OPS(hw) = &virtio_user_ops;
1605         else
1606 #endif
1607         if (hw->modern)
1608                 VTPCI_OPS(hw) = &modern_ops;
1609         else
1610                 VTPCI_OPS(hw) = &legacy_ops;
1611 }
1612
1613 /*
1614  * This function is based on the probe() function in virtio_pci.c.
1615  * It returns 0 on success.
1616  */
1617 int
1618 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
1619 {
1620         struct virtio_hw *hw = eth_dev->data->dev_private;
1621         int ret;
1622
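             /* The datapath stores the virtio-net header in the mbuf headroom,
              * so the headroom must fit the largest header format.
              */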
1623         RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf));
1624
1625         eth_dev->dev_ops = &virtio_eth_dev_ops;
1626
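             /* A secondary process only remaps the device and wires up the
              * ops/datapath function pointers; the primary process does the
              * full initialization.
              */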
1627         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1628                 if (!hw->virtio_user_dev) {
1629                         ret = virtio_remap_pci(RTE_ETH_DEV_TO_PCI(eth_dev), hw);
1630                         if (ret)
1631                                 return ret;
1632                 }
1633
1634                 virtio_set_vtpci_ops(hw);
1635                 set_rxtx_funcs(eth_dev);
1636
1637                 return 0;
1638         }
1639
1640         /* Allocate memory for storing MAC addresses */
1641         eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0);
1642         if (eth_dev->data->mac_addrs == NULL) {
1643                 PMD_INIT_LOG(ERR,
1644                         "Failed to allocate %d bytes needed to store MAC addresses",
1645                         VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN);
1646                 return -ENOMEM;
1647         }
1648
1649         hw->port_id = eth_dev->data->port_id;
1650         /* In the virtio_user case, hw->virtio_user_dev is populated by
1651          * virtio_user_eth_dev_alloc() before eth_virtio_dev_init() is called.
1652          */
1653         if (!hw->virtio_user_dev) {
1654                 ret = vtpci_init(RTE_ETH_DEV_TO_PCI(eth_dev), hw);
1655                 if (ret)
1656                         goto out;
1657         }
1658
1659         /* reset device and negotiate default features */
1660         ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
1661         if (ret < 0)
1662                 goto out;
1663
1664         /* Setup interrupt callback  */
1665         if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1666                 rte_intr_callback_register(eth_dev->intr_handle,
1667                         virtio_interrupt_handler, eth_dev);
1668
1669         return 0;
1670
1671 out:
1672         rte_free(eth_dev->data->mac_addrs);
1673         return ret;
1674 }
1675
1676 static int
1677 eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
1678 {
1679         PMD_INIT_FUNC_TRACE();
1680
1681         if (rte_eal_process_type() == RTE_PROC_SECONDARY)
1682                 return -EPERM;
1683
1684         virtio_dev_stop(eth_dev);
1685         virtio_dev_close(eth_dev);
1686
1687         eth_dev->dev_ops = NULL;
1688         eth_dev->tx_pkt_burst = NULL;
1689         eth_dev->rx_pkt_burst = NULL;
1690
1691         rte_free(eth_dev->data->mac_addrs);
1692         eth_dev->data->mac_addrs = NULL;
1693
1694         /* reset interrupt callback  */
1695         if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1696                 rte_intr_callback_unregister(eth_dev->intr_handle,
1697                                                 virtio_interrupt_handler,
1698                                                 eth_dev);
1699         if (eth_dev->device)
1700                 rte_pci_unmap_device(RTE_ETH_DEV_TO_PCI(eth_dev));
1701
1702         PMD_INIT_LOG(DEBUG, "dev_uninit completed");
1703
1704         return 0;
1705 }
1706
1707 static int eth_virtio_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1708         struct rte_pci_device *pci_dev)
1709 {
1710         return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct virtio_hw),
1711                 eth_virtio_dev_init);
1712 }
1713
1714 static int eth_virtio_pci_remove(struct rte_pci_device *pci_dev)
1715 {
1716         return rte_eth_dev_pci_generic_remove(pci_dev, eth_virtio_dev_uninit);
1717 }
1718
1719 static struct rte_pci_driver rte_virtio_pmd = {
1720         .driver = {
1721                 .name = "net_virtio",
1722         },
1723         .id_table = pci_id_virtio_map,
1724         .drv_flags = 0,
1725         .probe = eth_virtio_pci_probe,
1726         .remove = eth_virtio_pci_remove,
1727 };
1728
1729 RTE_INIT(rte_virtio_pmd_init);
1730 static void
1731 rte_virtio_pmd_init(void)
1732 {
1733         if (rte_eal_iopl_init() != 0) {
1734                 PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD");
1735                 return;
1736         }
1737
1738         rte_pci_register(&rte_virtio_pmd);
1739 }
1740
1741 /*
1742  * Configure the virtio device.
1743  * It returns 0 on success.
1744  */
1745 static int
1746 virtio_dev_configure(struct rte_eth_dev *dev)
1747 {
1748         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
1749         struct virtio_hw *hw = dev->data->dev_private;
1750         uint64_t req_features;
1751         int ret;
1752
1753         PMD_INIT_LOG(DEBUG, "configure");
1754         req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
1755
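             /* When Rx interrupts are requested, reinitialize the device now
              * so that virtio_configure_intr() runs before the queues are
              * used.
              */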
1756         if (dev->data->dev_conf.intr_conf.rxq) {
1757                 ret = virtio_init_device(dev, hw->req_guest_features);
1758                 if (ret < 0)
1759                         return ret;
1760         }
1761
1762         /* The name hw_ip_checksum is a bit confusing since it can be
1763          * set by the application to request L3 and/or L4 checksums. In
1764          * case of virtio, only L4 checksum is supported.
1765          */
1766         if (rxmode->hw_ip_checksum)
1767                 req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
1768
1769         if (rxmode->enable_lro)
1770                 req_features |=
1771                         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
1772                         (1ULL << VIRTIO_NET_F_GUEST_TSO6);
1773
1774         /* if the requested features changed, reinit the device */
1775         if (req_features != hw->req_guest_features) {
1776                 ret = virtio_init_device(dev, req_features);
1777                 if (ret < 0)
1778                         return ret;
1779         }
1780
1781         if (rxmode->hw_ip_checksum &&
1782                 !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
1783                 PMD_DRV_LOG(ERR,
1784                         "rx checksum not available on this host");
1785                 return -ENOTSUP;
1786         }
1787
1788         if (rxmode->enable_lro &&
1789                 (!vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
1790                  !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
1791                 PMD_DRV_LOG(ERR,
1792                         "Large Receive Offload not available on this host");
1793                 return -ENOTSUP;
1794         }
1795
1796         /* start control queue */
1797         if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
1798                 virtio_dev_cq_start(dev);
1799
1800         hw->vlan_strip = rxmode->hw_vlan_strip;
1801
1802         if (rxmode->hw_vlan_filter
1803             && !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
1804                 PMD_DRV_LOG(ERR,
1805                             "vlan filtering not available on this host");
1806                 return -ENOTSUP;
1807         }
1808
1809         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1810                 /* Enable vector (0) for Link State Interrupt */
1811                 if (VTPCI_OPS(hw)->set_config_irq(hw, 0) ==
1812                                 VIRTIO_MSI_NO_VECTOR) {
1813                         PMD_DRV_LOG(ERR, "failed to set config vector");
1814                         return -EBUSY;
1815                 }
1816
1817         rte_spinlock_init(&hw->state_lock);
1818
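             /* Default to the simple Rx/Tx paths; they are disabled below
              * when mergeable buffers, Rx checksum offload, or a missing
              * NEON flag on ARM make them unusable.
              */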
1819         hw->use_simple_rx = 1;
1820         hw->use_simple_tx = 1;
1821
1822 #if defined RTE_ARCH_ARM64 || defined CONFIG_RTE_ARCH_ARM
1823         if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
1824                 hw->use_simple_rx = 0;
1825                 hw->use_simple_tx = 0;
1826         }
1827 #endif
1828         if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1829                 hw->use_simple_rx = 0;
1830                 hw->use_simple_tx = 0;
1831         }
1832
1833         if (rxmode->hw_ip_checksum)
1834                 hw->use_simple_rx = 0;
1835
1836         return 0;
1837 }
1838
1839
1840 static int
1841 virtio_dev_start(struct rte_eth_dev *dev)
1842 {
1843         uint16_t nb_queues, i;
1844         struct virtnet_rx *rxvq;
1845         struct virtnet_tx *txvq __rte_unused;
1846         struct virtio_hw *hw = dev->data->dev_private;
1847         int ret;
1848
1849         /* Finish the initialization of the queues */
1850         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1851                 ret = virtio_dev_rx_queue_setup_finish(dev, i);
1852                 if (ret < 0)
1853                         return ret;
1854         }
1855         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1856                 ret = virtio_dev_tx_queue_setup_finish(dev, i);
1857                 if (ret < 0)
1858                         return ret;
1859         }
1860
1861         /* check if lsc interrupt feature is enabled */
1862         if (dev->data->dev_conf.intr_conf.lsc) {
1863                 if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1864                         PMD_DRV_LOG(ERR, "link status not supported by host");
1865                         return -ENOTSUP;
1866                 }
1867         }
1868
1869         /* Enable uio/vfio intr/eventfd mapping: although we already did
1870          * this in device configure, it could have been unmapped when the
1871          * device was stopped.
1872          */
1873         if (dev->data->dev_conf.intr_conf.lsc ||
1874             dev->data->dev_conf.intr_conf.rxq) {
1875                 virtio_intr_disable(dev);
1876
1877                 if (virtio_intr_enable(dev) < 0) {
1878                         PMD_DRV_LOG(ERR, "interrupt enable failed");
1879                         return -EIO;
1880                 }
1881         }
1882
1883         /* Notify the backend; otherwise the tap backend might already
1884          * have stopped its queue due to fullness, and the vhost backend
1885          * would have no chance to be woken up.
1886          */
1887         nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
1888         if (hw->max_queue_pairs > 1) {
1889                 if (virtio_set_multiple_queues(dev, nb_queues) != 0)
1890                         return -EINVAL;
1891         }
1892
1893         PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
1894
1895         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1896                 rxvq = dev->data->rx_queues[i];
1897                 /* Flush the old packets */
1898                 virtqueue_rxvq_flush(rxvq->vq);
1899                 virtqueue_notify(rxvq->vq);
1900         }
1901
1902         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1903                 txvq = dev->data->tx_queues[i];
1904                 virtqueue_notify(txvq->vq);
1905         }
1906
1907         PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
1908
1909         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1910                 rxvq = dev->data->rx_queues[i];
1911                 VIRTQUEUE_DUMP(rxvq->vq);
1912         }
1913
1914         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1915                 txvq = dev->data->tx_queues[i];
1916                 VIRTQUEUE_DUMP(txvq->vq);
1917         }
1918
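             /* Select the final Rx/Tx burst functions now that the feature
              * set and the simple-path flags are settled.
              */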
1919         set_rxtx_funcs(dev);
1920         hw->started = 1;
1921
1922         /* Initialize Link state */
1923         virtio_dev_link_update(dev, 0);
1924
1925         return 0;
1926 }
1927
1928 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
1929 {
1930         struct rte_mbuf *buf;
1931         int i, mbuf_num = 0;
1932
1933         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1934                 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1935
1936                 PMD_INIT_LOG(DEBUG,
1937                              "Before freeing rxq[%d] used and unused buf", i);
1938                 VIRTQUEUE_DUMP(rxvq->vq);
1939
1940                 PMD_INIT_LOG(DEBUG, "rx_queues[%d]=%p", i, rxvq);
                     mbuf_num = 0; /* count per queue, matching the log below */
1941                 while ((buf = virtqueue_detatch_unused(rxvq->vq)) != NULL) {
1942                         rte_pktmbuf_free(buf);
1943                         mbuf_num++;
1944                 }
1945
1946                 PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
1947                 PMD_INIT_LOG(DEBUG,
1948                              "After freeing rxq[%d] used and unused buf", i);
1949                 VIRTQUEUE_DUMP(rxvq->vq);
1950         }
1951
1952         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1953                 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1954
1955                 PMD_INIT_LOG(DEBUG,
1956                              "Before freeing txq[%d] used and unused bufs",
1957                              i);
1958                 VIRTQUEUE_DUMP(txvq->vq);
1959
1960                 mbuf_num = 0;
1961                 while ((buf = virtqueue_detatch_unused(txvq->vq)) != NULL) {
1962                         rte_pktmbuf_free(buf);
1963                         mbuf_num++;
1964                 }
1965
1966                 PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
1967                 PMD_INIT_LOG(DEBUG,
1968                              "After freeing txq[%d] used and unused buf", i);
1969                 VIRTQUEUE_DUMP(txvq->vq);
1970         }
1971 }
1972
1973 /*
1974  * Stop device: disable interrupt and mark link down
1975  */
1976 static void
1977 virtio_dev_stop(struct rte_eth_dev *dev)
1978 {
1979         struct virtio_hw *hw = dev->data->dev_private;
1980         struct rte_eth_link link;
1981         struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
1982
1983         PMD_INIT_LOG(DEBUG, "stop");
1984
1985         rte_spinlock_lock(&hw->state_lock);
1986         if (intr_conf->lsc || intr_conf->rxq)
1987                 virtio_intr_disable(dev);
1988
1989         hw->started = 0;
1990         memset(&link, 0, sizeof(link));
1991         virtio_dev_atomic_write_link_status(dev, &link);
1992         rte_spinlock_unlock(&hw->state_lock);
1993 }
1994
1995 static int
1996 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
1997 {
1998         struct rte_eth_link link, old;
1999         uint16_t status;
2000         struct virtio_hw *hw = dev->data->dev_private;
2001         memset(&link, 0, sizeof(link));
2002         virtio_dev_atomic_read_link_status(dev, &link);
2003         old = link;
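             /* Virtio has no real PHY; report fixed full duplex and a nominal
              * 10G speed.
              */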
2004         link.link_duplex = ETH_LINK_FULL_DUPLEX;
2005         link.link_speed  = ETH_SPEED_NUM_10G;
2006
2007         if (hw->started == 0) {
2008                 link.link_status = ETH_LINK_DOWN;
2009         } else if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2010                 PMD_INIT_LOG(DEBUG, "Get link status from hw");
2011                 vtpci_read_dev_config(hw,
2012                                 offsetof(struct virtio_net_config, status),
2013                                 &status, sizeof(status));
2014                 if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2015                         link.link_status = ETH_LINK_DOWN;
2016                         PMD_INIT_LOG(DEBUG, "Port %d is down",
2017                                      dev->data->port_id);
2018                 } else {
2019                         link.link_status = ETH_LINK_UP;
2020                         PMD_INIT_LOG(DEBUG, "Port %d is up",
2021                                      dev->data->port_id);
2022                 }
2023         } else {
2024                 link.link_status = ETH_LINK_UP;
2025         }
2026         virtio_dev_atomic_write_link_status(dev, &link);
2027
2028         return (old.link_status == link.link_status) ? -1 : 0;
2029 }
2030
2031 static int
2032 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2033 {
2034         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2035         struct virtio_hw *hw = dev->data->dev_private;
2036
2037         if (mask & ETH_VLAN_FILTER_MASK) {
2038                 if (rxmode->hw_vlan_filter &&
2039                                 !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2040
2041                         PMD_DRV_LOG(NOTICE,
2042                                 "vlan filtering not available on this host");
2043
2044                         return -ENOTSUP;
2045                 }
2046         }
2047
2048         if (mask & ETH_VLAN_STRIP_MASK)
2049                 hw->vlan_strip = rxmode->hw_vlan_strip;
2050
2051         return 0;
2052 }
2053
2054 static void
2055 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2056 {
2057         uint64_t tso_mask, host_features;
2058         struct virtio_hw *hw = dev->data->dev_private;
2059
2060         dev_info->speed_capa = ETH_LINK_SPEED_10G; /* fake value */
2061
2062         dev_info->pci_dev = dev->device ? RTE_ETH_DEV_TO_PCI(dev) : NULL;
2063         dev_info->max_rx_queues =
2064                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
2065         dev_info->max_tx_queues =
2066                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
2067         dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
2068         dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
2069         dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
2070         dev_info->default_txconf = (struct rte_eth_txconf) {
2071                 .txq_flags = ETH_TXQ_FLAGS_NOOFFLOADS
2072         };
2073
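             /* Rx capabilities are derived from what the host offers; Tx
              * capabilities from the already negotiated guest features.
              */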
2074         host_features = VTPCI_OPS(hw)->get_features(hw);
2075         dev_info->rx_offload_capa = 0;
2076         if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
2077                 dev_info->rx_offload_capa |=
2078                         DEV_RX_OFFLOAD_TCP_CKSUM |
2079                         DEV_RX_OFFLOAD_UDP_CKSUM;
2080         }
2081         tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2082                 (1ULL << VIRTIO_NET_F_GUEST_TSO6);
2083         if ((host_features & tso_mask) == tso_mask)
2084                 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;
2085
2086         dev_info->tx_offload_capa = 0;
2087         if (hw->guest_features & (1ULL << VIRTIO_NET_F_CSUM)) {
2088                 dev_info->tx_offload_capa |=
2089                         DEV_TX_OFFLOAD_UDP_CKSUM |
2090                         DEV_TX_OFFLOAD_TCP_CKSUM;
2091         }
2092         tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2093                 (1ULL << VIRTIO_NET_F_HOST_TSO6);
2094         if ((hw->guest_features & tso_mask) == tso_mask)
2095                 dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
2096 }
2097
2098 /*
2099  * This stub enables testpmd to collect per-queue stats.
2100  */
2101 static int
2102 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
2103 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
2104 __rte_unused uint8_t is_rx)
2105 {
2106         return 0;
2107 }
2108
2109 RTE_PMD_EXPORT_NAME(net_virtio, __COUNTER__);
2110 RTE_PMD_REGISTER_PCI_TABLE(net_virtio, pci_id_virtio_map);
2111 RTE_PMD_REGISTER_KMOD_DEP(net_virtio, "* igb_uio | uio_pci_generic | vfio-pci");
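     /* The device must be bound to one of the kernel modules above before this
      * PMD can probe it, e.g. (hypothetical BDF):
      *   usertools/dpdk-devbind.py --bind=vfio-pci 0000:00:03.0
      */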
2112
2113 RTE_INIT(virtio_init_log);
2114 static void
2115 virtio_init_log(void)
2116 {
2117         virtio_logtype_init = rte_log_register("pmd.virtio.init");
2118         if (virtio_logtype_init >= 0)
2119                 rte_log_set_level(virtio_logtype_init, RTE_LOG_NOTICE);
2120         virtio_logtype_driver = rte_log_register("pmd.virtio.driver");
2121         if (virtio_logtype_driver >= 0)
2122                 rte_log_set_level(virtio_logtype_driver, RTE_LOG_NOTICE);
2123 }