drivers/net/virtio/virtio_ethdev.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10
11 #include <ethdev_driver.h>
12 #include <ethdev_pci.h>
13 #include <rte_memcpy.h>
14 #include <rte_string_fns.h>
15 #include <rte_memzone.h>
16 #include <rte_malloc.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_pci.h>
19 #include <rte_bus_pci.h>
20 #include <rte_ether.h>
21 #include <rte_ip.h>
22 #include <rte_arp.h>
23 #include <rte_common.h>
24 #include <rte_errno.h>
25 #include <rte_cpuflags.h>
26 #include <rte_vect.h>
27
28 #include <rte_memory.h>
29 #include <rte_eal.h>
30 #include <rte_dev.h>
31 #include <rte_cycles.h>
32 #include <rte_kvargs.h>
33
34 #include "virtio_ethdev.h"
35 #include "virtio_pci.h"
36 #include "virtio_logs.h"
37 #include "virtqueue.h"
38 #include "virtio_rxtx.h"
39 #include "virtio_user/virtio_user_dev.h"
40
41 static int  virtio_dev_configure(struct rte_eth_dev *dev);
42 static int  virtio_dev_start(struct rte_eth_dev *dev);
43 static int virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
44 static int virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
45 static int virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
46 static int virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
47 static uint32_t virtio_dev_speed_capa_get(uint32_t speed);
48 static int virtio_dev_devargs_parse(struct rte_devargs *devargs,
49         uint32_t *speed,
50         int *vectorized);
51 static int virtio_dev_info_get(struct rte_eth_dev *dev,
52                                 struct rte_eth_dev_info *dev_info);
53 static int virtio_dev_link_update(struct rte_eth_dev *dev,
54         int wait_to_complete);
55 static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
56
57 static void virtio_set_hwaddr(struct virtio_hw *hw);
58 static void virtio_get_hwaddr(struct virtio_hw *hw);
59
60 static int virtio_dev_stats_get(struct rte_eth_dev *dev,
61                                  struct rte_eth_stats *stats);
62 static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
63                                  struct rte_eth_xstat *xstats, unsigned n);
64 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
65                                        struct rte_eth_xstat_name *xstats_names,
66                                        unsigned limit);
67 static int virtio_dev_stats_reset(struct rte_eth_dev *dev);
68 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
69 static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
70                                 uint16_t vlan_id, int on);
71 static int virtio_mac_addr_add(struct rte_eth_dev *dev,
72                                 struct rte_ether_addr *mac_addr,
73                                 uint32_t index, uint32_t vmdq);
74 static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
75 static int virtio_mac_addr_set(struct rte_eth_dev *dev,
76                                 struct rte_ether_addr *mac_addr);
77
78 static int virtio_intr_disable(struct rte_eth_dev *dev);
79
80 static int virtio_dev_queue_stats_mapping_set(
81         struct rte_eth_dev *eth_dev,
82         uint16_t queue_id,
83         uint8_t stat_idx,
84         uint8_t is_rx);
85
86 static void virtio_notify_peers(struct rte_eth_dev *dev);
87 static void virtio_ack_link_announce(struct rte_eth_dev *dev);
88
89 struct rte_virtio_xstats_name_off {
90         char name[RTE_ETH_XSTATS_NAME_SIZE];
91         unsigned offset;
92 };
93
94 /* [rt]x_qX_ is prepended to the name string here */
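/* For example, the first entry below is exposed as "rx_q0_good_packets" for
 * Rx queue 0; see virtio_dev_xstats_get_names() for how the prefix is built.
 */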
95 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
96         {"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
97         {"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
98         {"errors",                 offsetof(struct virtnet_rx, stats.errors)},
99         {"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
100         {"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
101         {"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
102         {"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
103         {"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
104         {"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
105         {"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
106         {"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
107         {"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
108         {"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
109 };
110
111 /* [rt]x_qX_ is prepended to the name string here */
112 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
113         {"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
114         {"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
115         {"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
116         {"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
117         {"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
118         {"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
119         {"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
120         {"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
121         {"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
122         {"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
123         {"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
124         {"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
125 };
126
127 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
128                             sizeof(rte_virtio_rxq_stat_strings[0]))
129 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
130                             sizeof(rte_virtio_txq_stat_strings[0]))
131
132 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
133
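/*
 * Control-queue command helper for packed virtqueues. The layout matches what
 * the device expects: one descriptor for the virtio_net_ctrl_hdr, one
 * descriptor per data argument in dlen[], and a final device-writable
 * descriptor for the ack status. The flags of the head descriptor are stored
 * last (virtqueue_store_flags_packed) so the device never sees a partially
 * built chain, and completion is detected by polling desc_is_used() on that
 * same head descriptor.
 */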
134 static struct virtio_pmd_ctrl *
135 virtio_send_command_packed(struct virtnet_ctl *cvq,
136                            struct virtio_pmd_ctrl *ctrl,
137                            int *dlen, int pkt_num)
138 {
139         struct virtqueue *vq = cvq->vq;
140         int head;
141         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
142         struct virtio_pmd_ctrl *result;
143         uint16_t flags;
144         int sum = 0;
145         int nb_descs = 0;
146         int k;
147
148         /*
149          * Format is enforced in qemu code:
150          * One TX packet for header;
151          * At least one TX packet per argument;
152          * One RX packet for ACK.
153          */
154         head = vq->vq_avail_idx;
155         flags = vq->vq_packed.cached_flags;
156         desc[head].addr = cvq->virtio_net_hdr_mem;
157         desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
158         vq->vq_free_cnt--;
159         nb_descs++;
160         if (++vq->vq_avail_idx >= vq->vq_nentries) {
161                 vq->vq_avail_idx -= vq->vq_nentries;
162                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
163         }
164
165         for (k = 0; k < pkt_num; k++) {
166                 desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
167                         + sizeof(struct virtio_net_ctrl_hdr)
168                         + sizeof(ctrl->status) + sizeof(uint8_t) * sum;
169                 desc[vq->vq_avail_idx].len = dlen[k];
170                 desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
171                         vq->vq_packed.cached_flags;
172                 sum += dlen[k];
173                 vq->vq_free_cnt--;
174                 nb_descs++;
175                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
176                         vq->vq_avail_idx -= vq->vq_nentries;
177                         vq->vq_packed.cached_flags ^=
178                                 VRING_PACKED_DESC_F_AVAIL_USED;
179                 }
180         }
181
182         desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
183                 + sizeof(struct virtio_net_ctrl_hdr);
184         desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
185         desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
186                 vq->vq_packed.cached_flags;
187         vq->vq_free_cnt--;
188         nb_descs++;
189         if (++vq->vq_avail_idx >= vq->vq_nentries) {
190                 vq->vq_avail_idx -= vq->vq_nentries;
191                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
192         }
193
194         virtqueue_store_flags_packed(&desc[head], VRING_DESC_F_NEXT | flags,
195                         vq->hw->weak_barriers);
196
197         virtio_wmb(vq->hw->weak_barriers);
198         virtqueue_notify(vq);
199
200         /* Wait for a used descriptor in the virtqueue;
201          * desc_is_used() has a load-acquire or rte_io_rmb inside.
202          */
203         while (!desc_is_used(&desc[head], vq))
204                 usleep(100);
205
206         /* now get used descriptors */
207         vq->vq_free_cnt += nb_descs;
208         vq->vq_used_cons_idx += nb_descs;
209         if (vq->vq_used_cons_idx >= vq->vq_nentries) {
210                 vq->vq_used_cons_idx -= vq->vq_nentries;
211                 vq->vq_packed.used_wrap_counter ^= 1;
212         }
213
214         PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
215                         "vq->vq_avail_idx=%d\n"
216                         "vq->vq_used_cons_idx=%d\n"
217                         "vq->vq_packed.cached_flags=0x%x\n"
218                         "vq->vq_packed.used_wrap_counter=%d\n",
219                         vq->vq_free_cnt,
220                         vq->vq_avail_idx,
221                         vq->vq_used_cons_idx,
222                         vq->vq_packed.cached_flags,
223                         vq->vq_packed.used_wrap_counter);
224
225         result = cvq->virtio_net_hdr_mz->addr;
226         return result;
227 }
228
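/*
 * Split-virtqueue counterpart of the control-queue helper above: the header,
 * data and status buffers are chained through the descriptor "next" fields,
 * the avail ring and index are updated, and the function then busy-waits on
 * virtqueue_nused() before walking the used ring to reclaim the chain.
 */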
229 static struct virtio_pmd_ctrl *
230 virtio_send_command_split(struct virtnet_ctl *cvq,
231                           struct virtio_pmd_ctrl *ctrl,
232                           int *dlen, int pkt_num)
233 {
234         struct virtio_pmd_ctrl *result;
235         struct virtqueue *vq = cvq->vq;
236         uint32_t head, i;
237         int k, sum = 0;
238
239         head = vq->vq_desc_head_idx;
240
241         /*
242          * Format is enforced in qemu code:
243          * One TX packet for header;
244          * At least one TX packet per argument;
245          * One RX packet for ACK.
246          */
247         vq->vq_split.ring.desc[head].flags = VRING_DESC_F_NEXT;
248         vq->vq_split.ring.desc[head].addr = cvq->virtio_net_hdr_mem;
249         vq->vq_split.ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
250         vq->vq_free_cnt--;
251         i = vq->vq_split.ring.desc[head].next;
252
253         for (k = 0; k < pkt_num; k++) {
254                 vq->vq_split.ring.desc[i].flags = VRING_DESC_F_NEXT;
255                 vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
256                         + sizeof(struct virtio_net_ctrl_hdr)
257                         + sizeof(ctrl->status) + sizeof(uint8_t)*sum;
258                 vq->vq_split.ring.desc[i].len = dlen[k];
259                 sum += dlen[k];
260                 vq->vq_free_cnt--;
261                 i = vq->vq_split.ring.desc[i].next;
262         }
263
264         vq->vq_split.ring.desc[i].flags = VRING_DESC_F_WRITE;
265         vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
266                         + sizeof(struct virtio_net_ctrl_hdr);
267         vq->vq_split.ring.desc[i].len = sizeof(ctrl->status);
268         vq->vq_free_cnt--;
269
270         vq->vq_desc_head_idx = vq->vq_split.ring.desc[i].next;
271
272         vq_update_avail_ring(vq, head);
273         vq_update_avail_idx(vq);
274
275         PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
276
277         virtqueue_notify(vq);
278
279         while (virtqueue_nused(vq) == 0)
280                 usleep(100);
281
282         while (virtqueue_nused(vq)) {
283                 uint32_t idx, desc_idx, used_idx;
284                 struct vring_used_elem *uep;
285
286                 used_idx = (uint32_t)(vq->vq_used_cons_idx
287                                 & (vq->vq_nentries - 1));
288                 uep = &vq->vq_split.ring.used->ring[used_idx];
289                 idx = (uint32_t) uep->id;
290                 desc_idx = idx;
291
292                 while (vq->vq_split.ring.desc[desc_idx].flags &
293                                 VRING_DESC_F_NEXT) {
294                         desc_idx = vq->vq_split.ring.desc[desc_idx].next;
295                         vq->vq_free_cnt++;
296                 }
297
298                 vq->vq_split.ring.desc[desc_idx].next = vq->vq_desc_head_idx;
299                 vq->vq_desc_head_idx = idx;
300
301                 vq->vq_used_cons_idx++;
302                 vq->vq_free_cnt++;
303         }
304
305         PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
306                         vq->vq_free_cnt, vq->vq_desc_head_idx);
307
308         result = cvq->virtio_net_hdr_mz->addr;
309         return result;
310 }
311
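/*
 * Common entry point for control-queue commands. The whole virtio_pmd_ctrl
 * (header + data + status) is copied into the control queue's header memzone,
 * the command is issued through the packed or split helper under cvq->lock,
 * and the status byte written back by the device is returned. At least
 * pkt_num + 2 free descriptors are required (header and status descriptors
 * plus one per data buffer).
 */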
312 static int
313 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
314                     int *dlen, int pkt_num)
315 {
316         virtio_net_ctrl_ack status = ~0;
317         struct virtio_pmd_ctrl *result;
318         struct virtqueue *vq;
319
320         ctrl->status = status;
321
322         if (!cvq || !cvq->vq) {
323                 PMD_INIT_LOG(ERR, "Control queue is not supported.");
324                 return -1;
325         }
326
327         rte_spinlock_lock(&cvq->lock);
328         vq = cvq->vq;
329
330         PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
331                 "vq->hw->cvq = %p vq = %p",
332                 vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
333
334         if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
335                 rte_spinlock_unlock(&cvq->lock);
336                 return -1;
337         }
338
339         memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
340                 sizeof(struct virtio_pmd_ctrl));
341
342         if (vtpci_packed_queue(vq->hw))
343                 result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num);
344         else
345                 result = virtio_send_command_split(cvq, ctrl, dlen, pkt_num);
346
347         rte_spinlock_unlock(&cvq->lock);
348         return result->status;
349 }
350
351 static int
352 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
353 {
354         struct virtio_hw *hw = dev->data->dev_private;
355         struct virtio_pmd_ctrl ctrl;
356         int dlen[1];
357         int ret;
358
359         ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
360         ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
361         memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
362
363         dlen[0] = sizeof(uint16_t);
364
365         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
366         if (ret) {
367                 PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
368                           "failed, this is too late now...");
369                 return -EINVAL;
370         }
371
372         return 0;
373 }
374
375 static void
376 virtio_dev_queue_release(void *queue __rte_unused)
377 {
378         /* do nothing */
379 }
380
381 static uint16_t
382 virtio_get_nr_vq(struct virtio_hw *hw)
383 {
384         uint16_t nr_vq = hw->max_queue_pairs * 2;
385
386         if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
387                 nr_vq += 1;
388
389         return nr_vq;
390 }
391
392 static void
393 virtio_init_vring(struct virtqueue *vq)
394 {
395         int size = vq->vq_nentries;
396         uint8_t *ring_mem = vq->vq_ring_virt_mem;
397
398         PMD_INIT_FUNC_TRACE();
399
400         memset(ring_mem, 0, vq->vq_ring_size);
401
402         vq->vq_used_cons_idx = 0;
403         vq->vq_desc_head_idx = 0;
404         vq->vq_avail_idx = 0;
405         vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
406         vq->vq_free_cnt = vq->vq_nentries;
407         memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
408         if (vtpci_packed_queue(vq->hw)) {
409                 vring_init_packed(&vq->vq_packed.ring, ring_mem,
410                                   VIRTIO_PCI_VRING_ALIGN, size);
411                 vring_desc_init_packed(vq, size);
412         } else {
413                 struct vring *vr = &vq->vq_split.ring;
414
415                 vring_init_split(vr, ring_mem, VIRTIO_PCI_VRING_ALIGN, size);
416                 vring_desc_init_split(vr->desc, size);
417         }
418         /*
419          * Disable the device (host) from interrupting the guest.
420          */
421         virtqueue_disable_intr(vq);
422 }
423
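/*
 * Per-virtqueue setup: read the queue size advertised by the device, allocate
 * the virtqueue structure (including one vq_desc_extra per descriptor),
 * reserve an IOVA-contiguous memzone for the vring itself and, depending on
 * the queue type, a header memzone (TX header regions or the control-queue
 * page) and the RX software ring. The queue is finally handed to the
 * transport via VTPCI_OPS(hw)->setup_queue().
 */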
424 static int
425 virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
426 {
427         char vq_name[VIRTQUEUE_MAX_NAME_SZ];
428         char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
429         const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
430         unsigned int vq_size, size;
431         struct virtio_hw *hw = dev->data->dev_private;
432         struct virtnet_rx *rxvq = NULL;
433         struct virtnet_tx *txvq = NULL;
434         struct virtnet_ctl *cvq = NULL;
435         struct virtqueue *vq;
436         size_t sz_hdr_mz = 0;
437         void *sw_ring = NULL;
438         int queue_type = virtio_get_queue_type(hw, vtpci_queue_idx);
439         int ret;
440         int numa_node = dev->device->numa_node;
441
442         PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
443                         vtpci_queue_idx, numa_node);
444
445         /*
446          * Read the virtqueue size from the Queue Size field.
447          * It is always a power of 2; a value of 0 means the virtqueue does not exist.
448          */
449         vq_size = VTPCI_OPS(hw)->get_queue_num(hw, vtpci_queue_idx);
450         PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
451         if (vq_size == 0) {
452                 PMD_INIT_LOG(ERR, "virtqueue does not exist");
453                 return -EINVAL;
454         }
455
456         if (!vtpci_packed_queue(hw) && !rte_is_power_of_2(vq_size)) {
457                 PMD_INIT_LOG(ERR, "split virtqueue size is not power of 2");
458                 return -EINVAL;
459         }
460
461         snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
462                  dev->data->port_id, vtpci_queue_idx);
463
464         size = RTE_ALIGN_CEIL(sizeof(*vq) +
465                                 vq_size * sizeof(struct vq_desc_extra),
466                                 RTE_CACHE_LINE_SIZE);
467         if (queue_type == VTNET_TQ) {
468                 /*
469                  * For each xmit packet, allocate a virtio_net_hdr
470                  * and indirect ring elements
471                  */
472                 sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
473         } else if (queue_type == VTNET_CQ) {
474                 /* Allocate a page for control vq command, data and status */
475                 sz_hdr_mz = PAGE_SIZE;
476         }
477
478         vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
479                                 numa_node);
480         if (vq == NULL) {
481                 PMD_INIT_LOG(ERR, "can not allocate vq");
482                 return -ENOMEM;
483         }
484         hw->vqs[vtpci_queue_idx] = vq;
485
486         vq->hw = hw;
487         vq->vq_queue_index = vtpci_queue_idx;
488         vq->vq_nentries = vq_size;
489         if (vtpci_packed_queue(hw)) {
490                 vq->vq_packed.used_wrap_counter = 1;
491                 vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL;
492                 vq->vq_packed.event_flags_shadow = 0;
493                 if (queue_type == VTNET_RQ)
494                         vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE;
495         }
496
497         /*
498          * Reserve a memzone for vring elements
499          */
500         size = vring_size(hw, vq_size, VIRTIO_PCI_VRING_ALIGN);
501         vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
502         PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
503                      size, vq->vq_ring_size);
504
505         mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
506                         numa_node, RTE_MEMZONE_IOVA_CONTIG,
507                         VIRTIO_PCI_VRING_ALIGN);
508         if (mz == NULL) {
509                 if (rte_errno == EEXIST)
510                         mz = rte_memzone_lookup(vq_name);
511                 if (mz == NULL) {
512                         ret = -ENOMEM;
513                         goto fail_q_alloc;
514                 }
515         }
516
517         memset(mz->addr, 0, mz->len);
518
519         vq->vq_ring_mem = mz->iova;
520         vq->vq_ring_virt_mem = mz->addr;
521         PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem:      0x%" PRIx64,
522                      (uint64_t)mz->iova);
523         PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64,
524                      (uint64_t)(uintptr_t)mz->addr);
525
526         virtio_init_vring(vq);
527
528         if (sz_hdr_mz) {
529                 snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
530                          dev->data->port_id, vtpci_queue_idx);
531                 hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
532                                 numa_node, RTE_MEMZONE_IOVA_CONTIG,
533                                 RTE_CACHE_LINE_SIZE);
534                 if (hdr_mz == NULL) {
535                         if (rte_errno == EEXIST)
536                                 hdr_mz = rte_memzone_lookup(vq_hdr_name);
537                         if (hdr_mz == NULL) {
538                                 ret = -ENOMEM;
539                                 goto fail_q_alloc;
540                         }
541                 }
542         }
543
544         if (queue_type == VTNET_RQ) {
545                 size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
546                                sizeof(vq->sw_ring[0]);
547
548                 sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
549                                 RTE_CACHE_LINE_SIZE, numa_node);
550                 if (!sw_ring) {
551                         PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
552                         ret = -ENOMEM;
553                         goto fail_q_alloc;
554                 }
555
556                 vq->sw_ring = sw_ring;
557                 rxvq = &vq->rxq;
558                 rxvq->vq = vq;
559                 rxvq->port_id = dev->data->port_id;
560                 rxvq->mz = mz;
561         } else if (queue_type == VTNET_TQ) {
562                 txvq = &vq->txq;
563                 txvq->vq = vq;
564                 txvq->port_id = dev->data->port_id;
565                 txvq->mz = mz;
566                 txvq->virtio_net_hdr_mz = hdr_mz;
567                 txvq->virtio_net_hdr_mem = hdr_mz->iova;
568         } else if (queue_type == VTNET_CQ) {
569                 cvq = &vq->cq;
570                 cvq->vq = vq;
571                 cvq->mz = mz;
572                 cvq->virtio_net_hdr_mz = hdr_mz;
573                 cvq->virtio_net_hdr_mem = hdr_mz->iova;
574                 memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
575
576                 hw->cvq = cvq;
577         }
578
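        /*
         * For TX queues, pre-initialize the per-slot header region: the first
         * indirect descriptor of every slot is pointed at the virtio_net_hdr
         * embedded in the corresponding virtio_tx_region, for both the split
         * and the packed ring layouts.
         */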
579         if (queue_type == VTNET_TQ) {
580                 struct virtio_tx_region *txr;
581                 unsigned int i;
582
583                 txr = hdr_mz->addr;
584                 memset(txr, 0, vq_size * sizeof(*txr));
585                 for (i = 0; i < vq_size; i++) {
586                         /* first indirect descriptor is always the tx header */
587                         if (!vtpci_packed_queue(hw)) {
588                                 struct vring_desc *start_dp = txr[i].tx_indir;
589                                 vring_desc_init_split(start_dp,
590                                                       RTE_DIM(txr[i].tx_indir));
591                                 start_dp->addr = txvq->virtio_net_hdr_mem
592                                         + i * sizeof(*txr)
593                                         + offsetof(struct virtio_tx_region,
594                                                    tx_hdr);
595                                 start_dp->len = hw->vtnet_hdr_size;
596                                 start_dp->flags = VRING_DESC_F_NEXT;
597                         } else {
598                                 struct vring_packed_desc *start_dp =
599                                         txr[i].tx_packed_indir;
600                                 vring_desc_init_indirect_packed(start_dp,
601                                       RTE_DIM(txr[i].tx_packed_indir));
602                                 start_dp->addr = txvq->virtio_net_hdr_mem
603                                         + i * sizeof(*txr)
604                                         + offsetof(struct virtio_tx_region,
605                                                    tx_hdr);
606                                 start_dp->len = hw->vtnet_hdr_size;
607                         }
608                 }
609         }
610
611         if (VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
612                 PMD_INIT_LOG(ERR, "setup_queue failed");
613                 return -EINVAL;
614         }
615
616         return 0;
617
618 fail_q_alloc:
619         rte_free(sw_ring);
620         rte_memzone_free(hdr_mz);
621         rte_memzone_free(mz);
622         rte_free(vq);
623
624         return ret;
625 }
626
627 static void
628 virtio_free_queues(struct virtio_hw *hw)
629 {
630         uint16_t nr_vq = virtio_get_nr_vq(hw);
631         struct virtqueue *vq;
632         int queue_type;
633         uint16_t i;
634
635         if (hw->vqs == NULL)
636                 return;
637
638         for (i = 0; i < nr_vq; i++) {
639                 vq = hw->vqs[i];
640                 if (!vq)
641                         continue;
642
643                 queue_type = virtio_get_queue_type(hw, i);
644                 if (queue_type == VTNET_RQ) {
645                         rte_free(vq->sw_ring);
646                         rte_memzone_free(vq->rxq.mz);
647                 } else if (queue_type == VTNET_TQ) {
648                         rte_memzone_free(vq->txq.mz);
649                         rte_memzone_free(vq->txq.virtio_net_hdr_mz);
650                 } else {
651                         rte_memzone_free(vq->cq.mz);
652                         rte_memzone_free(vq->cq.virtio_net_hdr_mz);
653                 }
654
655                 rte_free(vq);
656                 hw->vqs[i] = NULL;
657         }
658
659         rte_free(hw->vqs);
660         hw->vqs = NULL;
661 }
662
663 static int
664 virtio_alloc_queues(struct rte_eth_dev *dev)
665 {
666         struct virtio_hw *hw = dev->data->dev_private;
667         uint16_t nr_vq = virtio_get_nr_vq(hw);
668         uint16_t i;
669         int ret;
670
671         hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
672         if (!hw->vqs) {
673                 PMD_INIT_LOG(ERR, "failed to allocate vqs");
674                 return -ENOMEM;
675         }
676
677         for (i = 0; i < nr_vq; i++) {
678                 ret = virtio_init_queue(dev, i);
679                 if (ret < 0) {
680                         virtio_free_queues(hw);
681                         return ret;
682                 }
683         }
684
685         return 0;
686 }
687
688 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);
689
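/*
 * Device close: disable the config/Rx interrupts, reset the device, free the
 * mbufs still held by the queues, release the virtqueues and finally unmap
 * the PCI resources (or uninitialize the virtio-user backend). Only the
 * primary process performs the teardown.
 */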
690 int
691 virtio_dev_close(struct rte_eth_dev *dev)
692 {
693         struct virtio_hw *hw = dev->data->dev_private;
694         struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
695
696         PMD_INIT_LOG(DEBUG, "virtio_dev_close");
697         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
698                 return 0;
699
700         if (!hw->opened)
701                 return 0;
702         hw->opened = false;
703
704         /* reset the NIC */
705         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
706                 VTPCI_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
707         if (intr_conf->rxq)
708                 virtio_queues_unbind_intr(dev);
709
710         if (intr_conf->lsc || intr_conf->rxq) {
711                 virtio_intr_disable(dev);
712                 rte_intr_efd_disable(dev->intr_handle);
713                 rte_free(dev->intr_handle->intr_vec);
714                 dev->intr_handle->intr_vec = NULL;
715         }
716
717         vtpci_reset(hw);
718         virtio_dev_free_mbufs(dev);
719         virtio_free_queues(hw);
720
721 #ifdef RTE_VIRTIO_USER
722         if (hw->bus_type == VIRTIO_BUS_USER)
723                 virtio_user_dev_uninit(dev->data->dev_private);
724         else
725 #endif
726         if (dev->device) {
727                 rte_pci_unmap_device(RTE_ETH_DEV_TO_PCI(dev));
728                 if (hw->bus_type == VIRTIO_BUS_PCI_LEGACY)
729                         rte_pci_ioport_unmap(VTPCI_IO(hw));
730         }
731
732         return 0;
733 }
734
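/*
 * The four Rx-mode handlers below (promiscuous/allmulticast enable/disable)
 * follow the same pattern: they require VIRTIO_NET_F_CTRL_RX and send a
 * single-byte VIRTIO_NET_CTRL_RX command on the control queue.
 */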
735 static int
736 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
737 {
738         struct virtio_hw *hw = dev->data->dev_private;
739         struct virtio_pmd_ctrl ctrl;
740         int dlen[1];
741         int ret;
742
743         if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
744                 PMD_INIT_LOG(INFO, "host does not support rx control");
745                 return -ENOTSUP;
746         }
747
748         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
749         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
750         ctrl.data[0] = 1;
751         dlen[0] = 1;
752
753         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
754         if (ret) {
755                 PMD_INIT_LOG(ERR, "Failed to enable promisc");
756                 return -EAGAIN;
757         }
758
759         return 0;
760 }
761
762 static int
763 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
764 {
765         struct virtio_hw *hw = dev->data->dev_private;
766         struct virtio_pmd_ctrl ctrl;
767         int dlen[1];
768         int ret;
769
770         if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
771                 PMD_INIT_LOG(INFO, "host does not support rx control");
772                 return -ENOTSUP;
773         }
774
775         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
776         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
777         ctrl.data[0] = 0;
778         dlen[0] = 1;
779
780         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
781         if (ret) {
782                 PMD_INIT_LOG(ERR, "Failed to disable promisc");
783                 return -EAGAIN;
784         }
785
786         return 0;
787 }
788
789 static int
790 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
791 {
792         struct virtio_hw *hw = dev->data->dev_private;
793         struct virtio_pmd_ctrl ctrl;
794         int dlen[1];
795         int ret;
796
797         if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
798                 PMD_INIT_LOG(INFO, "host does not support rx control");
799                 return -ENOTSUP;
800         }
801
802         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
803         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
804         ctrl.data[0] = 1;
805         dlen[0] = 1;
806
807         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
808         if (ret) {
809                 PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
810                 return -EAGAIN;
811         }
812
813         return 0;
814 }
815
816 static int
817 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
818 {
819         struct virtio_hw *hw = dev->data->dev_private;
820         struct virtio_pmd_ctrl ctrl;
821         int dlen[1];
822         int ret;
823
824         if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
825                 PMD_INIT_LOG(INFO, "host does not support rx control");
826                 return -ENOTSUP;
827         }
828
829         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
830         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
831         ctrl.data[0] = 0;
832         dlen[0] = 1;
833
834         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
835         if (ret) {
836                 PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
837                 return -EAGAIN;
838         }
839
840         return 0;
841 }
842
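/*
 * MTU validation: the resulting frame size includes the Ethernet header, a
 * VLAN tag and the virtio-net header, and must not exceed hw->max_mtu plus
 * that header length, further capped at VIRTIO_MAX_RX_PKTLEN.
 */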
843 #define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
844 static int
845 virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
846 {
847         struct virtio_hw *hw = dev->data->dev_private;
848         uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
849                                  hw->vtnet_hdr_size;
850         uint32_t frame_size = mtu + ether_hdr_len;
851         uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;
852
853         max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);
854
855         if (mtu < RTE_ETHER_MIN_MTU || frame_size > max_frame_size) {
856                 PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
857                         RTE_ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
858                 return -EINVAL;
859         }
860         return 0;
861 }
862
863 static int
864 virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
865 {
866         struct virtio_hw *hw = dev->data->dev_private;
867         struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
868         struct virtqueue *vq = rxvq->vq;
869
870         virtqueue_enable_intr(vq);
871         virtio_mb(hw->weak_barriers);
872         return 0;
873 }
874
875 static int
876 virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
877 {
878         struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
879         struct virtqueue *vq = rxvq->vq;
880
881         virtqueue_disable_intr(vq);
882         return 0;
883 }
884
885 /*
886  * dev_ops for virtio, bare necessities for basic operation
887  */
888 static const struct eth_dev_ops virtio_eth_dev_ops = {
889         .dev_configure           = virtio_dev_configure,
890         .dev_start               = virtio_dev_start,
891         .dev_stop                = virtio_dev_stop,
892         .dev_close               = virtio_dev_close,
893         .promiscuous_enable      = virtio_dev_promiscuous_enable,
894         .promiscuous_disable     = virtio_dev_promiscuous_disable,
895         .allmulticast_enable     = virtio_dev_allmulticast_enable,
896         .allmulticast_disable    = virtio_dev_allmulticast_disable,
897         .mtu_set                 = virtio_mtu_set,
898         .dev_infos_get           = virtio_dev_info_get,
899         .stats_get               = virtio_dev_stats_get,
900         .xstats_get              = virtio_dev_xstats_get,
901         .xstats_get_names        = virtio_dev_xstats_get_names,
902         .stats_reset             = virtio_dev_stats_reset,
903         .xstats_reset            = virtio_dev_stats_reset,
904         .link_update             = virtio_dev_link_update,
905         .vlan_offload_set        = virtio_dev_vlan_offload_set,
906         .rx_queue_setup          = virtio_dev_rx_queue_setup,
907         .rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
908         .rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
909         .rx_queue_release        = virtio_dev_queue_release,
910         .tx_queue_setup          = virtio_dev_tx_queue_setup,
911         .tx_queue_release        = virtio_dev_queue_release,
912         /* collect stats per queue */
913         .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
914         .vlan_filter_set         = virtio_vlan_filter_set,
915         .mac_addr_add            = virtio_mac_addr_add,
916         .mac_addr_remove         = virtio_mac_addr_remove,
917         .mac_addr_set            = virtio_mac_addr_set,
918 };
919
920 /*
921  * dev_ops for virtio-user in secondary processes, where only a
922  * limited set of operations is currently supported.
923  */
924 const struct eth_dev_ops virtio_user_secondary_eth_dev_ops = {
925         .dev_infos_get           = virtio_dev_info_get,
926         .stats_get               = virtio_dev_stats_get,
927         .xstats_get              = virtio_dev_xstats_get,
928         .xstats_get_names        = virtio_dev_xstats_get_names,
929         .stats_reset             = virtio_dev_stats_reset,
930         .xstats_reset            = virtio_dev_stats_reset,
931         /* collect stats per queue */
932         .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
933 };
934
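/*
 * Aggregate the software counters kept per Rx/Tx queue into the generic
 * rte_eth_stats structure; per-queue fields are only filled for the first
 * RTE_ETHDEV_QUEUE_STAT_CNTRS queues.
 */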
935 static void
936 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
937 {
938         unsigned i;
939
940         for (i = 0; i < dev->data->nb_tx_queues; i++) {
941                 const struct virtnet_tx *txvq = dev->data->tx_queues[i];
942                 if (txvq == NULL)
943                         continue;
944
945                 stats->opackets += txvq->stats.packets;
946                 stats->obytes += txvq->stats.bytes;
947
948                 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
949                         stats->q_opackets[i] = txvq->stats.packets;
950                         stats->q_obytes[i] = txvq->stats.bytes;
951                 }
952         }
953
954         for (i = 0; i < dev->data->nb_rx_queues; i++) {
955                 const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
956                 if (rxvq == NULL)
957                         continue;
958
959                 stats->ipackets += rxvq->stats.packets;
960                 stats->ibytes += rxvq->stats.bytes;
961                 stats->ierrors += rxvq->stats.errors;
962
963                 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
964                         stats->q_ipackets[i] = rxvq->stats.packets;
965                         stats->q_ibytes[i] = rxvq->stats.bytes;
966                 }
967         }
968
969         stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
970 }
971
972 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
973                                        struct rte_eth_xstat_name *xstats_names,
974                                        __rte_unused unsigned limit)
975 {
976         unsigned i;
977         unsigned count = 0;
978         unsigned t;
979
980         unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
981                 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
982
983         if (xstats_names != NULL) {
984                 /* Note: limit checked in rte_eth_xstats_names() */
985
986                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
987                         struct virtnet_rx *rxvq = dev->data->rx_queues[i];
988                         if (rxvq == NULL)
989                                 continue;
990                         for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
991                                 snprintf(xstats_names[count].name,
992                                         sizeof(xstats_names[count].name),
993                                         "rx_q%u_%s", i,
994                                         rte_virtio_rxq_stat_strings[t].name);
995                                 count++;
996                         }
997                 }
998
999                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1000                         struct virtnet_tx *txvq = dev->data->tx_queues[i];
1001                         if (txvq == NULL)
1002                                 continue;
1003                         for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1004                                 snprintf(xstats_names[count].name,
1005                                         sizeof(xstats_names[count].name),
1006                                         "tx_q%u_%s", i,
1007                                         rte_virtio_txq_stat_strings[t].name);
1008                                 count++;
1009                         }
1010                 }
1011                 return count;
1012         }
1013         return nstats;
1014 }
1015
1016 static int
1017 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1018                       unsigned n)
1019 {
1020         unsigned i;
1021         unsigned count = 0;
1022
1023         unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1024                 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1025
1026         if (n < nstats)
1027                 return nstats;
1028
1029         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1030                 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1031
1032                 if (rxvq == NULL)
1033                         continue;
1034
1035                 unsigned t;
1036
1037                 for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1038                         xstats[count].value = *(uint64_t *)(((char *)rxvq) +
1039                                 rte_virtio_rxq_stat_strings[t].offset);
1040                         xstats[count].id = count;
1041                         count++;
1042                 }
1043         }
1044
1045         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1046                 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1047
1048                 if (txvq == NULL)
1049                         continue;
1050
1051                 unsigned t;
1052
1053                 for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1054                         xstats[count].value = *(uint64_t *)(((char *)txvq) +
1055                                 rte_virtio_txq_stat_strings[t].offset);
1056                         xstats[count].id = count;
1057                         count++;
1058                 }
1059         }
1060
1061         return count;
1062 }
1063
1064 static int
1065 virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1066 {
1067         virtio_update_stats(dev, stats);
1068
1069         return 0;
1070 }
1071
1072 static int
1073 virtio_dev_stats_reset(struct rte_eth_dev *dev)
1074 {
1075         unsigned int i;
1076
1077         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1078                 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1079                 if (txvq == NULL)
1080                         continue;
1081
1082                 txvq->stats.packets = 0;
1083                 txvq->stats.bytes = 0;
1084                 txvq->stats.multicast = 0;
1085                 txvq->stats.broadcast = 0;
1086                 memset(txvq->stats.size_bins, 0,
1087                        sizeof(txvq->stats.size_bins[0]) * 8);
1088         }
1089
1090         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1091                 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1092                 if (rxvq == NULL)
1093                         continue;
1094
1095                 rxvq->stats.packets = 0;
1096                 rxvq->stats.bytes = 0;
1097                 rxvq->stats.errors = 0;
1098                 rxvq->stats.multicast = 0;
1099                 rxvq->stats.broadcast = 0;
1100                 memset(rxvq->stats.size_bins, 0,
1101                        sizeof(rxvq->stats.size_bins[0]) * 8);
1102         }
1103
1104         return 0;
1105 }
1106
1107 static void
1108 virtio_set_hwaddr(struct virtio_hw *hw)
1109 {
1110         vtpci_write_dev_config(hw,
1111                         offsetof(struct virtio_net_config, mac),
1112                         &hw->mac_addr, RTE_ETHER_ADDR_LEN);
1113 }
1114
1115 static void
1116 virtio_get_hwaddr(struct virtio_hw *hw)
1117 {
1118         if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) {
1119                 vtpci_read_dev_config(hw,
1120                         offsetof(struct virtio_net_config, mac),
1121                         &hw->mac_addr, RTE_ETHER_ADDR_LEN);
1122         } else {
1123                 rte_eth_random_addr(&hw->mac_addr[0]);
1124                 virtio_set_hwaddr(hw);
1125         }
1126 }
1127
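/*
 * Program the device MAC filter tables: a single CTRL_MAC_TABLE_SET command
 * carries two buffers, the unicast table followed by the multicast table.
 * The command is only attempted when VIRTIO_NET_F_CTRL_MAC_ADDR has been
 * negotiated.
 */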
1128 static int
1129 virtio_mac_table_set(struct virtio_hw *hw,
1130                      const struct virtio_net_ctrl_mac *uc,
1131                      const struct virtio_net_ctrl_mac *mc)
1132 {
1133         struct virtio_pmd_ctrl ctrl;
1134         int err, len[2];
1135
1136         if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1137                 PMD_DRV_LOG(INFO, "host does not support mac table");
1138                 return -1;
1139         }
1140
1141         ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1142         ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1143
1144         len[0] = uc->entries * RTE_ETHER_ADDR_LEN + sizeof(uc->entries);
1145         memcpy(ctrl.data, uc, len[0]);
1146
1147         len[1] = mc->entries * RTE_ETHER_ADDR_LEN + sizeof(mc->entries);
1148         memcpy(ctrl.data + len[0], mc, len[1]);
1149
1150         err = virtio_send_command(hw->cvq, &ctrl, len, 2);
1151         if (err != 0)
1152                 PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
1153         return err;
1154 }
1155
1156 static int
1157 virtio_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1158                     uint32_t index, uint32_t vmdq __rte_unused)
1159 {
1160         struct virtio_hw *hw = dev->data->dev_private;
1161         const struct rte_ether_addr *addrs = dev->data->mac_addrs;
1162         unsigned int i;
1163         struct virtio_net_ctrl_mac *uc, *mc;
1164
1165         if (index >= VIRTIO_MAX_MAC_ADDRS) {
1166                 PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1167                 return -EINVAL;
1168         }
1169
1170         uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1171                 sizeof(uc->entries));
1172         uc->entries = 0;
1173         mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1174                 sizeof(mc->entries));
1175         mc->entries = 0;
1176
1177         for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1178                 const struct rte_ether_addr *addr
1179                         = (i == index) ? mac_addr : addrs + i;
1180                 struct virtio_net_ctrl_mac *tbl
1181                         = rte_is_multicast_ether_addr(addr) ? mc : uc;
1182
1183                 memcpy(&tbl->macs[tbl->entries++], addr, RTE_ETHER_ADDR_LEN);
1184         }
1185
1186         return virtio_mac_table_set(hw, uc, mc);
1187 }
1188
1189 static void
1190 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
1191 {
1192         struct virtio_hw *hw = dev->data->dev_private;
1193         struct rte_ether_addr *addrs = dev->data->mac_addrs;
1194         struct virtio_net_ctrl_mac *uc, *mc;
1195         unsigned int i;
1196
1197         if (index >= VIRTIO_MAX_MAC_ADDRS) {
1198                 PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1199                 return;
1200         }
1201
1202         uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1203                 sizeof(uc->entries));
1204         uc->entries = 0;
1205         mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1206                 sizeof(mc->entries));
1207         mc->entries = 0;
1208
1209         for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1210                 struct virtio_net_ctrl_mac *tbl;
1211
1212                 if (i == index || rte_is_zero_ether_addr(addrs + i))
1213                         continue;
1214
1215                 tbl = rte_is_multicast_ether_addr(addrs + i) ? mc : uc;
1216                 memcpy(&tbl->macs[tbl->entries++], addrs + i,
1217                         RTE_ETHER_ADDR_LEN);
1218         }
1219
1220         virtio_mac_table_set(hw, uc, mc);
1221 }
1222
1223 static int
1224 virtio_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1225 {
1226         struct virtio_hw *hw = dev->data->dev_private;
1227
1228         memcpy(hw->mac_addr, mac_addr, RTE_ETHER_ADDR_LEN);
1229
1230         /* Use atomic update if available */
1231         if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1232                 struct virtio_pmd_ctrl ctrl;
1233                 int len = RTE_ETHER_ADDR_LEN;
1234
1235                 ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1236                 ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
1237
1238                 memcpy(ctrl.data, mac_addr, RTE_ETHER_ADDR_LEN);
1239                 return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1240         }
1241
1242         if (!vtpci_with_feature(hw, VIRTIO_NET_F_MAC))
1243                 return -ENOTSUP;
1244
1245         virtio_set_hwaddr(hw);
1246         return 0;
1247 }
1248
1249 static int
1250 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1251 {
1252         struct virtio_hw *hw = dev->data->dev_private;
1253         struct virtio_pmd_ctrl ctrl;
1254         int len;
1255
1256         if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1257                 return -ENOTSUP;
1258
1259         ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1260         ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1261         memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1262         len = sizeof(vlan_id);
1263
1264         return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1265 }
1266
1267 static int
1268 virtio_intr_unmask(struct rte_eth_dev *dev)
1269 {
1270         struct virtio_hw *hw = dev->data->dev_private;
1271
1272         if (rte_intr_ack(dev->intr_handle) < 0)
1273                 return -1;
1274
1275         if (VTPCI_OPS(hw)->intr_detect)
1276                 VTPCI_OPS(hw)->intr_detect(hw);
1277
1278         return 0;
1279 }
1280
1281 static int
1282 virtio_intr_enable(struct rte_eth_dev *dev)
1283 {
1284         struct virtio_hw *hw = dev->data->dev_private;
1285
1286         if (rte_intr_enable(dev->intr_handle) < 0)
1287                 return -1;
1288
1289         if (VTPCI_OPS(hw)->intr_detect)
1290                 VTPCI_OPS(hw)->intr_detect(hw);
1291
1292         return 0;
1293 }
1294
1295 static int
1296 virtio_intr_disable(struct rte_eth_dev *dev)
1297 {
1298         struct virtio_hw *hw = dev->data->dev_private;
1299
1300         if (rte_intr_disable(dev->intr_handle) < 0)
1301                 return -1;
1302
1303         if (VTPCI_OPS(hw)->intr_detect)
1304                 VTPCI_OPS(hw)->intr_detect(hw);
1305
1306         return 0;
1307 }
1308
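/*
 * Feature negotiation: read the device (host) feature bits, drop
 * VIRTIO_NET_F_MTU if the device reports an MTU below RTE_ETHER_MIN_MTU,
 * write back the accepted subset, and for modern PCI and virtio-user devices
 * confirm the result by setting and re-reading FEATURES_OK.
 */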
1309 static int
1310 virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1311 {
1312         uint64_t host_features;
1313
1314         /* Prepare guest_features: feature that driver wants to support */
1315         PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1316                 req_features);
1317
1318         /* Read device(host) feature bits */
1319         host_features = VTPCI_OPS(hw)->get_features(hw);
1320         PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1321                 host_features);
1322
1323         /* If supported, ensure MTU value is valid before acknowledging it. */
1324         if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1325                 struct virtio_net_config config;
1326
1327                 vtpci_read_dev_config(hw,
1328                         offsetof(struct virtio_net_config, mtu),
1329                         &config.mtu, sizeof(config.mtu));
1330
1331                 if (config.mtu < RTE_ETHER_MIN_MTU)
1332                         req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1333         }
1334
1335         /*
1336          * Negotiate features: the subset of device feature bits accepted
1337          * by the driver is written back as the guest feature bits.
1338          */
1339         hw->guest_features = req_features;
1340         hw->guest_features = vtpci_negotiate_features(hw, host_features);
1341         PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1342                 hw->guest_features);
1343
1344         if (hw->bus_type == VIRTIO_BUS_PCI_MODERN && !vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) {
1345                 PMD_INIT_LOG(ERR,
1346                         "VIRTIO_F_VERSION_1 feature is not enabled.");
1347                 return -1;
1348         }
1349
1350         if (hw->bus_type == VIRTIO_BUS_PCI_MODERN || hw->bus_type == VIRTIO_BUS_USER) {
1351                 vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1352                 if (!(vtpci_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1353                         PMD_INIT_LOG(ERR,
1354                                 "failed to set FEATURES_OK status!");
1355                         return -1;
1356                 }
1357         }
1358
1359         hw->req_guest_features = req_features;
1360
1361         return 0;
1362 }
1363
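/*
 * virtio_dev_pause()/virtio_dev_resume() bracket short sections that must not
 * race with the data path: pause takes hw->state_lock and clears hw->started
 * so the data-path threads stop touching the rings, resume restores
 * hw->started and releases the lock. virtio_inject_pkts() may only be called
 * in between, e.g. to send the RARP announcement.
 */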
1364 int
1365 virtio_dev_pause(struct rte_eth_dev *dev)
1366 {
1367         struct virtio_hw *hw = dev->data->dev_private;
1368
1369         rte_spinlock_lock(&hw->state_lock);
1370
1371         if (hw->started == 0) {
1372                 /* Device has already been stopped. */
1373                 rte_spinlock_unlock(&hw->state_lock);
1374                 return -1;
1375         }
1376         hw->started = 0;
1377         /*
1378          * Prevent the worker threads from touching the queues to avoid
1379          * contention; 1 ms should be enough for any in-flight Tx function to finish.
1380          */
1381         rte_delay_ms(1);
1382         return 0;
1383 }
1384
1385 /*
1386  * Recover hw state to let the worker threads continue.
1387  */
1388 void
1389 virtio_dev_resume(struct rte_eth_dev *dev)
1390 {
1391         struct virtio_hw *hw = dev->data->dev_private;
1392
1393         hw->started = 1;
1394         rte_spinlock_unlock(&hw->state_lock);
1395 }
1396
1397 /*
1398  * Should be called only after device is paused.
1399  */
1400 int
1401 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1402                 int nb_pkts)
1403 {
1404         struct virtio_hw *hw = dev->data->dev_private;
1405         struct virtnet_tx *txvq = dev->data->tx_queues[0];
1406         int ret;
1407
1408         hw->inject_pkts = tx_pkts;
1409         ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1410         hw->inject_pkts = NULL;
1411
1412         return ret;
1413 }
1414
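/*
 * Build a RARP frame from the device MAC address and inject it on Tx queue 0
 * while the port is paused. This is the guest-side reaction to
 * VIRTIO_NET_S_ANNOUNCE (typically raised by the host after a live
 * migration), handled in virtio_interrupt_handler() below.
 */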
1415 static void
1416 virtio_notify_peers(struct rte_eth_dev *dev)
1417 {
1418         struct virtio_hw *hw = dev->data->dev_private;
1419         struct virtnet_rx *rxvq;
1420         struct rte_mbuf *rarp_mbuf;
1421
1422         if (!dev->data->rx_queues)
1423                 return;
1424
1425         rxvq = dev->data->rx_queues[0];
1426         if (!rxvq)
1427                 return;
1428
1429         rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
1430                         (struct rte_ether_addr *)hw->mac_addr);
1431         if (rarp_mbuf == NULL) {
1432                 PMD_DRV_LOG(ERR, "failed to make RARP packet.");
1433                 return;
1434         }
1435
1436         /* If the virtio port has just been stopped, there is no need to send RARP. */
1437         if (virtio_dev_pause(dev) < 0) {
1438                 rte_pktmbuf_free(rarp_mbuf);
1439                 return;
1440         }
1441
1442         virtio_inject_pkts(dev, &rarp_mbuf, 1);
1443         virtio_dev_resume(dev);
1444 }
1445
1446 static void
1447 virtio_ack_link_announce(struct rte_eth_dev *dev)
1448 {
1449         struct virtio_hw *hw = dev->data->dev_private;
1450         struct virtio_pmd_ctrl ctrl;
1451
1452         ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
1453         ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
1454
1455         virtio_send_command(hw->cvq, &ctrl, NULL, 0);
1456 }
1457
1458 /*
1459  * Process the virtio config-changed interrupt: invoke the LSC callback
1460  * if the link state changed, and generate a gratuitous RARP packet if
1461  * the status indicates an ANNOUNCE.
1462  */
1463 void
1464 virtio_interrupt_handler(void *param)
1465 {
1466         struct rte_eth_dev *dev = param;
1467         struct virtio_hw *hw = dev->data->dev_private;
1468         uint8_t isr;
1469         uint16_t status;
1470
1471         /* Read the interrupt status; reading it also clears the interrupt */
1472         isr = vtpci_isr(hw);
1473         PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
1474
1475         if (virtio_intr_unmask(dev) < 0)
1476                 PMD_DRV_LOG(ERR, "interrupt enable failed");
1477
1478         if (isr & VIRTIO_PCI_ISR_CONFIG) {
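                /* virtio_dev_link_update() returns 0 only when the link status changed. */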
1479                 if (virtio_dev_link_update(dev, 0) == 0)
1480                         rte_eth_dev_callback_process(dev,
1481                                                      RTE_ETH_EVENT_INTR_LSC,
1482                                                      NULL);
1483
1484                 if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1485                         vtpci_read_dev_config(hw,
1486                                 offsetof(struct virtio_net_config, status),
1487                                 &status, sizeof(status));
1488                         if (status & VIRTIO_NET_S_ANNOUNCE) {
1489                                 virtio_notify_peers(dev);
1490                                 if (hw->cvq)
1491                                         virtio_ack_link_announce(dev);
1492                         }
1493                 }
1494         }
1495 }
1496
1497 /* set rx and tx handlers according to what is supported */
1498 static void
1499 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1500 {
1501         struct virtio_hw *hw = eth_dev->data->dev_private;
1502
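        /*
         * Pick the Tx path first (packed vs split ring; vectorized, in-order or
         * standard), then the matching Rx path, preferring vectorized and falling
         * back to the in-order, mergeable-buffer or standard variants.
         */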
1503         eth_dev->tx_pkt_prepare = virtio_xmit_pkts_prepare;
1504         if (vtpci_packed_queue(hw)) {
1505                 PMD_INIT_LOG(INFO,
1506                         "virtio: using packed ring %s Tx path on port %u",
1507                         hw->use_vec_tx ? "vectorized" : "standard",
1508                         eth_dev->data->port_id);
1509                 if (hw->use_vec_tx)
1510                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
1511                 else
1512                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
1513         } else {
1514                 if (hw->use_inorder_tx) {
1515                         PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
1516                                 eth_dev->data->port_id);
1517                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
1518                 } else {
1519                         PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1520                                 eth_dev->data->port_id);
1521                         eth_dev->tx_pkt_burst = virtio_xmit_pkts;
1522                 }
1523         }
1524
1525         if (vtpci_packed_queue(hw)) {
1526                 if (hw->use_vec_rx) {
1527                         PMD_INIT_LOG(INFO,
1528                                 "virtio: using packed ring vectorized Rx path on port %u",
1529                                 eth_dev->data->port_id);
1530                         eth_dev->rx_pkt_burst =
1531                                 &virtio_recv_pkts_packed_vec;
1532                 } else if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1533                         PMD_INIT_LOG(INFO,
1534                                 "virtio: using packed ring mergeable buffer Rx path on port %u",
1535                                 eth_dev->data->port_id);
1536                         eth_dev->rx_pkt_burst =
1537                                 &virtio_recv_mergeable_pkts_packed;
1538                 } else {
1539                         PMD_INIT_LOG(INFO,
1540                                 "virtio: using packed ring standard Rx path on port %u",
1541                                 eth_dev->data->port_id);
1542                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
1543                 }
1544         } else {
1545                 if (hw->use_vec_rx) {
1546                         PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
1547                                 eth_dev->data->port_id);
1548                         eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1549                 } else if (hw->use_inorder_rx) {
1550                         PMD_INIT_LOG(INFO,
1551                                 "virtio: using inorder Rx path on port %u",
1552                                 eth_dev->data->port_id);
1553                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_inorder;
1554                 } else if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1555                         PMD_INIT_LOG(INFO,
1556                                 "virtio: using mergeable buffer Rx path on port %u",
1557                                 eth_dev->data->port_id);
1558                         eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1559                 } else {
1560                         PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1561                                 eth_dev->data->port_id);
1562                         eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1563                 }
1564         }
1565
1566 }
1567
1568 /* Only support 1:1 queue/interrupt mapping so far.
1569  * TODO: support n:1 queue/interrupt mapping when there is a limited number of
1570  * interrupt vectors (<N+1).
1571  */
1572 static int
1573 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1574 {
1575         uint32_t i;
1576         struct virtio_hw *hw = dev->data->dev_private;
1577
1578         PMD_INIT_LOG(INFO, "queue/interrupt binding");
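        /*
         * Vector 0 is reserved for config-change interrupts, so Rx queue i is
         * bound to vector i + 1; hw->vqs[i * 2] is the Rx virtqueue of pair i.
         */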
1579         for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1580                 dev->intr_handle->intr_vec[i] = i + 1;
1581                 if (VTPCI_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1582                                                  VIRTIO_MSI_NO_VECTOR) {
1583                         PMD_DRV_LOG(ERR, "failed to set queue vector");
1584                         return -EBUSY;
1585                 }
1586         }
1587
1588         return 0;
1589 }
1590
1591 static void
1592 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1593 {
1594         uint32_t i;
1595         struct virtio_hw *hw = dev->data->dev_private;
1596
1597         PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
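        /*
         * VTNET_CQ also happens to be the per-queue-pair stride, so this walks
         * the same Rx virtqueues that were bound above.
         */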
1598         for (i = 0; i < dev->data->nb_rx_queues; ++i)
1599                 VTPCI_OPS(hw)->set_queue_irq(hw,
1600                                              hw->vqs[i * VTNET_CQ],
1601                                              VIRTIO_MSI_NO_VECTOR);
1602 }
1603
1604 static int
1605 virtio_configure_intr(struct rte_eth_dev *dev)
1606 {
1607         struct virtio_hw *hw = dev->data->dev_private;
1608
1609         if (!rte_intr_cap_multiple(dev->intr_handle)) {
1610                 PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
1611                 return -ENOTSUP;
1612         }
1613
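        /* One eventfd per Rx queue is needed for Rx interrupt notification. */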
1614         if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1615                 PMD_INIT_LOG(ERR, "Failed to create eventfd");
1616                 return -1;
1617         }
1618
1619         if (!dev->intr_handle->intr_vec) {
1620                 dev->intr_handle->intr_vec =
1621                         rte_zmalloc("intr_vec",
1622                                     hw->max_queue_pairs * sizeof(int), 0);
1623                 if (!dev->intr_handle->intr_vec) {
1624                         PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1625                                      hw->max_queue_pairs);
1626                         return -ENOMEM;
1627                 }
1628         }
1629
1630         /* Re-register callback to update max_intr */
1631         rte_intr_callback_unregister(dev->intr_handle,
1632                                      virtio_interrupt_handler,
1633                                      dev);
1634         rte_intr_callback_register(dev->intr_handle,
1635                                    virtio_interrupt_handler,
1636                                    dev);
1637
1638         /* DO NOT try to remove this! This function will enable msix, or QEMU
1639          * will encounter SIGSEGV when DRIVER_OK is sent.
1640          * And for legacy devices, this should be done before queue/vec binding
1641          * to change the config size from 20 to 24, or VIRTIO_MSI_QUEUE_VECTOR
1642          * (22) will be ignored.
1643          */
1644         if (virtio_intr_enable(dev) < 0) {
1645                 PMD_DRV_LOG(ERR, "interrupt enable failed");
1646                 return -1;
1647         }
1648
1649         if (virtio_queues_bind_intr(dev) < 0) {
1650                 PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1651                 return -1;
1652         }
1653
1654         return 0;
1655 }
1656 #define DUPLEX_UNKNOWN   0xff
1657 /* reset device and renegotiate features if needed */
1658 static int
1659 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
1660 {
1661         struct virtio_hw *hw = eth_dev->data->dev_private;
1662         struct virtio_net_config *config;
1663         struct virtio_net_config local_config;
1664         int ret;
1665
1666         /* Reset the device, even though it is not strictly necessary at startup */
1667         vtpci_reset(hw);
1668
1669         if (hw->vqs) {
1670                 virtio_dev_free_mbufs(eth_dev);
1671                 virtio_free_queues(hw);
1672         }
1673
1674         /* Tell the host we've noticed this device. */
1675         vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
1676
1677         /* Tell the host we know how to drive the device. */
1678         vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
1679         if (virtio_negotiate_features(hw, req_features) < 0)
1680                 return -1;
1681
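        /*
         * Without VIRTIO_F_ORDER_PLATFORM the device does not require platform
         * memory ordering, so the datapath may rely on weaker barriers.
         */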
1682         hw->weak_barriers = !vtpci_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);
1683
1684         /* Enable LSC only if the host supports the status feature and MSI-X is in use */
1685         if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS) &&
1686             hw->use_msix != VIRTIO_MSIX_NONE)
1687                 eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
1688         else
1689                 eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
1690
1691         eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1692
1693         /* Set up the Rx header size for the device */
1694         if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
1695             vtpci_with_feature(hw, VIRTIO_F_VERSION_1) ||
1696             vtpci_with_feature(hw, VIRTIO_F_RING_PACKED))
1697                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1698         else
1699                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
1700
1701         /* Copy the permanent MAC address into virtio_hw */
1702         virtio_get_hwaddr(hw);
1703         rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
1704                         &eth_dev->data->mac_addrs[0]);
1705         PMD_INIT_LOG(DEBUG,
1706                      "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
1707                      hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
1708                      hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
1709
1710         if (hw->speed == ETH_SPEED_NUM_UNKNOWN) {
1711                 if (vtpci_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX)) {
1712                         config = &local_config;
1713                         vtpci_read_dev_config(hw,
1714                                 offsetof(struct virtio_net_config, speed),
1715                                 &config->speed, sizeof(config->speed));
1716                         vtpci_read_dev_config(hw,
1717                                 offsetof(struct virtio_net_config, duplex),
1718                                 &config->duplex, sizeof(config->duplex));
1719                         hw->speed = config->speed;
1720                         hw->duplex = config->duplex;
1721                 }
1722         }
1723         if (hw->duplex == DUPLEX_UNKNOWN)
1724                 hw->duplex = ETH_LINK_FULL_DUPLEX;
1725         PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
1726                 hw->speed, hw->duplex);
1727         if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
1728                 config = &local_config;
1729
1730                 vtpci_read_dev_config(hw,
1731                         offsetof(struct virtio_net_config, mac),
1732                         &config->mac, sizeof(config->mac));
1733
1734                 if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1735                         vtpci_read_dev_config(hw,
1736                                 offsetof(struct virtio_net_config, status),
1737                                 &config->status, sizeof(config->status));
1738                 } else {
1739                         PMD_INIT_LOG(DEBUG,
1740                                      "VIRTIO_NET_F_STATUS is not supported");
1741                         config->status = 0;
1742                 }
1743
1744                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
1745                         vtpci_read_dev_config(hw,
1746                                 offsetof(struct virtio_net_config, max_virtqueue_pairs),
1747                                 &config->max_virtqueue_pairs,
1748                                 sizeof(config->max_virtqueue_pairs));
1749                 } else {
1750                         PMD_INIT_LOG(DEBUG,
1751                                      "VIRTIO_NET_F_MQ is not supported");
1752                         config->max_virtqueue_pairs = 1;
1753                 }
1754
1755                 hw->max_queue_pairs = config->max_virtqueue_pairs;
1756
1757                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MTU)) {
1758                         vtpci_read_dev_config(hw,
1759                                 offsetof(struct virtio_net_config, mtu),
1760                                 &config->mtu,
1761                                 sizeof(config->mtu));
1762
1763                         /*
1764                          * MTU value has already been checked at negotiation
1765                          * time, but check again in case it has changed since
1766                          * then, which should not happen.
1767                          */
1768                         if (config->mtu < RTE_ETHER_MIN_MTU) {
1769                                 PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
1770                                                 config->mtu);
1771                                 return -1;
1772                         }
1773
1774                         hw->max_mtu = config->mtu;
1775                         /* Set the initial MTU to the maximum one supported by vhost */
1776                         eth_dev->data->mtu = config->mtu;
1777
1778                 } else {
1779                         hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
1780                                 VLAN_TAG_LEN - hw->vtnet_hdr_size;
1781                 }
1782
1783                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
1784                                 config->max_virtqueue_pairs);
1785                 PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
1786                 PMD_INIT_LOG(DEBUG,
1787                                 "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
1788                                 config->mac[0], config->mac[1],
1789                                 config->mac[2], config->mac[3],
1790                                 config->mac[4], config->mac[5]);
1791         } else {
1792                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
1793                 hw->max_queue_pairs = 1;
1794                 hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
1795                         VLAN_TAG_LEN - hw->vtnet_hdr_size;
1796         }
1797
1798         ret = virtio_alloc_queues(eth_dev);
1799         if (ret < 0)
1800                 return ret;
1801
1802         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1803                 if (virtio_configure_intr(eth_dev) < 0) {
1804                         PMD_INIT_LOG(ERR, "failed to configure interrupt");
1805                         virtio_free_queues(hw);
1806                         return -1;
1807                 }
1808         }
1809
1810         vtpci_reinit_complete(hw);
1811
1812         return 0;
1813 }
1814
1815
1816 static void
1817 virtio_set_vtpci_ops(struct virtio_hw *hw)
1818 {
1819 #ifdef RTE_VIRTIO_USER
1820         if (hw->bus_type == VIRTIO_BUS_USER)
1821                 VTPCI_OPS(hw) = &virtio_user_ops;
1822         else
1823 #endif
1824         if (hw->bus_type == VIRTIO_BUS_PCI_MODERN)
1825                 VTPCI_OPS(hw) = &modern_ops;
1826         else if (hw->bus_type == VIRTIO_BUS_PCI_LEGACY)
1827                 VTPCI_OPS(hw) = &legacy_ops;
1828
1829         return;
1830 }
1831
1832 /*
1833  * This function is based on the probe() function in virtio_pci.c.
1834  * It returns 0 on success.
1835  */
1836 int
1837 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
1838 {
1839         struct virtio_hw *hw = eth_dev->data->dev_private;
1840         uint32_t speed = ETH_SPEED_NUM_UNKNOWN;
1841         int vectorized = 0;
1842         int ret;
1843
1844         if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) {
1845                 PMD_INIT_LOG(ERR,
1846                         "Insufficient headroom: required = %d, avail = %d",
1847                         (int)sizeof(struct virtio_net_hdr_mrg_rxbuf),
1848                         RTE_PKTMBUF_HEADROOM);
1849
1850                 return -1;
1851         }
1852
1853         eth_dev->dev_ops = &virtio_eth_dev_ops;
1854         eth_dev->rx_descriptor_done = virtio_dev_rx_queue_done;
1855
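        /*
         * Secondary processes only need the vtpci ops and the Rx/Tx function
         * pointers; the device itself was initialized by the primary process.
         */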
1856         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1857                 virtio_set_vtpci_ops(hw);
1858                 set_rxtx_funcs(eth_dev);
1859                 return 0;
1860         }
1861
1862         ret = virtio_dev_devargs_parse(eth_dev->device->devargs, &speed, &vectorized);
1863         if (ret < 0)
1864                 return ret;
1865         hw->speed = speed;
1866
1867         /* Allocate memory for storing MAC addresses */
1868         eth_dev->data->mac_addrs = rte_zmalloc("virtio",
1869                                 VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN, 0);
1870         if (eth_dev->data->mac_addrs == NULL) {
1871                 PMD_INIT_LOG(ERR,
1872                         "Failed to allocate %d bytes needed to store MAC addresses",
1873                         VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN);
1874                 return -ENOMEM;
1875         }
1876
1877         hw->port_id = eth_dev->data->port_id;
1878         rte_spinlock_init(&hw->state_lock);
1879
1880         /* reset device and negotiate default features */
1881         ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
1882         if (ret < 0)
1883                 goto err_virtio_init;
1884
1885         if (vectorized) {
1886                 if (!vtpci_packed_queue(hw)) {
1887                         hw->use_vec_rx = 1;
1888                 } else {
1889 #if defined(CC_AVX512_SUPPORT) || defined(RTE_ARCH_ARM)
1890                         hw->use_vec_rx = 1;
1891                         hw->use_vec_tx = 1;
1892 #else
1893                         PMD_DRV_LOG(INFO,
1894                                 "build environment does not support packed ring vectorized path");
1895 #endif
1896                 }
1897         }
1898
1899         hw->opened = true;
1900
1901         return 0;
1902
1903 err_virtio_init:
1904         rte_free(eth_dev->data->mac_addrs);
1905         eth_dev->data->mac_addrs = NULL;
1906         return ret;
1907 }
1908
1909 static uint32_t
1910 virtio_dev_speed_capa_get(uint32_t speed)
1911 {
1912         switch (speed) {
1913         case ETH_SPEED_NUM_10G:
1914                 return ETH_LINK_SPEED_10G;
1915         case ETH_SPEED_NUM_20G:
1916                 return ETH_LINK_SPEED_20G;
1917         case ETH_SPEED_NUM_25G:
1918                 return ETH_LINK_SPEED_25G;
1919         case ETH_SPEED_NUM_40G:
1920                 return ETH_LINK_SPEED_40G;
1921         case ETH_SPEED_NUM_50G:
1922                 return ETH_LINK_SPEED_50G;
1923         case ETH_SPEED_NUM_56G:
1924                 return ETH_LINK_SPEED_56G;
1925         case ETH_SPEED_NUM_100G:
1926                 return ETH_LINK_SPEED_100G;
1927         case ETH_SPEED_NUM_200G:
1928                 return ETH_LINK_SPEED_200G;
1929         default:
1930                 return 0;
1931         }
1932 }
1933
1934 static int vectorized_check_handler(__rte_unused const char *key,
1935                 const char *value, void *ret_val)
1936 {
1937         if (strcmp(value, "1") == 0)
1938                 *(int *)ret_val = 1;
1939         else
1940                 *(int *)ret_val = 0;
1941
1942         return 0;
1943 }
1944
1945 #define VIRTIO_ARG_SPEED      "speed"
1946 #define VIRTIO_ARG_VECTORIZED "vectorized"
1947
1948 static int
1949 link_speed_handler(const char *key __rte_unused,
1950                 const char *value, void *ret_val)
1951 {
1952         uint32_t val;
1953         if (!value || !ret_val)
1954                 return -EINVAL;
1955         val = strtoul(value, NULL, 0);
1956         /* validate input */
1957         if (virtio_dev_speed_capa_get(val) == 0)
1958                 return -EINVAL;
1959         *(uint32_t *)ret_val = val;
1960
1961         return 0;
1962 }
1963
1964
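/*
 * Parse the "speed" and "vectorized" devargs. An illustrative invocation
 * could look like:
 *   -a 0000:00:04.0,speed=10000,vectorized=1
 * where the PCI address is only an example.
 */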
1965 static int
1966 virtio_dev_devargs_parse(struct rte_devargs *devargs, uint32_t *speed, int *vectorized)
1967 {
1968         struct rte_kvargs *kvlist;
1969         int ret = 0;
1970
1971         if (devargs == NULL)
1972                 return 0;
1973
1974         kvlist = rte_kvargs_parse(devargs->args, NULL);
1975         if (kvlist == NULL) {
1976                 PMD_INIT_LOG(ERR, "error when parsing param");
1977                 return 0;
1978         }
1979
1980         if (speed && rte_kvargs_count(kvlist, VIRTIO_ARG_SPEED) == 1) {
1981                 ret = rte_kvargs_process(kvlist,
1982                                         VIRTIO_ARG_SPEED,
1983                                         link_speed_handler, speed);
1984                 if (ret < 0) {
1985                         PMD_INIT_LOG(ERR, "Failed to parse %s",
1986                                         VIRTIO_ARG_SPEED);
1987                         goto exit;
1988                 }
1989         }
1990
1991         if (vectorized &&
1992                 rte_kvargs_count(kvlist, VIRTIO_ARG_VECTORIZED) == 1) {
1993                 ret = rte_kvargs_process(kvlist,
1994                                 VIRTIO_ARG_VECTORIZED,
1995                                 vectorized_check_handler, vectorized);
1996                 if (ret < 0) {
1997                         PMD_INIT_LOG(ERR, "Failed to parse %s",
1998                                         VIRTIO_ARG_VECTORIZED);
1999                         goto exit;
2000                 }
2001         }
2002
2003 exit:
2004         rte_kvargs_free(kvlist);
2005         return ret;
2006 }
2007
2008 static bool
2009 rx_offload_enabled(struct virtio_hw *hw)
2010 {
2011         return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
2012                 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2013                 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
2014 }
2015
2016 static bool
2017 tx_offload_enabled(struct virtio_hw *hw)
2018 {
2019         return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
2020                 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
2021                 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
2022 }
2023
2024 /*
2025  * Configure the virtio device.
2026  * It returns 0 on success.
2027  */
2028 static int
2029 virtio_dev_configure(struct rte_eth_dev *dev)
2030 {
2031         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2032         const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
2033         struct virtio_hw *hw = dev->data->dev_private;
2034         uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
2035                 hw->vtnet_hdr_size;
2036         uint64_t rx_offloads = rxmode->offloads;
2037         uint64_t tx_offloads = txmode->offloads;
2038         uint64_t req_features;
2039         int ret;
2040
2041         PMD_INIT_LOG(DEBUG, "configure");
2042         req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
2043
2044         if (rxmode->mq_mode != ETH_MQ_RX_NONE) {
2045                 PMD_DRV_LOG(ERR,
2046                         "Unsupported Rx multi queue mode %d",
2047                         rxmode->mq_mode);
2048                 return -EINVAL;
2049         }
2050
2051         if (txmode->mq_mode != ETH_MQ_TX_NONE) {
2052                 PMD_DRV_LOG(ERR,
2053                         "Unsupported Tx multi queue mode %d",
2054                         txmode->mq_mode);
2055                 return -EINVAL;
2056         }
2057
2058         if (dev->data->dev_conf.intr_conf.rxq) {
2059                 ret = virtio_init_device(dev, hw->req_guest_features);
2060                 if (ret < 0)
2061                         return ret;
2062         }
2063
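        /*
         * If the requested max Rx packet length exceeds what the device MTU
         * permits, do not negotiate VIRTIO_NET_F_MTU.
         */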
2064         if (rxmode->max_rx_pkt_len > hw->max_mtu + ether_hdr_len)
2065                 req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
2066
2067         if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2068                            DEV_RX_OFFLOAD_TCP_CKSUM))
2069                 req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
2070
2071         if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO)
2072                 req_features |=
2073                         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2074                         (1ULL << VIRTIO_NET_F_GUEST_TSO6);
2075
2076         if (tx_offloads & (DEV_TX_OFFLOAD_UDP_CKSUM |
2077                            DEV_TX_OFFLOAD_TCP_CKSUM))
2078                 req_features |= (1ULL << VIRTIO_NET_F_CSUM);
2079
2080         if (tx_offloads & DEV_TX_OFFLOAD_TCP_TSO)
2081                 req_features |=
2082                         (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2083                         (1ULL << VIRTIO_NET_F_HOST_TSO6);
2084
2085         /* if the requested features changed, reinitialize the device */
2086         if (req_features != hw->req_guest_features) {
2087                 ret = virtio_init_device(dev, req_features);
2088                 if (ret < 0)
2089                         return ret;
2090         }
2091
2092         if ((rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2093                             DEV_RX_OFFLOAD_TCP_CKSUM)) &&
2094                 !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2095                 PMD_DRV_LOG(ERR,
2096                         "rx checksum not available on this host");
2097                 return -ENOTSUP;
2098         }
2099
2100         if ((rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) &&
2101                 (!vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2102                  !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2103                 PMD_DRV_LOG(ERR,
2104                         "Large Receive Offload not available on this host");
2105                 return -ENOTSUP;
2106         }
2107
2108         /* start control queue */
2109         if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2110                 virtio_dev_cq_start(dev);
2111
2112         if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2113                 hw->vlan_strip = 1;
2114
2115         if ((rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
2116             && !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2117                 PMD_DRV_LOG(ERR,
2118                             "vlan filtering not available on this host");
2119                 return -ENOTSUP;
2120         }
2121
2122         hw->has_tx_offload = tx_offload_enabled(hw);
2123         hw->has_rx_offload = rx_offload_enabled(hw);
2124
2125         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2126                 /* Enable vector (0) for Link State Interrupt */
2127                 if (VTPCI_OPS(hw)->set_config_irq(hw, 0) ==
2128                                 VIRTIO_MSI_NO_VECTOR) {
2129                         PMD_DRV_LOG(ERR, "failed to set config vector");
2130                         return -EBUSY;
2131                 }
2132
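        /*
         * The packed ring vectorized paths additionally require VERSION_1,
         * IN_ORDER and AVX512F (x86-64) or NEON (Arm) support; otherwise fall
         * back to the scalar paths below.
         */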
2133         if (vtpci_packed_queue(hw)) {
2134 #if defined(RTE_ARCH_X86_64) && defined(CC_AVX512_SUPPORT)
2135                 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2136                     (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
2137                      !vtpci_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2138                      !vtpci_with_feature(hw, VIRTIO_F_VERSION_1) ||
2139                      rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512)) {
2140                         PMD_DRV_LOG(INFO,
2141                                 "disabled packed ring vectorized path for requirements not met");
2142                         hw->use_vec_rx = 0;
2143                         hw->use_vec_tx = 0;
2144                 }
2145 #elif defined(RTE_ARCH_ARM)
2146                 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2147                     (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) ||
2148                      !vtpci_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2149                      !vtpci_with_feature(hw, VIRTIO_F_VERSION_1) ||
2150                      rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)) {
2151                         PMD_DRV_LOG(INFO,
2152                                 "disabled packed ring vectorized path for requirements not met");
2153                         hw->use_vec_rx = 0;
2154                         hw->use_vec_tx = 0;
2155                 }
2156 #else
2157                 hw->use_vec_rx = 0;
2158                 hw->use_vec_tx = 0;
2159 #endif
2160
2161                 if (hw->use_vec_rx) {
2162                         if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2163                                 PMD_DRV_LOG(INFO,
2164                                         "disabled packed ring vectorized rx for mrg_rxbuf enabled");
2165                                 hw->use_vec_rx = 0;
2166                         }
2167
2168                         if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) {
2169                                 PMD_DRV_LOG(INFO,
2170                                         "disabled packed ring vectorized rx for TCP_LRO enabled");
2171                                 hw->use_vec_rx = 0;
2172                         }
2173                 }
2174         } else {
2175                 if (vtpci_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2176                         hw->use_inorder_tx = 1;
2177                         hw->use_inorder_rx = 1;
2178                         hw->use_vec_rx = 0;
2179                 }
2180
2181                 if (hw->use_vec_rx) {
2182 #if defined RTE_ARCH_ARM
2183                         if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2184                                 PMD_DRV_LOG(INFO,
2185                                         "disabled split ring vectorized path for requirement not met");
2186                                 hw->use_vec_rx = 0;
2187                         }
2188 #endif
2189                         if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2190                                 PMD_DRV_LOG(INFO,
2191                                         "disabled split ring vectorized rx for mrg_rxbuf enabled");
2192                                 hw->use_vec_rx = 0;
2193                         }
2194
2195                         if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2196                                            DEV_RX_OFFLOAD_TCP_CKSUM |
2197                                            DEV_RX_OFFLOAD_TCP_LRO |
2198                                            DEV_RX_OFFLOAD_VLAN_STRIP)) {
2199                                 PMD_DRV_LOG(INFO,
2200                                         "disabled split ring vectorized rx for offloading enabled");
2201                                 hw->use_vec_rx = 0;
2202                         }
2203
2204                         if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
2205                                 PMD_DRV_LOG(INFO,
2206                                         "disabled split ring vectorized rx, max SIMD bitwidth too low");
2207                                 hw->use_vec_rx = 0;
2208                         }
2209                 }
2210         }
2211
2212         return 0;
2213 }
2214
2215
2216 static int
2217 virtio_dev_start(struct rte_eth_dev *dev)
2218 {
2219         uint16_t nb_queues, i;
2220         struct virtnet_rx *rxvq;
2221         struct virtnet_tx *txvq __rte_unused;
2222         struct virtio_hw *hw = dev->data->dev_private;
2223         int ret;
2224
2225         /* Finish the initialization of the queues */
2226         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2227                 ret = virtio_dev_rx_queue_setup_finish(dev, i);
2228                 if (ret < 0)
2229                         return ret;
2230         }
2231         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2232                 ret = virtio_dev_tx_queue_setup_finish(dev, i);
2233                 if (ret < 0)
2234                         return ret;
2235         }
2236
2237         /* check if lsc interrupt feature is enabled */
2238         if (dev->data->dev_conf.intr_conf.lsc) {
2239                 if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2240                         PMD_DRV_LOG(ERR, "link status not supported by host");
2241                         return -ENOTSUP;
2242                 }
2243         }
2244
2245         /* Enable uio/vfio intr/eventfd mapping: although we already did that
2246          * during device configure, it could have been unmapped when the device
2247          * was stopped.
2248          */
2249         if (dev->data->dev_conf.intr_conf.lsc ||
2250             dev->data->dev_conf.intr_conf.rxq) {
2251                 virtio_intr_disable(dev);
2252
2253                 /* Setup interrupt callback  */
2254                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2255                         rte_intr_callback_register(dev->intr_handle,
2256                                                    virtio_interrupt_handler,
2257                                                    dev);
2258
2259                 if (virtio_intr_enable(dev) < 0) {
2260                         PMD_DRV_LOG(ERR, "interrupt enable failed");
2261                         return -EIO;
2262                 }
2263         }
2264
2265         /* Notify the backend.
2266          * Otherwise the tap backend might have already stopped its queue due to
2267          * fullness, and the vhost backend would have no chance to be woken up.
2268          */
2269         nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2270         if (hw->max_queue_pairs > 1) {
2271                 if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2272                         return -EINVAL;
2273         }
2274
2275         PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
2276
2277         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2278                 rxvq = dev->data->rx_queues[i];
2279                 /* Flush the old packets */
2280                 virtqueue_rxvq_flush(rxvq->vq);
2281                 virtqueue_notify(rxvq->vq);
2282         }
2283
2284         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2285                 txvq = dev->data->tx_queues[i];
2286                 virtqueue_notify(txvq->vq);
2287         }
2288
2289         PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
2290
2291         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2292                 rxvq = dev->data->rx_queues[i];
2293                 VIRTQUEUE_DUMP(rxvq->vq);
2294         }
2295
2296         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2297                 txvq = dev->data->tx_queues[i];
2298                 VIRTQUEUE_DUMP(txvq->vq);
2299         }
2300
2301         set_rxtx_funcs(dev);
2302         hw->started = true;
2303
2304         /* Initialize Link state */
2305         virtio_dev_link_update(dev, 0);
2306
2307         return 0;
2308 }
2309
2310 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2311 {
2312         struct virtio_hw *hw = dev->data->dev_private;
2313         uint16_t nr_vq = virtio_get_nr_vq(hw);
2314         const char *type __rte_unused;
2315         unsigned int i, mbuf_num = 0;
2316         struct virtqueue *vq;
2317         struct rte_mbuf *buf;
2318         int queue_type;
2319
2320         if (hw->vqs == NULL)
2321                 return;
2322
2323         for (i = 0; i < nr_vq; i++) {
2324                 vq = hw->vqs[i];
2325                 if (!vq)
2326                         continue;
2327
2328                 queue_type = virtio_get_queue_type(hw, i);
2329                 if (queue_type == VTNET_RQ)
2330                         type = "rxq";
2331                 else if (queue_type == VTNET_TQ)
2332                         type = "txq";
2333                 else
2334                         continue;
2335
2336                 PMD_INIT_LOG(DEBUG,
2337                         "Before freeing %s[%d] used and unused buf",
2338                         type, i);
2339                 VIRTQUEUE_DUMP(vq);
2340
2341                 while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2342                         rte_pktmbuf_free(buf);
2343                         mbuf_num++;
2344                 }
2345
2346                 PMD_INIT_LOG(DEBUG,
2347                         "After freeing %s[%d] used and unused buf",
2348                         type, i);
2349                 VIRTQUEUE_DUMP(vq);
2350         }
2351
2352         PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2353 }
2354
2355 /*
2356  * Stop device: disable interrupt and mark link down
2357  */
2358 int
2359 virtio_dev_stop(struct rte_eth_dev *dev)
2360 {
2361         struct virtio_hw *hw = dev->data->dev_private;
2362         struct rte_eth_link link;
2363         struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2364
2365         PMD_INIT_LOG(DEBUG, "stop");
2366         dev->data->dev_started = 0;
2367
2368         rte_spinlock_lock(&hw->state_lock);
2369         if (!hw->started)
2370                 goto out_unlock;
2371         hw->started = false;
2372
2373         if (intr_conf->lsc || intr_conf->rxq) {
2374                 virtio_intr_disable(dev);
2375
2376                 /* Reset interrupt callback  */
2377                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2378                         rte_intr_callback_unregister(dev->intr_handle,
2379                                                      virtio_interrupt_handler,
2380                                                      dev);
2381                 }
2382         }
2383
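        /* Report the link as down while the port is stopped. */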
2384         memset(&link, 0, sizeof(link));
2385         rte_eth_linkstatus_set(dev, &link);
2386 out_unlock:
2387         rte_spinlock_unlock(&hw->state_lock);
2388
2389         return 0;
2390 }
2391
2392 static int
2393 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2394 {
2395         struct rte_eth_link link;
2396         uint16_t status;
2397         struct virtio_hw *hw = dev->data->dev_private;
2398
2399         memset(&link, 0, sizeof(link));
2400         link.link_duplex = hw->duplex;
2401         link.link_speed  = hw->speed;
2402         link.link_autoneg = ETH_LINK_AUTONEG;
2403
2404         if (!hw->started) {
2405                 link.link_status = ETH_LINK_DOWN;
2406                 link.link_speed = ETH_SPEED_NUM_NONE;
2407         } else if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2408                 PMD_INIT_LOG(DEBUG, "Get link status from hw");
2409                 vtpci_read_dev_config(hw,
2410                                 offsetof(struct virtio_net_config, status),
2411                                 &status, sizeof(status));
2412                 if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2413                         link.link_status = ETH_LINK_DOWN;
2414                         link.link_speed = ETH_SPEED_NUM_NONE;
2415                         PMD_INIT_LOG(DEBUG, "Port %d is down",
2416                                      dev->data->port_id);
2417                 } else {
2418                         link.link_status = ETH_LINK_UP;
2419                         PMD_INIT_LOG(DEBUG, "Port %d is up",
2420                                      dev->data->port_id);
2421                 }
2422         } else {
2423                 link.link_status = ETH_LINK_UP;
2424         }
2425
2426         return rte_eth_linkstatus_set(dev, &link);
2427 }
2428
2429 static int
2430 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2431 {
2432         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2433         struct virtio_hw *hw = dev->data->dev_private;
2434         uint64_t offloads = rxmode->offloads;
2435
2436         if (mask & ETH_VLAN_FILTER_MASK) {
2437                 if ((offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
2438                                 !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2439
2440                         PMD_DRV_LOG(NOTICE,
2441                                 "vlan filtering not available on this host");
2442
2443                         return -ENOTSUP;
2444                 }
2445         }
2446
2447         if (mask & ETH_VLAN_STRIP_MASK)
2448                 hw->vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
2449
2450         return 0;
2451 }
2452
2453 static int
2454 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2455 {
2456         uint64_t tso_mask, host_features;
2457         struct virtio_hw *hw = dev->data->dev_private;
2458         dev_info->speed_capa = virtio_dev_speed_capa_get(hw->speed);
2459
2460         dev_info->max_rx_queues =
2461                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
2462         dev_info->max_tx_queues =
2463                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
2464         dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
2465         dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
2466         dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
2467
2468         host_features = VTPCI_OPS(hw)->get_features(hw);
2469         dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2470         dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME;
2471         if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
2472                 dev_info->rx_offload_capa |=
2473                         DEV_RX_OFFLOAD_TCP_CKSUM |
2474                         DEV_RX_OFFLOAD_UDP_CKSUM;
2475         }
2476         if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
2477                 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_VLAN_FILTER;
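        /* LRO can only be advertised when the host offers both guest TSO4 and TSO6. */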
2478         tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2479                 (1ULL << VIRTIO_NET_F_GUEST_TSO6);
2480         if ((host_features & tso_mask) == tso_mask)
2481                 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;
2482
2483         dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
2484                                     DEV_TX_OFFLOAD_VLAN_INSERT;
2485         if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
2486                 dev_info->tx_offload_capa |=
2487                         DEV_TX_OFFLOAD_UDP_CKSUM |
2488                         DEV_TX_OFFLOAD_TCP_CKSUM;
2489         }
2490         tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2491                 (1ULL << VIRTIO_NET_F_HOST_TSO6);
2492         if ((host_features & tso_mask) == tso_mask)
2493                 dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
2494
2495         return 0;
2496 }
2497
2498 /*
2499  * It enables testpmd to collect per-queue stats.
2500  */
2501 static int
2502 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
2503 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
2504 __rte_unused uint8_t is_rx)
2505 {
2506         return 0;
2507 }
2508
2509 RTE_LOG_REGISTER(virtio_logtype_init, pmd.net.virtio.init, NOTICE);
2510 RTE_LOG_REGISTER(virtio_logtype_driver, pmd.net.virtio.driver, NOTICE);