net/virtio: improve queue init error path
[dpdk.git] drivers/net/virtio/virtio_ethdev.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10
11 #include <ethdev_driver.h>
12 #include <rte_memcpy.h>
13 #include <rte_string_fns.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_branch_prediction.h>
17 #include <rte_ether.h>
18 #include <rte_ip.h>
19 #include <rte_arp.h>
20 #include <rte_common.h>
21 #include <rte_errno.h>
22 #include <rte_cpuflags.h>
23 #include <rte_vect.h>
24 #include <rte_memory.h>
25 #include <rte_eal_paging.h>
26 #include <rte_eal.h>
27 #include <rte_dev.h>
28 #include <rte_cycles.h>
29 #include <rte_kvargs.h>
30
31 #include "virtio_ethdev.h"
32 #include "virtio.h"
33 #include "virtio_logs.h"
34 #include "virtqueue.h"
35 #include "virtio_rxtx.h"
36 #include "virtio_user/virtio_user_dev.h"
37
38 static int  virtio_dev_configure(struct rte_eth_dev *dev);
39 static int  virtio_dev_start(struct rte_eth_dev *dev);
40 static int virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
41 static int virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
42 static int virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
43 static int virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
44 static uint32_t virtio_dev_speed_capa_get(uint32_t speed);
45 static int virtio_dev_devargs_parse(struct rte_devargs *devargs,
46         uint32_t *speed,
47         int *vectorized);
48 static int virtio_dev_info_get(struct rte_eth_dev *dev,
49                                 struct rte_eth_dev_info *dev_info);
50 static int virtio_dev_link_update(struct rte_eth_dev *dev,
51         int wait_to_complete);
52 static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
53
54 static void virtio_set_hwaddr(struct virtio_hw *hw);
55 static void virtio_get_hwaddr(struct virtio_hw *hw);
56
57 static int virtio_dev_stats_get(struct rte_eth_dev *dev,
58                                  struct rte_eth_stats *stats);
59 static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
60                                  struct rte_eth_xstat *xstats, unsigned n);
61 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
62                                        struct rte_eth_xstat_name *xstats_names,
63                                        unsigned limit);
64 static int virtio_dev_stats_reset(struct rte_eth_dev *dev);
65 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
66 static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
67                                 uint16_t vlan_id, int on);
68 static int virtio_mac_addr_add(struct rte_eth_dev *dev,
69                                 struct rte_ether_addr *mac_addr,
70                                 uint32_t index, uint32_t vmdq);
71 static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
72 static int virtio_mac_addr_set(struct rte_eth_dev *dev,
73                                 struct rte_ether_addr *mac_addr);
74
75 static int virtio_intr_disable(struct rte_eth_dev *dev);
76
77 static int virtio_dev_queue_stats_mapping_set(
78         struct rte_eth_dev *eth_dev,
79         uint16_t queue_id,
80         uint8_t stat_idx,
81         uint8_t is_rx);
82
83 static void virtio_notify_peers(struct rte_eth_dev *dev);
84 static void virtio_ack_link_announce(struct rte_eth_dev *dev);
85
86 struct rte_virtio_xstats_name_off {
87         char name[RTE_ETH_XSTATS_NAME_SIZE];
88         unsigned offset;
89 };
90
91 /* [rt]x_qX_ is prepended to the name string here */
92 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
93         {"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
94         {"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
95         {"errors",                 offsetof(struct virtnet_rx, stats.errors)},
96         {"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
97         {"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
98         {"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
99         {"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
100         {"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
101         {"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
102         {"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
103         {"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
104         {"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
105         {"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
106 };
107
108 /* [rt]x_qX_ is prepended to the name string here */
109 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
110         {"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
111         {"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
112         {"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
113         {"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
114         {"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
115         {"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
116         {"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
117         {"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
118         {"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
119         {"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
120         {"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
121         {"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
122 };
123
124 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
125                             sizeof(rte_virtio_rxq_stat_strings[0]))
126 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
127                             sizeof(rte_virtio_txq_stat_strings[0]))
128
129 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
130
131 static struct virtio_pmd_ctrl *
132 virtio_send_command_packed(struct virtnet_ctl *cvq,
133                            struct virtio_pmd_ctrl *ctrl,
134                            int *dlen, int pkt_num)
135 {
136         struct virtqueue *vq = virtnet_cq_to_vq(cvq);
137         int head;
138         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
139         struct virtio_pmd_ctrl *result;
140         uint16_t flags;
141         int sum = 0;
142         int nb_descs = 0;
143         int k;
144
145         /*
146          * Format is enforced in qemu code:
147          * One TX packet for header;
148          * At least one TX packet per argument;
149          * One RX packet for ACK.
150          */
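        /*
         * The chain built below is therefore:
         *   desc[head]            virtio_net_ctrl_hdr (driver-written)
         *   desc[+1..+pkt_num]    command data, dlen[k] bytes each
         *   desc[last]            ack status byte (device-written, F_WRITE)
         * all of it backed by the control queue's virtio_net_hdr memzone.
         */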
151         head = vq->vq_avail_idx;
152         flags = vq->vq_packed.cached_flags;
153         desc[head].addr = cvq->virtio_net_hdr_mem;
154         desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
155         vq->vq_free_cnt--;
156         nb_descs++;
157         if (++vq->vq_avail_idx >= vq->vq_nentries) {
158                 vq->vq_avail_idx -= vq->vq_nentries;
159                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
160         }
161
162         for (k = 0; k < pkt_num; k++) {
163                 desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
164                         + sizeof(struct virtio_net_ctrl_hdr)
165                         + sizeof(ctrl->status) + sizeof(uint8_t) * sum;
166                 desc[vq->vq_avail_idx].len = dlen[k];
167                 desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
168                         vq->vq_packed.cached_flags;
169                 sum += dlen[k];
170                 vq->vq_free_cnt--;
171                 nb_descs++;
172                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
173                         vq->vq_avail_idx -= vq->vq_nentries;
174                         vq->vq_packed.cached_flags ^=
175                                 VRING_PACKED_DESC_F_AVAIL_USED;
176                 }
177         }
178
179         desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
180                 + sizeof(struct virtio_net_ctrl_hdr);
181         desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
182         desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
183                 vq->vq_packed.cached_flags;
184         vq->vq_free_cnt--;
185         nb_descs++;
186         if (++vq->vq_avail_idx >= vq->vq_nentries) {
187                 vq->vq_avail_idx -= vq->vq_nentries;
188                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
189         }
190
191         virtqueue_store_flags_packed(&desc[head], VRING_DESC_F_NEXT | flags,
192                         vq->hw->weak_barriers);
193
194         virtio_wmb(vq->hw->weak_barriers);
195         virtqueue_notify(vq);
196
197         /* wait for used desc in virtqueue
198          * desc_is_used has a load-acquire or rte_io_rmb inside
199          */
200         while (!desc_is_used(&desc[head], vq))
201                 usleep(100);
202
203         /* now get used descriptors */
204         vq->vq_free_cnt += nb_descs;
205         vq->vq_used_cons_idx += nb_descs;
206         if (vq->vq_used_cons_idx >= vq->vq_nentries) {
207                 vq->vq_used_cons_idx -= vq->vq_nentries;
208                 vq->vq_packed.used_wrap_counter ^= 1;
209         }
210
211         PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
212                         "vq->vq_avail_idx=%d\n"
213                         "vq->vq_used_cons_idx=%d\n"
214                         "vq->vq_packed.cached_flags=0x%x\n"
215                         "vq->vq_packed.used_wrap_counter=%d\n",
216                         vq->vq_free_cnt,
217                         vq->vq_avail_idx,
218                         vq->vq_used_cons_idx,
219                         vq->vq_packed.cached_flags,
220                         vq->vq_packed.used_wrap_counter);
221
222         result = cvq->virtio_net_hdr_mz->addr;
223         return result;
224 }
225
226 static struct virtio_pmd_ctrl *
227 virtio_send_command_split(struct virtnet_ctl *cvq,
228                           struct virtio_pmd_ctrl *ctrl,
229                           int *dlen, int pkt_num)
230 {
231         struct virtio_pmd_ctrl *result;
232         struct virtqueue *vq = virtnet_cq_to_vq(cvq);
233         uint32_t head, i;
234         int k, sum = 0;
235
236         head = vq->vq_desc_head_idx;
237
238         /*
239          * Format is enforced in qemu code:
240          * One TX packet for header;
241          * At least one TX packet per argument;
242          * One RX packet for ACK.
243          */
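        /*
         * Same layout as the packed path: a header descriptor, one descriptor
         * per data element and a final device-writable status descriptor,
         * linked through the split ring's desc[].next chain.
         */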
244         vq->vq_split.ring.desc[head].flags = VRING_DESC_F_NEXT;
245         vq->vq_split.ring.desc[head].addr = cvq->virtio_net_hdr_mem;
246         vq->vq_split.ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
247         vq->vq_free_cnt--;
248         i = vq->vq_split.ring.desc[head].next;
249
250         for (k = 0; k < pkt_num; k++) {
251                 vq->vq_split.ring.desc[i].flags = VRING_DESC_F_NEXT;
252                 vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
253                         + sizeof(struct virtio_net_ctrl_hdr)
254                         + sizeof(ctrl->status) + sizeof(uint8_t)*sum;
255                 vq->vq_split.ring.desc[i].len = dlen[k];
256                 sum += dlen[k];
257                 vq->vq_free_cnt--;
258                 i = vq->vq_split.ring.desc[i].next;
259         }
260
261         vq->vq_split.ring.desc[i].flags = VRING_DESC_F_WRITE;
262         vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
263                         + sizeof(struct virtio_net_ctrl_hdr);
264         vq->vq_split.ring.desc[i].len = sizeof(ctrl->status);
265         vq->vq_free_cnt--;
266
267         vq->vq_desc_head_idx = vq->vq_split.ring.desc[i].next;
268
269         vq_update_avail_ring(vq, head);
270         vq_update_avail_idx(vq);
271
272         PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
273
274         virtqueue_notify(vq);
275
276         while (virtqueue_nused(vq) == 0)
277                 usleep(100);
278
279         while (virtqueue_nused(vq)) {
280                 uint32_t idx, desc_idx, used_idx;
281                 struct vring_used_elem *uep;
282
283                 used_idx = (uint32_t)(vq->vq_used_cons_idx
284                                 & (vq->vq_nentries - 1));
285                 uep = &vq->vq_split.ring.used->ring[used_idx];
286                 idx = (uint32_t) uep->id;
287                 desc_idx = idx;
288
289                 while (vq->vq_split.ring.desc[desc_idx].flags &
290                                 VRING_DESC_F_NEXT) {
291                         desc_idx = vq->vq_split.ring.desc[desc_idx].next;
292                         vq->vq_free_cnt++;
293                 }
294
295                 vq->vq_split.ring.desc[desc_idx].next = vq->vq_desc_head_idx;
296                 vq->vq_desc_head_idx = idx;
297
298                 vq->vq_used_cons_idx++;
299                 vq->vq_free_cnt++;
300         }
301
302         PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
303                         vq->vq_free_cnt, vq->vq_desc_head_idx);
304
305         result = cvq->virtio_net_hdr_mz->addr;
306         return result;
307 }
308
309 static int
310 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
311                     int *dlen, int pkt_num)
312 {
313         virtio_net_ctrl_ack status = ~0;
314         struct virtio_pmd_ctrl *result;
315         struct virtqueue *vq;
316
317         ctrl->status = status;
318
319         if (!cvq) {
320                 PMD_INIT_LOG(ERR, "Control queue is not supported.");
321                 return -1;
322         }
323
324         rte_spinlock_lock(&cvq->lock);
325         vq = virtnet_cq_to_vq(cvq);
326
327         PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
328                 "vq->hw->cvq = %p vq = %p",
329                 vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
330
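        /*
         * pkt_num data descriptors plus one for the control header and one
         * for the status byte must fit into the free descriptor count.
         */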
331         if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
332                 rte_spinlock_unlock(&cvq->lock);
333                 return -1;
334         }
335
336         memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
337                 sizeof(struct virtio_pmd_ctrl));
338
339         if (virtio_with_packed_queue(vq->hw))
340                 result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num);
341         else
342                 result = virtio_send_command_split(cvq, ctrl, dlen, pkt_num);
343
344         rte_spinlock_unlock(&cvq->lock);
345         return result->status;
346 }
347
348 static int
349 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
350 {
351         struct virtio_hw *hw = dev->data->dev_private;
352         struct virtio_pmd_ctrl ctrl;
353         int dlen[1];
354         int ret;
355
356         ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
357         ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
358         memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
359
360         dlen[0] = sizeof(uint16_t);
361
362         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
363         if (ret) {
364                 PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
365                           "failed, this is too late now...");
366                 return -EINVAL;
367         }
368
369         return 0;
370 }
371
372 static void
373 virtio_dev_queue_release(void *queue __rte_unused)
374 {
375         /* do nothing */
376 }
377
378 static uint16_t
379 virtio_get_nr_vq(struct virtio_hw *hw)
380 {
381         uint16_t nr_vq = hw->max_queue_pairs * 2;
382
383         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
384                 nr_vq += 1;
385
386         return nr_vq;
387 }
388
389 static void
390 virtio_init_vring(struct virtqueue *vq)
391 {
392         int size = vq->vq_nentries;
393         uint8_t *ring_mem = vq->vq_ring_virt_mem;
394
395         PMD_INIT_FUNC_TRACE();
396
397         memset(ring_mem, 0, vq->vq_ring_size);
398
399         vq->vq_used_cons_idx = 0;
400         vq->vq_desc_head_idx = 0;
401         vq->vq_avail_idx = 0;
402         vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
403         vq->vq_free_cnt = vq->vq_nentries;
404         memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
405         if (virtio_with_packed_queue(vq->hw)) {
406                 vring_init_packed(&vq->vq_packed.ring, ring_mem,
407                                   VIRTIO_VRING_ALIGN, size);
408                 vring_desc_init_packed(vq, size);
409         } else {
410                 struct vring *vr = &vq->vq_split.ring;
411
412                 vring_init_split(vr, ring_mem, VIRTIO_VRING_ALIGN, size);
413                 vring_desc_init_split(vr->desc, size);
414         }
415         /*
416          * Disable interrupts from the device (host) to the guest
417          */
418         virtqueue_disable_intr(vq);
419 }
420
421 static int
422 virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
423 {
424         char vq_name[VIRTQUEUE_MAX_NAME_SZ];
425         char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
426         const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
427         unsigned int vq_size, size;
428         struct virtio_hw *hw = dev->data->dev_private;
429         struct virtnet_rx *rxvq = NULL;
430         struct virtnet_tx *txvq = NULL;
431         struct virtnet_ctl *cvq = NULL;
432         struct virtqueue *vq;
433         size_t sz_hdr_mz = 0;
434         void *sw_ring = NULL;
435         int queue_type = virtio_get_queue_type(hw, queue_idx);
436         int ret;
437         int numa_node = dev->device->numa_node;
438
439         PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
440                         queue_idx, numa_node);
441
442         /*
443          * Read the virtqueue size from the Queue Size field
444          * Always a power of 2; if 0, the virtqueue does not exist
445          */
446         vq_size = VIRTIO_OPS(hw)->get_queue_num(hw, queue_idx);
447         PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
448         if (vq_size == 0) {
449                 PMD_INIT_LOG(ERR, "virtqueue does not exist");
450                 return -EINVAL;
451         }
452
453         if (!virtio_with_packed_queue(hw) && !rte_is_power_of_2(vq_size)) {
454                 PMD_INIT_LOG(ERR, "split virtqueue size is not power of 2");
455                 return -EINVAL;
456         }
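        /*
         * Split rings are indexed with "& (vq_nentries - 1)" masks, hence the
         * power-of-two requirement; packed rings wrap the index explicitly.
         */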
457
458         snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
459                  dev->data->port_id, queue_idx);
460
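        /*
         * The virtqueue structure is followed in memory by one vq_desc_extra
         * entry per descriptor (the vq_descx array at its tail).
         */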
461         size = RTE_ALIGN_CEIL(sizeof(*vq) +
462                                 vq_size * sizeof(struct vq_desc_extra),
463                                 RTE_CACHE_LINE_SIZE);
464         if (queue_type == VTNET_TQ) {
465                 /*
466                  * For each xmit packet, allocate a virtio_net_hdr
467                  * and indirect ring elements
468                  */
469                 sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
470         } else if (queue_type == VTNET_CQ) {
471                 /* Allocate a page for control vq command, data and status */
472                 sz_hdr_mz = rte_mem_page_size();
473         }
474
475         vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
476                                 numa_node);
477         if (vq == NULL) {
478                 PMD_INIT_LOG(ERR, "can not allocate vq");
479                 return -ENOMEM;
480         }
481         hw->vqs[queue_idx] = vq;
482
483         vq->hw = hw;
484         vq->vq_queue_index = queue_idx;
485         vq->vq_nentries = vq_size;
486         if (virtio_with_packed_queue(hw)) {
487                 vq->vq_packed.used_wrap_counter = 1;
488                 vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL;
489                 vq->vq_packed.event_flags_shadow = 0;
490                 if (queue_type == VTNET_RQ)
491                         vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE;
492         }
493
494         /*
495          * Reserve a memzone for vring elements
496          */
497         size = vring_size(hw, vq_size, VIRTIO_VRING_ALIGN);
498         vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_VRING_ALIGN);
499         PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
500                      size, vq->vq_ring_size);
501
502         mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
503                         numa_node, RTE_MEMZONE_IOVA_CONTIG,
504                         VIRTIO_VRING_ALIGN);
505         if (mz == NULL) {
506                 if (rte_errno == EEXIST)
507                         mz = rte_memzone_lookup(vq_name);
508                 if (mz == NULL) {
509                         ret = -ENOMEM;
510                         goto free_vq;
511                 }
512         }
513
514         memset(mz->addr, 0, mz->len);
515
516         vq->vq_ring_mem = mz->iova;
517         vq->vq_ring_virt_mem = mz->addr;
518         PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem:      0x%" PRIx64,
519                      (uint64_t)mz->iova);
520         PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64,
521                      (uint64_t)(uintptr_t)mz->addr);
522
523         virtio_init_vring(vq);
524
525         if (sz_hdr_mz) {
526                 snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
527                          dev->data->port_id, queue_idx);
528                 hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
529                                 numa_node, RTE_MEMZONE_IOVA_CONTIG,
530                                 RTE_CACHE_LINE_SIZE);
531                 if (hdr_mz == NULL) {
532                         if (rte_errno == EEXIST)
533                                 hdr_mz = rte_memzone_lookup(vq_hdr_name);
534                         if (hdr_mz == NULL) {
535                                 ret = -ENOMEM;
536                                 goto free_mz;
537                         }
538                 }
539         }
540
541         if (queue_type == VTNET_RQ) {
542                 size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
543                                sizeof(vq->sw_ring[0]);
544
545                 sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
546                                 RTE_CACHE_LINE_SIZE, numa_node);
547                 if (!sw_ring) {
548                         PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
549                         ret = -ENOMEM;
550                         goto free_hdr_mz;
551                 }
552
553                 vq->sw_ring = sw_ring;
554                 rxvq = &vq->rxq;
555                 rxvq->port_id = dev->data->port_id;
556                 rxvq->mz = mz;
557         } else if (queue_type == VTNET_TQ) {
558                 txvq = &vq->txq;
559                 txvq->port_id = dev->data->port_id;
560                 txvq->mz = mz;
561                 txvq->virtio_net_hdr_mz = hdr_mz;
562                 txvq->virtio_net_hdr_mem = hdr_mz->iova;
563         } else if (queue_type == VTNET_CQ) {
564                 cvq = &vq->cq;
565                 cvq->mz = mz;
566                 cvq->virtio_net_hdr_mz = hdr_mz;
567                 cvq->virtio_net_hdr_mem = hdr_mz->iova;
568                 memset(cvq->virtio_net_hdr_mz->addr, 0, rte_mem_page_size());
569
570                 hw->cvq = cvq;
571         }
572
573         if (queue_type == VTNET_TQ) {
574                 struct virtio_tx_region *txr;
575                 unsigned int i;
576
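                /*
                 * Pre-format one virtio_tx_region per slot: it holds the
                 * virtio-net header plus an indirect descriptor table whose
                 * first entry points at that header, so the Tx path only has
                 * to fill in the payload descriptors.
                 */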
577                 txr = hdr_mz->addr;
578                 memset(txr, 0, vq_size * sizeof(*txr));
579                 for (i = 0; i < vq_size; i++) {
580                         /* first indirect descriptor is always the tx header */
581                         if (!virtio_with_packed_queue(hw)) {
582                                 struct vring_desc *start_dp = txr[i].tx_indir;
583                                 vring_desc_init_split(start_dp,
584                                                       RTE_DIM(txr[i].tx_indir));
585                                 start_dp->addr = txvq->virtio_net_hdr_mem
586                                         + i * sizeof(*txr)
587                                         + offsetof(struct virtio_tx_region,
588                                                    tx_hdr);
589                                 start_dp->len = hw->vtnet_hdr_size;
590                                 start_dp->flags = VRING_DESC_F_NEXT;
591                         } else {
592                                 struct vring_packed_desc *start_dp =
593                                         txr[i].tx_packed_indir;
594                                 vring_desc_init_indirect_packed(start_dp,
595                                       RTE_DIM(txr[i].tx_packed_indir));
596                                 start_dp->addr = txvq->virtio_net_hdr_mem
597                                         + i * sizeof(*txr)
598                                         + offsetof(struct virtio_tx_region,
599                                                    tx_hdr);
600                                 start_dp->len = hw->vtnet_hdr_size;
601                         }
602                 }
603         }
604
605         if (VIRTIO_OPS(hw)->setup_queue(hw, vq) < 0) {
606                 PMD_INIT_LOG(ERR, "setup_queue failed");
607                 ret = -EINVAL;
608                 goto clean_vq;
609         }
610
611         return 0;
612
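/*
 * Error path: resources are released in reverse order of allocation, and
 * hw->cvq / hw->vqs[queue_idx] are reset so that a later virtio_free_queues()
 * does not touch what was already freed here.
 */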
613 clean_vq:
614         hw->cvq = NULL;
615         rte_free(sw_ring);
616 free_hdr_mz:
617         rte_memzone_free(hdr_mz);
618 free_mz:
619         rte_memzone_free(mz);
620 free_vq:
621         rte_free(vq);
        hw->vqs[queue_idx] = NULL;
622
623         return ret;
624 }
625
626 static void
627 virtio_free_queues(struct virtio_hw *hw)
628 {
629         uint16_t nr_vq = virtio_get_nr_vq(hw);
630         struct virtqueue *vq;
631         int queue_type;
632         uint16_t i;
633
634         if (hw->vqs == NULL)
635                 return;
636
637         for (i = 0; i < nr_vq; i++) {
638                 vq = hw->vqs[i];
639                 if (!vq)
640                         continue;
641
642                 queue_type = virtio_get_queue_type(hw, i);
643                 if (queue_type == VTNET_RQ) {
644                         rte_free(vq->sw_ring);
645                         rte_memzone_free(vq->rxq.mz);
646                 } else if (queue_type == VTNET_TQ) {
647                         rte_memzone_free(vq->txq.mz);
648                         rte_memzone_free(vq->txq.virtio_net_hdr_mz);
649                 } else {
650                         rte_memzone_free(vq->cq.mz);
651                         rte_memzone_free(vq->cq.virtio_net_hdr_mz);
652                 }
653
654                 rte_free(vq);
655                 hw->vqs[i] = NULL;
656         }
657
658         rte_free(hw->vqs);
659         hw->vqs = NULL;
660 }
661
662 static int
663 virtio_alloc_queues(struct rte_eth_dev *dev)
664 {
665         struct virtio_hw *hw = dev->data->dev_private;
666         uint16_t nr_vq = virtio_get_nr_vq(hw);
667         uint16_t i;
668         int ret;
669
670         hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
671         if (!hw->vqs) {
672                 PMD_INIT_LOG(ERR, "failed to allocate vqs");
673                 return -ENOMEM;
674         }
675
676         for (i = 0; i < nr_vq; i++) {
677                 ret = virtio_init_queue(dev, i);
678                 if (ret < 0) {
679                         virtio_free_queues(hw);
680                         return ret;
681                 }
682         }
683
684         return 0;
685 }
686
687 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);
688
689 int
690 virtio_dev_close(struct rte_eth_dev *dev)
691 {
692         struct virtio_hw *hw = dev->data->dev_private;
693         struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
694
695         PMD_INIT_LOG(DEBUG, "virtio_dev_close");
696         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
697                 return 0;
698
699         if (!hw->opened)
700                 return 0;
701         hw->opened = 0;
702
703         /* reset the NIC */
704         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
705                 VIRTIO_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
706         if (intr_conf->rxq)
707                 virtio_queues_unbind_intr(dev);
708
709         if (intr_conf->lsc || intr_conf->rxq) {
710                 virtio_intr_disable(dev);
711                 rte_intr_efd_disable(dev->intr_handle);
712                 rte_free(dev->intr_handle->intr_vec);
713                 dev->intr_handle->intr_vec = NULL;
714         }
715
716         virtio_reset(hw);
717         virtio_dev_free_mbufs(dev);
718         virtio_free_queues(hw);
719
720         return VIRTIO_OPS(hw)->dev_close(hw);
721 }
722
723 static int
724 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
725 {
726         struct virtio_hw *hw = dev->data->dev_private;
727         struct virtio_pmd_ctrl ctrl;
728         int dlen[1];
729         int ret;
730
731         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
732                 PMD_INIT_LOG(INFO, "host does not support rx control");
733                 return -ENOTSUP;
734         }
735
736         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
737         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
738         ctrl.data[0] = 1;
739         dlen[0] = 1;
740
741         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
742         if (ret) {
743                 PMD_INIT_LOG(ERR, "Failed to enable promisc");
744                 return -EAGAIN;
745         }
746
747         return 0;
748 }
749
750 static int
751 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
752 {
753         struct virtio_hw *hw = dev->data->dev_private;
754         struct virtio_pmd_ctrl ctrl;
755         int dlen[1];
756         int ret;
757
758         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
759                 PMD_INIT_LOG(INFO, "host does not support rx control");
760                 return -ENOTSUP;
761         }
762
763         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
764         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
765         ctrl.data[0] = 0;
766         dlen[0] = 1;
767
768         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
769         if (ret) {
770                 PMD_INIT_LOG(ERR, "Failed to disable promisc");
771                 return -EAGAIN;
772         }
773
774         return 0;
775 }
776
777 static int
778 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
779 {
780         struct virtio_hw *hw = dev->data->dev_private;
781         struct virtio_pmd_ctrl ctrl;
782         int dlen[1];
783         int ret;
784
785         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
786                 PMD_INIT_LOG(INFO, "host does not support rx control");
787                 return -ENOTSUP;
788         }
789
790         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
791         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
792         ctrl.data[0] = 1;
793         dlen[0] = 1;
794
795         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
796         if (ret) {
797                 PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
798                 return -EAGAIN;
799         }
800
801         return 0;
802 }
803
804 static int
805 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
806 {
807         struct virtio_hw *hw = dev->data->dev_private;
808         struct virtio_pmd_ctrl ctrl;
809         int dlen[1];
810         int ret;
811
812         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
813                 PMD_INIT_LOG(INFO, "host does not support rx control");
814                 return -ENOTSUP;
815         }
816
817         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
818         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
819         ctrl.data[0] = 0;
820         dlen[0] = 1;
821
822         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
823         if (ret) {
824                 PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
825                 return -EAGAIN;
826         }
827
828         return 0;
829 }
830
831 #define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
832 static int
833 virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
834 {
835         struct virtio_hw *hw = dev->data->dev_private;
836         uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
837                                  hw->vtnet_hdr_size;
838         uint32_t frame_size = mtu + ether_hdr_len;
839         uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;
840
841         max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);
842
843         if (mtu < RTE_ETHER_MIN_MTU || frame_size > max_frame_size) {
844                 PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
845                         RTE_ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
846                 return -EINVAL;
847         }
848         return 0;
849 }
850
851 static int
852 virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
853 {
854         struct virtio_hw *hw = dev->data->dev_private;
855         struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
856         struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
857
858         virtqueue_enable_intr(vq);
859         virtio_mb(hw->weak_barriers);
860         return 0;
861 }
862
863 static int
864 virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
865 {
866         struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
867         struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
868
869         virtqueue_disable_intr(vq);
870         return 0;
871 }
872
873 /*
874  * dev_ops for virtio, bare necessities for basic operation
875  */
876 static const struct eth_dev_ops virtio_eth_dev_ops = {
877         .dev_configure           = virtio_dev_configure,
878         .dev_start               = virtio_dev_start,
879         .dev_stop                = virtio_dev_stop,
880         .dev_close               = virtio_dev_close,
881         .promiscuous_enable      = virtio_dev_promiscuous_enable,
882         .promiscuous_disable     = virtio_dev_promiscuous_disable,
883         .allmulticast_enable     = virtio_dev_allmulticast_enable,
884         .allmulticast_disable    = virtio_dev_allmulticast_disable,
885         .mtu_set                 = virtio_mtu_set,
886         .dev_infos_get           = virtio_dev_info_get,
887         .stats_get               = virtio_dev_stats_get,
888         .xstats_get              = virtio_dev_xstats_get,
889         .xstats_get_names        = virtio_dev_xstats_get_names,
890         .stats_reset             = virtio_dev_stats_reset,
891         .xstats_reset            = virtio_dev_stats_reset,
892         .link_update             = virtio_dev_link_update,
893         .vlan_offload_set        = virtio_dev_vlan_offload_set,
894         .rx_queue_setup          = virtio_dev_rx_queue_setup,
895         .rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
896         .rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
897         .rx_queue_release        = virtio_dev_queue_release,
898         .tx_queue_setup          = virtio_dev_tx_queue_setup,
899         .tx_queue_release        = virtio_dev_queue_release,
900         /* collect stats per queue */
901         .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
902         .vlan_filter_set         = virtio_vlan_filter_set,
903         .mac_addr_add            = virtio_mac_addr_add,
904         .mac_addr_remove         = virtio_mac_addr_remove,
905         .mac_addr_set            = virtio_mac_addr_set,
906 };
907
908 /*
909  * dev_ops for virtio-user in secondary processes, as only limited
910  * functionality is supported currently.
911  */
912 const struct eth_dev_ops virtio_user_secondary_eth_dev_ops = {
913         .dev_infos_get           = virtio_dev_info_get,
914         .stats_get               = virtio_dev_stats_get,
915         .xstats_get              = virtio_dev_xstats_get,
916         .xstats_get_names        = virtio_dev_xstats_get_names,
917         .stats_reset             = virtio_dev_stats_reset,
918         .xstats_reset            = virtio_dev_stats_reset,
919         /* collect stats per queue */
920         .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
921 };
922
923 static void
924 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
925 {
926         unsigned i;
927
928         for (i = 0; i < dev->data->nb_tx_queues; i++) {
929                 const struct virtnet_tx *txvq = dev->data->tx_queues[i];
930                 if (txvq == NULL)
931                         continue;
932
933                 stats->opackets += txvq->stats.packets;
934                 stats->obytes += txvq->stats.bytes;
935
936                 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
937                         stats->q_opackets[i] = txvq->stats.packets;
938                         stats->q_obytes[i] = txvq->stats.bytes;
939                 }
940         }
941
942         for (i = 0; i < dev->data->nb_rx_queues; i++) {
943                 const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
944                 if (rxvq == NULL)
945                         continue;
946
947                 stats->ipackets += rxvq->stats.packets;
948                 stats->ibytes += rxvq->stats.bytes;
949                 stats->ierrors += rxvq->stats.errors;
950
951                 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
952                         stats->q_ipackets[i] = rxvq->stats.packets;
953                         stats->q_ibytes[i] = rxvq->stats.bytes;
954                 }
955         }
956
957         stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
958 }
959
960 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
961                                        struct rte_eth_xstat_name *xstats_names,
962                                        __rte_unused unsigned limit)
963 {
964         unsigned i;
965         unsigned count = 0;
966         unsigned t;
967
968         unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
969                 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
970
971         if (xstats_names != NULL) {
972                 /* Note: limit checked in rte_eth_xstats_get_names() */
973
974                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
975                         struct virtnet_rx *rxvq = dev->data->rx_queues[i];
976                         if (rxvq == NULL)
977                                 continue;
978                         for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
979                                 snprintf(xstats_names[count].name,
980                                         sizeof(xstats_names[count].name),
981                                         "rx_q%u_%s", i,
982                                         rte_virtio_rxq_stat_strings[t].name);
983                                 count++;
984                         }
985                 }
986
987                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
988                         struct virtnet_tx *txvq = dev->data->tx_queues[i];
989                         if (txvq == NULL)
990                                 continue;
991                         for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
992                                 snprintf(xstats_names[count].name,
993                                         sizeof(xstats_names[count].name),
994                                         "tx_q%u_%s", i,
995                                         rte_virtio_txq_stat_strings[t].name);
996                                 count++;
997                         }
998                 }
999                 return count;
1000         }
1001         return nstats;
1002 }
1003
1004 static int
1005 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1006                       unsigned n)
1007 {
1008         unsigned i;
1009         unsigned count = 0;
1010
1011         unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1012                 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1013
1014         if (n < nstats)
1015                 return nstats;
1016
1017         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1018                 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1019
1020                 if (rxvq == NULL)
1021                         continue;
1022
1023                 unsigned t;
1024
1025                 for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1026                         xstats[count].value = *(uint64_t *)(((char *)rxvq) +
1027                                 rte_virtio_rxq_stat_strings[t].offset);
1028                         xstats[count].id = count;
1029                         count++;
1030                 }
1031         }
1032
1033         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1034                 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1035
1036                 if (txvq == NULL)
1037                         continue;
1038
1039                 unsigned t;
1040
1041                 for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1042                         xstats[count].value = *(uint64_t *)(((char *)txvq) +
1043                                 rte_virtio_txq_stat_strings[t].offset);
1044                         xstats[count].id = count;
1045                         count++;
1046                 }
1047         }
1048
1049         return count;
1050 }
1051
1052 static int
1053 virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1054 {
1055         virtio_update_stats(dev, stats);
1056
1057         return 0;
1058 }
1059
1060 static int
1061 virtio_dev_stats_reset(struct rte_eth_dev *dev)
1062 {
1063         unsigned int i;
1064
1065         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1066                 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1067                 if (txvq == NULL)
1068                         continue;
1069
1070                 txvq->stats.packets = 0;
1071                 txvq->stats.bytes = 0;
1072                 txvq->stats.multicast = 0;
1073                 txvq->stats.broadcast = 0;
1074                 memset(txvq->stats.size_bins, 0,
1075                        sizeof(txvq->stats.size_bins[0]) * 8);
1076         }
1077
1078         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1079                 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1080                 if (rxvq == NULL)
1081                         continue;
1082
1083                 rxvq->stats.packets = 0;
1084                 rxvq->stats.bytes = 0;
1085                 rxvq->stats.errors = 0;
1086                 rxvq->stats.multicast = 0;
1087                 rxvq->stats.broadcast = 0;
1088                 memset(rxvq->stats.size_bins, 0,
1089                        sizeof(rxvq->stats.size_bins[0]) * 8);
1090         }
1091
1092         return 0;
1093 }
1094
1095 static void
1096 virtio_set_hwaddr(struct virtio_hw *hw)
1097 {
1098         virtio_write_dev_config(hw,
1099                         offsetof(struct virtio_net_config, mac),
1100                         &hw->mac_addr, RTE_ETHER_ADDR_LEN);
1101 }
1102
1103 static void
1104 virtio_get_hwaddr(struct virtio_hw *hw)
1105 {
1106         if (virtio_with_feature(hw, VIRTIO_NET_F_MAC)) {
1107                 virtio_read_dev_config(hw,
1108                         offsetof(struct virtio_net_config, mac),
1109                         &hw->mac_addr, RTE_ETHER_ADDR_LEN);
1110         } else {
1111                 rte_eth_random_addr(&hw->mac_addr[0]);
1112                 virtio_set_hwaddr(hw);
1113         }
1114 }
1115
1116 static int
1117 virtio_mac_table_set(struct virtio_hw *hw,
1118                      const struct virtio_net_ctrl_mac *uc,
1119                      const struct virtio_net_ctrl_mac *mc)
1120 {
1121         struct virtio_pmd_ctrl ctrl;
1122         int err, len[2];
1123
1124         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1125                 PMD_DRV_LOG(INFO, "host does not support mac table");
1126                 return -1;
1127         }
1128
1129         ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1130         ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1131
1132         len[0] = uc->entries * RTE_ETHER_ADDR_LEN + sizeof(uc->entries);
1133         memcpy(ctrl.data, uc, len[0]);
1134
1135         len[1] = mc->entries * RTE_ETHER_ADDR_LEN + sizeof(mc->entries);
1136         memcpy(ctrl.data + len[0], mc, len[1]);
1137
1138         err = virtio_send_command(hw->cvq, &ctrl, len, 2);
1139         if (err != 0)
1140                 PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
1141         return err;
1142 }
1143
1144 static int
1145 virtio_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1146                     uint32_t index, uint32_t vmdq __rte_unused)
1147 {
1148         struct virtio_hw *hw = dev->data->dev_private;
1149         const struct rte_ether_addr *addrs = dev->data->mac_addrs;
1150         unsigned int i;
1151         struct virtio_net_ctrl_mac *uc, *mc;
1152
1153         if (index >= VIRTIO_MAX_MAC_ADDRS) {
1154                 PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1155                 return -EINVAL;
1156         }
1157
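        /*
         * VIRTIO_NET_CTRL_MAC_TABLE_SET replaces the device's whole filter
         * table, so the unicast and multicast lists are rebuilt from scratch
         * on every add or remove.
         */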
1158         uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1159                 sizeof(uc->entries));
1160         uc->entries = 0;
1161         mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1162                 sizeof(mc->entries));
1163         mc->entries = 0;
1164
1165         for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1166                 const struct rte_ether_addr *addr
1167                         = (i == index) ? mac_addr : addrs + i;
1168                 struct virtio_net_ctrl_mac *tbl
1169                         = rte_is_multicast_ether_addr(addr) ? mc : uc;
1170
1171                 memcpy(&tbl->macs[tbl->entries++], addr, RTE_ETHER_ADDR_LEN);
1172         }
1173
1174         return virtio_mac_table_set(hw, uc, mc);
1175 }
1176
1177 static void
1178 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
1179 {
1180         struct virtio_hw *hw = dev->data->dev_private;
1181         struct rte_ether_addr *addrs = dev->data->mac_addrs;
1182         struct virtio_net_ctrl_mac *uc, *mc;
1183         unsigned int i;
1184
1185         if (index >= VIRTIO_MAX_MAC_ADDRS) {
1186                 PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1187                 return;
1188         }
1189
1190         uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1191                 sizeof(uc->entries));
1192         uc->entries = 0;
1193         mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1194                 sizeof(mc->entries));
1195         mc->entries = 0;
1196
1197         for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1198                 struct virtio_net_ctrl_mac *tbl;
1199
1200                 if (i == index || rte_is_zero_ether_addr(addrs + i))
1201                         continue;
1202
1203                 tbl = rte_is_multicast_ether_addr(addrs + i) ? mc : uc;
1204                 memcpy(&tbl->macs[tbl->entries++], addrs + i,
1205                         RTE_ETHER_ADDR_LEN);
1206         }
1207
1208         virtio_mac_table_set(hw, uc, mc);
1209 }
1210
1211 static int
1212 virtio_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1213 {
1214         struct virtio_hw *hw = dev->data->dev_private;
1215
1216         memcpy(hw->mac_addr, mac_addr, RTE_ETHER_ADDR_LEN);
1217
1218         /* Use atomic update if available */
1219         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1220                 struct virtio_pmd_ctrl ctrl;
1221                 int len = RTE_ETHER_ADDR_LEN;
1222
1223                 ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1224                 ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
1225
1226                 memcpy(ctrl.data, mac_addr, RTE_ETHER_ADDR_LEN);
1227                 return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1228         }
1229
1230         if (!virtio_with_feature(hw, VIRTIO_NET_F_MAC))
1231                 return -ENOTSUP;
1232
1233         virtio_set_hwaddr(hw);
1234         return 0;
1235 }
1236
1237 static int
1238 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1239 {
1240         struct virtio_hw *hw = dev->data->dev_private;
1241         struct virtio_pmd_ctrl ctrl;
1242         int len;
1243
1244         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1245                 return -ENOTSUP;
1246
1247         ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1248         ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1249         memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1250         len = sizeof(vlan_id);
1251
1252         return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1253 }
1254
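/*
 * The helpers below re-run the backend's interrupt detection after touching
 * the interrupt state, since acking, enabling or disabling interrupts can
 * switch the device between MSI-X and legacy INTx.
 */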
1255 static int
1256 virtio_intr_unmask(struct rte_eth_dev *dev)
1257 {
1258         struct virtio_hw *hw = dev->data->dev_private;
1259
1260         if (rte_intr_ack(dev->intr_handle) < 0)
1261                 return -1;
1262
1263         if (VIRTIO_OPS(hw)->intr_detect)
1264                 VIRTIO_OPS(hw)->intr_detect(hw);
1265
1266         return 0;
1267 }
1268
1269 static int
1270 virtio_intr_enable(struct rte_eth_dev *dev)
1271 {
1272         struct virtio_hw *hw = dev->data->dev_private;
1273
1274         if (rte_intr_enable(dev->intr_handle) < 0)
1275                 return -1;
1276
1277         if (VIRTIO_OPS(hw)->intr_detect)
1278                 VIRTIO_OPS(hw)->intr_detect(hw);
1279
1280         return 0;
1281 }
1282
1283 static int
1284 virtio_intr_disable(struct rte_eth_dev *dev)
1285 {
1286         struct virtio_hw *hw = dev->data->dev_private;
1287
1288         if (rte_intr_disable(dev->intr_handle) < 0)
1289                 return -1;
1290
1291         if (VIRTIO_OPS(hw)->intr_detect)
1292                 VIRTIO_OPS(hw)->intr_detect(hw);
1293
1294         return 0;
1295 }
1296
1297 static int
1298 virtio_ethdev_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1299 {
1300         uint64_t host_features;
1301
1302         /* Prepare guest_features: features that the driver wants to support */
1303         PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1304                 req_features);
1305
1306         /* Read device(host) feature bits */
1307         host_features = VIRTIO_OPS(hw)->get_features(hw);
1308         PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1309                 host_features);
1310
1311         /* If supported, ensure MTU value is valid before acknowledging it. */
1312         if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1313                 struct virtio_net_config config;
1314
1315                 virtio_read_dev_config(hw,
1316                         offsetof(struct virtio_net_config, mtu),
1317                         &config.mtu, sizeof(config.mtu));
1318
1319                 if (config.mtu < RTE_ETHER_MIN_MTU)
1320                         req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1321         }
1322
1323         /*
1324          * Negotiate features: the subset of device feature bits accepted by
1325          * the driver is written back as the guest feature bits.
1326          */
1327         hw->guest_features = req_features;
1328         hw->guest_features = virtio_negotiate_features(hw, host_features);
1329         PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1330                 hw->guest_features);
1331
1332         if (VIRTIO_OPS(hw)->features_ok(hw) < 0)
1333                 return -1;
1334
1335         if (virtio_with_feature(hw, VIRTIO_F_VERSION_1)) {
1336                 virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1337
1338                 if (!(virtio_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1339                         PMD_INIT_LOG(ERR, "Failed to set FEATURES_OK status!");
1340                         return -1;
1341                 }
1342         }
1343
1344         hw->req_guest_features = req_features;
1345
1346         return 0;
1347 }
1348
1349 int
1350 virtio_dev_pause(struct rte_eth_dev *dev)
1351 {
1352         struct virtio_hw *hw = dev->data->dev_private;
1353
1354         rte_spinlock_lock(&hw->state_lock);
1355
1356         if (hw->started == 0) {
1357                 /* Device is just stopped. */
1358                 rte_spinlock_unlock(&hw->state_lock);
1359                 return -1;
1360         }
1361         hw->started = 0;
1362         /*
1363          * Prevent the worker threads from touching queues to avoid contention;
1364          * 1 ms should be enough for the ongoing Tx function to finish.
1365          */
1366         rte_delay_ms(1);
1367         return 0;
1368 }
1369
1370 /*
1371  * Recover hw state to let the worker threads continue.
1372  */
1373 void
1374 virtio_dev_resume(struct rte_eth_dev *dev)
1375 {
1376         struct virtio_hw *hw = dev->data->dev_private;
1377
1378         hw->started = 1;
1379         rte_spinlock_unlock(&hw->state_lock);
1380 }
1381
1382 /*
1383  * Should be called only after device is paused.
1384  */
1385 int
1386 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1387                 int nb_pkts)
1388 {
1389         struct virtio_hw *hw = dev->data->dev_private;
1390         struct virtnet_tx *txvq = dev->data->tx_queues[0];
1391         int ret;
1392
1393         hw->inject_pkts = tx_pkts;
1394         ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1395         hw->inject_pkts = NULL;
1396
1397         return ret;
1398 }
1399
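/*
 * Inject a gratuitous RARP frame carrying the port's MAC address, typically
 * after a live migration signalled via VIRTIO_NET_S_ANNOUNCE, so that peers
 * and switches relearn where the port now lives.
 */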
1400 static void
1401 virtio_notify_peers(struct rte_eth_dev *dev)
1402 {
1403         struct virtio_hw *hw = dev->data->dev_private;
1404         struct virtnet_rx *rxvq;
1405         struct rte_mbuf *rarp_mbuf;
1406
1407         if (!dev->data->rx_queues)
1408                 return;
1409
1410         rxvq = dev->data->rx_queues[0];
1411         if (!rxvq)
1412                 return;
1413
1414         rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
1415                         (struct rte_ether_addr *)hw->mac_addr);
1416         if (rarp_mbuf == NULL) {
1417                 PMD_DRV_LOG(ERR, "failed to make RARP packet.");
1418                 return;
1419         }
1420
1421         /* If virtio port just stopped, no need to send RARP */
1422         if (virtio_dev_pause(dev) < 0) {
1423                 rte_pktmbuf_free(rarp_mbuf);
1424                 return;
1425         }
1426
1427         virtio_inject_pkts(dev, &rarp_mbuf, 1);
1428         virtio_dev_resume(dev);
1429 }
1430
1431 static void
1432 virtio_ack_link_announce(struct rte_eth_dev *dev)
1433 {
1434         struct virtio_hw *hw = dev->data->dev_private;
1435         struct virtio_pmd_ctrl ctrl;
1436
1437         ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
1438         ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
1439
1440         virtio_send_command(hw->cvq, &ctrl, NULL, 0);
1441 }
1442
1443 /*
1444  * Process the virtio config changed interrupt. Call the callback
1445  * if the link state changed, and generate a gratuitous RARP packet
1446  * if the status indicates an ANNOUNCE.
1447  */
1448 void
1449 virtio_interrupt_handler(void *param)
1450 {
1451         struct rte_eth_dev *dev = param;
1452         struct virtio_hw *hw = dev->data->dev_private;
1453         uint8_t isr;
1454         uint16_t status;
1455
1456         /* Read interrupt status which clears interrupt */
1457         isr = virtio_get_isr(hw);
1458         PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
1459
1460         if (virtio_intr_unmask(dev) < 0)
1461                 PMD_DRV_LOG(ERR, "interrupt enable failed");
1462
1463         if (isr & VIRTIO_ISR_CONFIG) {
1464                 if (virtio_dev_link_update(dev, 0) == 0)
1465                         rte_eth_dev_callback_process(dev,
1466                                                      RTE_ETH_EVENT_INTR_LSC,
1467                                                      NULL);
1468
1469                 if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1470                         virtio_read_dev_config(hw,
1471                                 offsetof(struct virtio_net_config, status),
1472                                 &status, sizeof(status));
1473                         if (status & VIRTIO_NET_S_ANNOUNCE) {
1474                                 virtio_notify_peers(dev);
1475                                 if (hw->cvq)
1476                                         virtio_ack_link_announce(dev);
1477                         }
1478                 }
1479         }
1480 }
1481
1482 /* set rx and tx handlers according to what is supported */
1483 static void
1484 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1485 {
1486         struct virtio_hw *hw = eth_dev->data->dev_private;
1487
1488         eth_dev->tx_pkt_prepare = virtio_xmit_pkts_prepare;
1489         if (virtio_with_packed_queue(hw)) {
1490                 PMD_INIT_LOG(INFO,
1491                         "virtio: using packed ring %s Tx path on port %u",
1492                         hw->use_vec_tx ? "vectorized" : "standard",
1493                         eth_dev->data->port_id);
1494                 if (hw->use_vec_tx)
1495                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
1496                 else
1497                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
1498         } else {
1499                 if (hw->use_inorder_tx) {
1500                         PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
1501                                 eth_dev->data->port_id);
1502                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
1503                 } else {
1504                         PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1505                                 eth_dev->data->port_id);
1506                         eth_dev->tx_pkt_burst = virtio_xmit_pkts;
1507                 }
1508         }
1509
1510         if (virtio_with_packed_queue(hw)) {
1511                 if (hw->use_vec_rx) {
1512                         PMD_INIT_LOG(INFO,
1513                                 "virtio: using packed ring vectorized Rx path on port %u",
1514                                 eth_dev->data->port_id);
1515                         eth_dev->rx_pkt_burst =
1516                                 &virtio_recv_pkts_packed_vec;
1517                 } else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1518                         PMD_INIT_LOG(INFO,
1519                                 "virtio: using packed ring mergeable buffer Rx path on port %u",
1520                                 eth_dev->data->port_id);
1521                         eth_dev->rx_pkt_burst =
1522                                 &virtio_recv_mergeable_pkts_packed;
1523                 } else {
1524                         PMD_INIT_LOG(INFO,
1525                                 "virtio: using packed ring standard Rx path on port %u",
1526                                 eth_dev->data->port_id);
1527                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
1528                 }
1529         } else {
1530                 if (hw->use_vec_rx) {
1531                         PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
1532                                 eth_dev->data->port_id);
1533                         eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1534                 } else if (hw->use_inorder_rx) {
1535                         PMD_INIT_LOG(INFO,
1536                                 "virtio: using inorder Rx path on port %u",
1537                                 eth_dev->data->port_id);
1538                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_inorder;
1539                 } else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1540                         PMD_INIT_LOG(INFO,
1541                                 "virtio: using mergeable buffer Rx path on port %u",
1542                                 eth_dev->data->port_id);
1543                         eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1544                 } else {
1545                         PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1546                                 eth_dev->data->port_id);
1547                         eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1548                 }
1549         }
1550
1551 }
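/*
 * In short, the selection above prefers, for packed rings, the vectorized
 * Tx/Rx paths when use_vec_tx/use_vec_rx are set, then the mergeable-buffer
 * Rx path, then the standard packed-ring paths; for split rings it prefers
 * the vectorized Rx path, then the in-order paths, then mergeable-buffer Rx,
 * falling back to the standard split-ring paths.
 */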
1552
1553 /* Only support 1:1 queue/interrupt mapping so far.
1554  * TODO: support n:1 queue/interrupt mapping when there is a limited number of
1555  * interrupt vectors (<N+1).
1556  */
1557 static int
1558 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1559 {
1560         uint32_t i;
1561         struct virtio_hw *hw = dev->data->dev_private;
1562
1563         PMD_INIT_LOG(INFO, "queue/interrupt binding");
1564         for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1565                 dev->intr_handle->intr_vec[i] = i + 1;
1566                 if (VIRTIO_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1567                                                  VIRTIO_MSI_NO_VECTOR) {
1568                         PMD_DRV_LOG(ERR, "failed to set queue vector");
1569                         return -EBUSY;
1570                 }
1571         }
1572
1573         return 0;
1574 }
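/*
 * MSI-X vector 0 is reserved for the config/link-state interrupt (see the
 * set_config_irq(hw, 0) call in virtio_dev_configure()), so Rx queue i is
 * bound to vector i + 1. hw->vqs[i * 2] selects the Rx virtqueue of queue
 * pair i; Tx queues are not bound to any vector here.
 */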
1575
1576 static void
1577 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1578 {
1579         uint32_t i;
1580         struct virtio_hw *hw = dev->data->dev_private;
1581
1582         PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
1583         for (i = 0; i < dev->data->nb_rx_queues; ++i)
1584                 VIRTIO_OPS(hw)->set_queue_irq(hw,
1585                                              hw->vqs[i * VTNET_CQ],
1586                                              VIRTIO_MSI_NO_VECTOR);
1587 }
1588
1589 static int
1590 virtio_configure_intr(struct rte_eth_dev *dev)
1591 {
1592         struct virtio_hw *hw = dev->data->dev_private;
1593
1594         if (!rte_intr_cap_multiple(dev->intr_handle)) {
1595                 PMD_INIT_LOG(ERR, "Multiple interrupt vectors not supported");
1596                 return -ENOTSUP;
1597         }
1598
1599         if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1600                 PMD_INIT_LOG(ERR, "Failed to create eventfd");
1601                 return -1;
1602         }
1603
1604         if (!dev->intr_handle->intr_vec) {
1605                 dev->intr_handle->intr_vec =
1606                         rte_zmalloc("intr_vec",
1607                                     hw->max_queue_pairs * sizeof(int), 0);
1608                 if (!dev->intr_handle->intr_vec) {
1609                         PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1610                                      hw->max_queue_pairs);
1611                         return -ENOMEM;
1612                 }
1613         }
1614
1615         /* Re-register callback to update max_intr */
1616         rte_intr_callback_unregister(dev->intr_handle,
1617                                      virtio_interrupt_handler,
1618                                      dev);
1619         rte_intr_callback_register(dev->intr_handle,
1620                                    virtio_interrupt_handler,
1621                                    dev);
1622
1623         /* DO NOT try to remove this! This call enables MSI-X; without it, QEMU
1624          * will encounter SIGSEGV when DRIVER_OK is sent.
1625          * For legacy devices, it must also be done before queue/vector binding so
1626          * that the config size grows from 20 to 24 bytes; otherwise writes to
1627          * VIRTIO_MSI_QUEUE_VECTOR (offset 22) will be ignored.
1628          */
1629         if (virtio_intr_enable(dev) < 0) {
1630                 PMD_DRV_LOG(ERR, "interrupt enable failed");
1631                 return -1;
1632         }
1633
1634         if (virtio_queues_bind_intr(dev) < 0) {
1635                 PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1636                 return -1;
1637         }
1638
1639         return 0;
1640 }
1641 #define DUPLEX_UNKNOWN   0xff
1642 /* reset device and renegotiate features if needed */
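/*
 * The steps below follow the virtio initialization sequence: reset the
 * device, set the ACKNOWLEDGE and DRIVER status bits, negotiate features,
 * read the net-specific config space (MAC, status, max queue pairs, MTU),
 * allocate the virtqueues and, when Rx interrupts are requested, bind them
 * to MSI-X vectors before virtio_reinit_complete() finishes the
 * re-initialization.
 */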
1643 static int
1644 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
1645 {
1646         struct virtio_hw *hw = eth_dev->data->dev_private;
1647         struct virtio_net_config *config;
1648         struct virtio_net_config local_config;
1649         int ret;
1650
1651         /* Reset the device although not necessary at startup */
1652         virtio_reset(hw);
1653
1654         if (hw->vqs) {
1655                 virtio_dev_free_mbufs(eth_dev);
1656                 virtio_free_queues(hw);
1657         }
1658
1659         /* Tell the host we've noticed this device. */
1660         virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
1661
1662         /* Tell the host we know how to drive the device. */
1663         virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
1664         if (virtio_ethdev_negotiate_features(hw, req_features) < 0)
1665                 return -1;
1666
1667         hw->weak_barriers = !virtio_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);
1668
1669         /* If host does not support both status and MSI-X then disable LSC */
1670         if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS) && hw->intr_lsc)
1671                 eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
1672         else
1673                 eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
1674
1675         eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1676
1677         /* Setting up rx_header size for the device */
1678         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
1679             virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
1680             virtio_with_packed_queue(hw))
1681                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1682         else
1683                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
1684
1685         /* Copy the permanent MAC address into virtio_hw */
1686         virtio_get_hwaddr(hw);
1687         rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
1688                         &eth_dev->data->mac_addrs[0]);
1689         PMD_INIT_LOG(DEBUG,
1690                      "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
1691                      hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
1692                      hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
1693
1694         if (hw->speed == ETH_SPEED_NUM_UNKNOWN) {
1695                 if (virtio_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX)) {
1696                         config = &local_config;
1697                         virtio_read_dev_config(hw,
1698                                 offsetof(struct virtio_net_config, speed),
1699                                 &config->speed, sizeof(config->speed));
1700                         virtio_read_dev_config(hw,
1701                                 offsetof(struct virtio_net_config, duplex),
1702                                 &config->duplex, sizeof(config->duplex));
1703                         hw->speed = config->speed;
1704                         hw->duplex = config->duplex;
1705                 }
1706         }
1707         if (hw->duplex == DUPLEX_UNKNOWN)
1708                 hw->duplex = ETH_LINK_FULL_DUPLEX;
1709         PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
1710                 hw->speed, hw->duplex);
1711         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
1712                 config = &local_config;
1713
1714                 virtio_read_dev_config(hw,
1715                         offsetof(struct virtio_net_config, mac),
1716                         &config->mac, sizeof(config->mac));
1717
1718                 if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1719                         virtio_read_dev_config(hw,
1720                                 offsetof(struct virtio_net_config, status),
1721                                 &config->status, sizeof(config->status));
1722                 } else {
1723                         PMD_INIT_LOG(DEBUG,
1724                                      "VIRTIO_NET_F_STATUS is not supported");
1725                         config->status = 0;
1726                 }
1727
1728                 if (virtio_with_feature(hw, VIRTIO_NET_F_MQ)) {
1729                         virtio_read_dev_config(hw,
1730                                 offsetof(struct virtio_net_config, max_virtqueue_pairs),
1731                                 &config->max_virtqueue_pairs,
1732                                 sizeof(config->max_virtqueue_pairs));
1733                 } else {
1734                         PMD_INIT_LOG(DEBUG,
1735                                      "VIRTIO_NET_F_MQ is not supported");
1736                         config->max_virtqueue_pairs = 1;
1737                 }
1738
1739                 hw->max_queue_pairs = config->max_virtqueue_pairs;
1740
1741                 if (virtio_with_feature(hw, VIRTIO_NET_F_MTU)) {
1742                         virtio_read_dev_config(hw,
1743                                 offsetof(struct virtio_net_config, mtu),
1744                                 &config->mtu,
1745                                 sizeof(config->mtu));
1746
1747                         /*
1748                          * MTU value has already been checked at negotiation
1749                          * time, but check again in case it has changed since
1750                          * then, which should not happen.
1751                          */
1752                         if (config->mtu < RTE_ETHER_MIN_MTU) {
1753                                 PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
1754                                                 config->mtu);
1755                                 return -1;
1756                         }
1757
1758                         hw->max_mtu = config->mtu;
1759                         /* Set initial MTU to the maximum one supported by vhost */
1760                         eth_dev->data->mtu = config->mtu;
1761
1762                 } else {
1763                         hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
1764                                 VLAN_TAG_LEN - hw->vtnet_hdr_size;
1765                 }
1766
1767                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
1768                                 config->max_virtqueue_pairs);
1769                 PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
1770                 PMD_INIT_LOG(DEBUG,
1771                                 "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
1772                                 config->mac[0], config->mac[1],
1773                                 config->mac[2], config->mac[3],
1774                                 config->mac[4], config->mac[5]);
1775         } else {
1776                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
1777                 hw->max_queue_pairs = 1;
1778                 hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
1779                         VLAN_TAG_LEN - hw->vtnet_hdr_size;
1780         }
1781
1782         ret = virtio_alloc_queues(eth_dev);
1783         if (ret < 0)
1784                 return ret;
1785
1786         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1787                 if (virtio_configure_intr(eth_dev) < 0) {
1788                         PMD_INIT_LOG(ERR, "failed to configure interrupt");
1789                         virtio_free_queues(hw);
1790                         return -1;
1791                 }
1792         }
1793
1794         virtio_reinit_complete(hw);
1795
1796         return 0;
1797 }
1798
1799 /*
1800  * This function is based on probe() function in virtio_pci.c
1801  * It returns 0 on success.
1802  */
1803 int
1804 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
1805 {
1806         struct virtio_hw *hw = eth_dev->data->dev_private;
1807         uint32_t speed = ETH_SPEED_NUM_UNKNOWN;
1808         int vectorized = 0;
1809         int ret;
1810
1811         if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) {
1812                 PMD_INIT_LOG(ERR,
1813                         "Insufficient headroom: required = %d, avail = %d",
1814                         (int)sizeof(struct virtio_net_hdr_mrg_rxbuf),
1815                         RTE_PKTMBUF_HEADROOM);
1816
1817                 return -1;
1818         }
1819
1820         eth_dev->dev_ops = &virtio_eth_dev_ops;
1821         eth_dev->rx_descriptor_done = virtio_dev_rx_queue_done;
1822
1823         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1824                 set_rxtx_funcs(eth_dev);
1825                 return 0;
1826         }
1827
1828         ret = virtio_dev_devargs_parse(eth_dev->device->devargs, &speed, &vectorized);
1829         if (ret < 0)
1830                 return ret;
1831         hw->speed = speed;
1832
1833         /* Allocate memory for storing MAC addresses */
1834         eth_dev->data->mac_addrs = rte_zmalloc("virtio",
1835                                 VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN, 0);
1836         if (eth_dev->data->mac_addrs == NULL) {
1837                 PMD_INIT_LOG(ERR,
1838                         "Failed to allocate %d bytes needed to store MAC addresses",
1839                         VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN);
1840                 return -ENOMEM;
1841         }
1842
1843         rte_spinlock_init(&hw->state_lock);
1844
1845         /* reset device and negotiate default features */
1846         ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
1847         if (ret < 0)
1848                 goto err_virtio_init;
1849
1850         if (vectorized) {
1851                 if (!virtio_with_packed_queue(hw)) {
1852                         hw->use_vec_rx = 1;
1853                 } else {
1854 #if defined(CC_AVX512_SUPPORT) || defined(RTE_ARCH_ARM)
1855                         hw->use_vec_rx = 1;
1856                         hw->use_vec_tx = 1;
1857 #else
1858                         PMD_DRV_LOG(INFO,
1859                                 "build environment does not support packed ring vectorized path");
1860 #endif
1861                 }
1862         }
1863
1864         hw->opened = 1;
1865
1866         return 0;
1867
1868 err_virtio_init:
1869         rte_free(eth_dev->data->mac_addrs);
1870         eth_dev->data->mac_addrs = NULL;
1871         return ret;
1872 }
1873
1874 static uint32_t
1875 virtio_dev_speed_capa_get(uint32_t speed)
1876 {
1877         switch (speed) {
1878         case ETH_SPEED_NUM_10G:
1879                 return ETH_LINK_SPEED_10G;
1880         case ETH_SPEED_NUM_20G:
1881                 return ETH_LINK_SPEED_20G;
1882         case ETH_SPEED_NUM_25G:
1883                 return ETH_LINK_SPEED_25G;
1884         case ETH_SPEED_NUM_40G:
1885                 return ETH_LINK_SPEED_40G;
1886         case ETH_SPEED_NUM_50G:
1887                 return ETH_LINK_SPEED_50G;
1888         case ETH_SPEED_NUM_56G:
1889                 return ETH_LINK_SPEED_56G;
1890         case ETH_SPEED_NUM_100G:
1891                 return ETH_LINK_SPEED_100G;
1892         case ETH_SPEED_NUM_200G:
1893                 return ETH_LINK_SPEED_200G;
1894         default:
1895                 return 0;
1896         }
1897 }
1898
1899 static int vectorized_check_handler(__rte_unused const char *key,
1900                 const char *value, void *ret_val)
1901 {
1902         if (strcmp(value, "1") == 0)
1903                 *(int *)ret_val = 1;
1904         else
1905                 *(int *)ret_val = 0;
1906
1907         return 0;
1908 }
1909
1910 #define VIRTIO_ARG_SPEED      "speed"
1911 #define VIRTIO_ARG_VECTORIZED "vectorized"
1912
1913 static int
1914 link_speed_handler(const char *key __rte_unused,
1915                 const char *value, void *ret_val)
1916 {
1917         uint32_t val;
1918         if (!value || !ret_val)
1919                 return -EINVAL;
1920         val = strtoul(value, NULL, 0);
1921         /* validate input */
1922         if (virtio_dev_speed_capa_get(val) == 0)
1923                 return -EINVAL;
1924         *(uint32_t *)ret_val = val;
1925
1926         return 0;
1927 }
1928
1929
1930 static int
1931 virtio_dev_devargs_parse(struct rte_devargs *devargs, uint32_t *speed, int *vectorized)
1932 {
1933         struct rte_kvargs *kvlist;
1934         int ret = 0;
1935
1936         if (devargs == NULL)
1937                 return 0;
1938
1939         kvlist = rte_kvargs_parse(devargs->args, NULL);
1940         if (kvlist == NULL) {
1941                 PMD_INIT_LOG(ERR, "error when parsing param");
1942                 return 0;
1943         }
1944
1945         if (speed && rte_kvargs_count(kvlist, VIRTIO_ARG_SPEED) == 1) {
1946                 ret = rte_kvargs_process(kvlist,
1947                                         VIRTIO_ARG_SPEED,
1948                                         link_speed_handler, speed);
1949                 if (ret < 0) {
1950                         PMD_INIT_LOG(ERR, "Failed to parse %s",
1951                                         VIRTIO_ARG_SPEED);
1952                         goto exit;
1953                 }
1954         }
1955
1956         if (vectorized &&
1957                 rte_kvargs_count(kvlist, VIRTIO_ARG_VECTORIZED) == 1) {
1958                 ret = rte_kvargs_process(kvlist,
1959                                 VIRTIO_ARG_VECTORIZED,
1960                                 vectorized_check_handler, vectorized);
1961                 if (ret < 0) {
1962                         PMD_INIT_LOG(ERR, "Failed to parse %s",
1963                                         VIRTIO_ARG_VECTORIZED);
1964                         goto exit;
1965                 }
1966         }
1967
1968 exit:
1969         rte_kvargs_free(kvlist);
1970         return ret;
1971 }
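/*
 * Illustrative devargs usage (the PCI address below is an example only):
 *
 *	dpdk-testpmd -a 0000:00:04.0,speed=10000,vectorized=1 -- -i
 *
 * "speed" must be one of the rates accepted by virtio_dev_speed_capa_get();
 * "vectorized=1" requests the vectorized datapaths, subject to the runtime
 * checks performed in virtio_dev_configure().
 */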
1972
1973 static uint8_t
1974 rx_offload_enabled(struct virtio_hw *hw)
1975 {
1976         return virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
1977                 virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
1978                 virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
1979 }
1980
1981 static uint8_t
1982 tx_offload_enabled(struct virtio_hw *hw)
1983 {
1984         return virtio_with_feature(hw, VIRTIO_NET_F_CSUM) ||
1985                 virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
1986                 virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
1987 }
1988
1989 /*
1990  * Configure virtio device
1991  * It returns 0 on success.
1992  */
1993 static int
1994 virtio_dev_configure(struct rte_eth_dev *dev)
1995 {
1996         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
1997         const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
1998         struct virtio_hw *hw = dev->data->dev_private;
1999         uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
2000                 hw->vtnet_hdr_size;
2001         uint64_t rx_offloads = rxmode->offloads;
2002         uint64_t tx_offloads = txmode->offloads;
2003         uint64_t req_features;
2004         int ret;
2005
2006         PMD_INIT_LOG(DEBUG, "configure");
2007         req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
2008
2009         if (rxmode->mq_mode != ETH_MQ_RX_NONE) {
2010                 PMD_DRV_LOG(ERR,
2011                         "Unsupported Rx multi queue mode %d",
2012                         rxmode->mq_mode);
2013                 return -EINVAL;
2014         }
2015
2016         if (txmode->mq_mode != ETH_MQ_TX_NONE) {
2017                 PMD_DRV_LOG(ERR,
2018                         "Unsupported Tx multi queue mode %d",
2019                         txmode->mq_mode);
2020                 return -EINVAL;
2021         }
2022
2023         if (dev->data->dev_conf.intr_conf.rxq) {
2024                 ret = virtio_init_device(dev, hw->req_guest_features);
2025                 if (ret < 0)
2026                         return ret;
2027         }
2028
2029         if (rxmode->max_rx_pkt_len > hw->max_mtu + ether_hdr_len)
2030                 req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
2031
2032         if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2033                            DEV_RX_OFFLOAD_TCP_CKSUM))
2034                 req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
2035
2036         if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO)
2037                 req_features |=
2038                         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2039                         (1ULL << VIRTIO_NET_F_GUEST_TSO6);
2040
2041         if (tx_offloads & (DEV_TX_OFFLOAD_UDP_CKSUM |
2042                            DEV_TX_OFFLOAD_TCP_CKSUM))
2043                 req_features |= (1ULL << VIRTIO_NET_F_CSUM);
2044
2045         if (tx_offloads & DEV_TX_OFFLOAD_TCP_TSO)
2046                 req_features |=
2047                         (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2048                         (1ULL << VIRTIO_NET_F_HOST_TSO6);
2049
2050         /* if requested features changed, reinit the device */
2051         if (req_features != hw->req_guest_features) {
2052                 ret = virtio_init_device(dev, req_features);
2053                 if (ret < 0)
2054                         return ret;
2055         }
2056
2057         if ((rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2058                             DEV_RX_OFFLOAD_TCP_CKSUM)) &&
2059                 !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2060                 PMD_DRV_LOG(ERR,
2061                         "rx checksum not available on this host");
2062                 return -ENOTSUP;
2063         }
2064
2065         if ((rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) &&
2066                 (!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2067                  !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2068                 PMD_DRV_LOG(ERR,
2069                         "Large Receive Offload not available on this host");
2070                 return -ENOTSUP;
2071         }
2072
2073         /* start control queue */
2074         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2075                 virtio_dev_cq_start(dev);
2076
2077         if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2078                 hw->vlan_strip = 1;
2079
2080         if ((rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
2081                         !virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2082                 PMD_DRV_LOG(ERR,
2083                             "vlan filtering not available on this host");
2084                 return -ENOTSUP;
2085         }
2086
2087         hw->has_tx_offload = tx_offload_enabled(hw);
2088         hw->has_rx_offload = rx_offload_enabled(hw);
2089
2090         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2091                 /* Enable vector (0) for Link State Interrupt */
2092                 if (VIRTIO_OPS(hw)->set_config_irq(hw, 0) ==
2093                                 VIRTIO_MSI_NO_VECTOR) {
2094                         PMD_DRV_LOG(ERR, "failed to set config vector");
2095                         return -EBUSY;
2096                 }
2097
2098         if (virtio_with_packed_queue(hw)) {
2099 #if defined(RTE_ARCH_X86_64) && defined(CC_AVX512_SUPPORT)
2100                 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2101                     (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
2102                      !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2103                      !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2104                      rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512)) {
2105                         PMD_DRV_LOG(INFO,
2106                                 "disabled packed ring vectorized path for requirements not met");
2107                         hw->use_vec_rx = 0;
2108                         hw->use_vec_tx = 0;
2109                 }
2110 #elif defined(RTE_ARCH_ARM)
2111                 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2112                     (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) ||
2113                      !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2114                      !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2115                      rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)) {
2116                         PMD_DRV_LOG(INFO,
2117                                 "disabled packed ring vectorized path for requirements not met");
2118                         hw->use_vec_rx = 0;
2119                         hw->use_vec_tx = 0;
2120                 }
2121 #else
2122                 hw->use_vec_rx = 0;
2123                 hw->use_vec_tx = 0;
2124 #endif
2125
2126                 if (hw->use_vec_rx) {
2127                         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2128                                 PMD_DRV_LOG(INFO,
2129                                         "disabled packed ring vectorized rx for mrg_rxbuf enabled");
2130                                 hw->use_vec_rx = 0;
2131                         }
2132
2133                         if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) {
2134                                 PMD_DRV_LOG(INFO,
2135                                         "disabled packed ring vectorized rx for TCP_LRO enabled");
2136                                 hw->use_vec_rx = 0;
2137                         }
2138                 }
2139         } else {
2140                 if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2141                         hw->use_inorder_tx = 1;
2142                         hw->use_inorder_rx = 1;
2143                         hw->use_vec_rx = 0;
2144                 }
2145
2146                 if (hw->use_vec_rx) {
2147 #if defined RTE_ARCH_ARM
2148                         if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2149                                 PMD_DRV_LOG(INFO,
2150                                         "disabled split ring vectorized path for requirements not met");
2151                                 hw->use_vec_rx = 0;
2152                         }
2153 #endif
2154                         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2155                                 PMD_DRV_LOG(INFO,
2156                                         "disabled split ring vectorized rx for mrg_rxbuf enabled");
2157                                 hw->use_vec_rx = 0;
2158                         }
2159
2160                         if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2161                                            DEV_RX_OFFLOAD_TCP_CKSUM |
2162                                            DEV_RX_OFFLOAD_TCP_LRO |
2163                                            DEV_RX_OFFLOAD_VLAN_STRIP)) {
2164                                 PMD_DRV_LOG(INFO,
2165                                         "disabled split ring vectorized rx for offloading enabled");
2166                                 hw->use_vec_rx = 0;
2167                         }
2168
2169                         if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
2170                                 PMD_DRV_LOG(INFO,
2171                                         "disabled split ring vectorized rx, max SIMD bitwidth too low");
2172                                 hw->use_vec_rx = 0;
2173                         }
2174                 }
2175         }
2176
2177         return 0;
2178 }
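/*
 * To summarize the gating above: the packed-ring vectorized paths also
 * require VIRTIO_F_IN_ORDER, VIRTIO_F_VERSION_1 and AVX512F (x86-64) or
 * NEON (Arm) with a sufficient SIMD bitwidth, and are turned off when
 * mergeable Rx buffers or LRO are enabled; the split-ring vectorized Rx
 * path is turned off by mergeable Rx buffers, Rx checksum/LRO/VLAN-strip
 * offloads, or a maximum SIMD bitwidth below 128 bits.
 */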
2179
2180
2181 static int
2182 virtio_dev_start(struct rte_eth_dev *dev)
2183 {
2184         uint16_t nb_queues, i;
2185         struct virtqueue *vq;
2186         struct virtio_hw *hw = dev->data->dev_private;
2187         int ret;
2188
2189         /* Finish the initialization of the queues */
2190         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2191                 ret = virtio_dev_rx_queue_setup_finish(dev, i);
2192                 if (ret < 0)
2193                         return ret;
2194         }
2195         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2196                 ret = virtio_dev_tx_queue_setup_finish(dev, i);
2197                 if (ret < 0)
2198                         return ret;
2199         }
2200
2201         /* check if lsc interrupt feature is enabled */
2202         if (dev->data->dev_conf.intr_conf.lsc) {
2203                 if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2204                         PMD_DRV_LOG(ERR, "link status not supported by host");
2205                         return -ENOTSUP;
2206                 }
2207         }
2208
2209         /* Enable uio/vfio intr/eventfd mapping: although we already did that
2210          * in device configure, it could have been unmapped when the device was
2211          * stopped.
2212          */
2213         if (dev->data->dev_conf.intr_conf.lsc ||
2214             dev->data->dev_conf.intr_conf.rxq) {
2215                 virtio_intr_disable(dev);
2216
2217                 /* Setup interrupt callback  */
2218                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2219                         rte_intr_callback_register(dev->intr_handle,
2220                                                    virtio_interrupt_handler,
2221                                                    dev);
2222
2223                 if (virtio_intr_enable(dev) < 0) {
2224                         PMD_DRV_LOG(ERR, "interrupt enable failed");
2225                         return -EIO;
2226                 }
2227         }
2228
2229         /* Notify the backend.
2230          * Otherwise the tap backend might already have stopped its queue due to
2231          * fullness, and the vhost backend would have no chance to be woken up.
2232          */
2233         nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2234         if (hw->max_queue_pairs > 1) {
2235                 if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2236                         return -EINVAL;
2237         }
2238
2239         PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
2240
2241         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2242                 vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2243                 /* Flush the old packets */
2244                 virtqueue_rxvq_flush(vq);
2245                 virtqueue_notify(vq);
2246         }
2247
2248         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2249                 vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2250                 virtqueue_notify(vq);
2251         }
2252
2253         PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
2254
2255         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2256                 vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2257                 VIRTQUEUE_DUMP(vq);
2258         }
2259
2260         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2261                 vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2262                 VIRTQUEUE_DUMP(vq);
2263         }
2264
2265         set_rxtx_funcs(dev);
2266         hw->started = 1;
2267
2268         /* Initialize Link state */
2269         virtio_dev_link_update(dev, 0);
2270
2271         return 0;
2272 }
2273
2274 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2275 {
2276         struct virtio_hw *hw = dev->data->dev_private;
2277         uint16_t nr_vq = virtio_get_nr_vq(hw);
2278         const char *type __rte_unused;
2279         unsigned int i, mbuf_num = 0;
2280         struct virtqueue *vq;
2281         struct rte_mbuf *buf;
2282         int queue_type;
2283
2284         if (hw->vqs == NULL)
2285                 return;
2286
2287         for (i = 0; i < nr_vq; i++) {
2288                 vq = hw->vqs[i];
2289                 if (!vq)
2290                         continue;
2291
2292                 queue_type = virtio_get_queue_type(hw, i);
2293                 if (queue_type == VTNET_RQ)
2294                         type = "rxq";
2295                 else if (queue_type == VTNET_TQ)
2296                         type = "txq";
2297                 else
2298                         continue;
2299
2300                 PMD_INIT_LOG(DEBUG,
2301                         "Before freeing %s[%d] used and unused buf",
2302                         type, i);
2303                 VIRTQUEUE_DUMP(vq);
2304
2305                 while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2306                         rte_pktmbuf_free(buf);
2307                         mbuf_num++;
2308                 }
2309
2310                 PMD_INIT_LOG(DEBUG,
2311                         "After freeing %s[%d] used and unused buf",
2312                         type, i);
2313                 VIRTQUEUE_DUMP(vq);
2314         }
2315
2316         PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2317 }
2318
2319 /*
2320  * Stop device: disable interrupt and mark link down
2321  */
2322 int
2323 virtio_dev_stop(struct rte_eth_dev *dev)
2324 {
2325         struct virtio_hw *hw = dev->data->dev_private;
2326         struct rte_eth_link link;
2327         struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2328
2329         PMD_INIT_LOG(DEBUG, "stop");
2330         dev->data->dev_started = 0;
2331
2332         rte_spinlock_lock(&hw->state_lock);
2333         if (!hw->started)
2334                 goto out_unlock;
2335         hw->started = 0;
2336
2337         if (intr_conf->lsc || intr_conf->rxq) {
2338                 virtio_intr_disable(dev);
2339
2340                 /* Reset interrupt callback  */
2341                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2342                         rte_intr_callback_unregister(dev->intr_handle,
2343                                                      virtio_interrupt_handler,
2344                                                      dev);
2345                 }
2346         }
2347
2348         memset(&link, 0, sizeof(link));
2349         rte_eth_linkstatus_set(dev, &link);
2350 out_unlock:
2351         rte_spinlock_unlock(&hw->state_lock);
2352
2353         return 0;
2354 }
2355
2356 static int
2357 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2358 {
2359         struct rte_eth_link link;
2360         uint16_t status;
2361         struct virtio_hw *hw = dev->data->dev_private;
2362
2363         memset(&link, 0, sizeof(link));
2364         link.link_duplex = hw->duplex;
2365         link.link_speed  = hw->speed;
2366         link.link_autoneg = ETH_LINK_AUTONEG;
2367
2368         if (!hw->started) {
2369                 link.link_status = ETH_LINK_DOWN;
2370                 link.link_speed = ETH_SPEED_NUM_NONE;
2371         } else if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2372                 PMD_INIT_LOG(DEBUG, "Get link status from hw");
2373                 virtio_read_dev_config(hw,
2374                                 offsetof(struct virtio_net_config, status),
2375                                 &status, sizeof(status));
2376                 if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2377                         link.link_status = ETH_LINK_DOWN;
2378                         link.link_speed = ETH_SPEED_NUM_NONE;
2379                         PMD_INIT_LOG(DEBUG, "Port %d is down",
2380                                      dev->data->port_id);
2381                 } else {
2382                         link.link_status = ETH_LINK_UP;
2383                         PMD_INIT_LOG(DEBUG, "Port %d is up",
2384                                      dev->data->port_id);
2385                 }
2386         } else {
2387                 link.link_status = ETH_LINK_UP;
2388         }
2389
2390         return rte_eth_linkstatus_set(dev, &link);
2391 }
2392
2393 static int
2394 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2395 {
2396         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2397         struct virtio_hw *hw = dev->data->dev_private;
2398         uint64_t offloads = rxmode->offloads;
2399
2400         if (mask & ETH_VLAN_FILTER_MASK) {
2401                 if ((offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
2402                                 !virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2403
2404                         PMD_DRV_LOG(NOTICE,
2405                                 "vlan filtering not available on this host");
2406
2407                         return -ENOTSUP;
2408                 }
2409         }
2410
2411         if (mask & ETH_VLAN_STRIP_MASK)
2412                 hw->vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
2413
2414         return 0;
2415 }
2416
2417 static int
2418 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2419 {
2420         uint64_t tso_mask, host_features;
2421         struct virtio_hw *hw = dev->data->dev_private;
2422         dev_info->speed_capa = virtio_dev_speed_capa_get(hw->speed);
2423
2424         dev_info->max_rx_queues =
2425                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
2426         dev_info->max_tx_queues =
2427                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
2428         dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
2429         dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
2430         dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
2431
2432         host_features = VIRTIO_OPS(hw)->get_features(hw);
2433         dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2434         dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME;
2435         if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
2436                 dev_info->rx_offload_capa |=
2437                         DEV_RX_OFFLOAD_TCP_CKSUM |
2438                         DEV_RX_OFFLOAD_UDP_CKSUM;
2439         }
2440         if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
2441                 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_VLAN_FILTER;
2442         tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2443                 (1ULL << VIRTIO_NET_F_GUEST_TSO6);
2444         if ((host_features & tso_mask) == tso_mask)
2445                 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;
2446
2447         dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
2448                                     DEV_TX_OFFLOAD_VLAN_INSERT;
2449         if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
2450                 dev_info->tx_offload_capa |=
2451                         DEV_TX_OFFLOAD_UDP_CKSUM |
2452                         DEV_TX_OFFLOAD_TCP_CKSUM;
2453         }
2454         tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2455                 (1ULL << VIRTIO_NET_F_HOST_TSO6);
2456         if ((host_features & tso_mask) == tso_mask)
2457                 dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
2458
2459         return 0;
2460 }
2461
2462 /*
2463  * It enables testpmd to collect per-queue stats.
2464  */
2465 static int
2466 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
2467 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
2468 __rte_unused uint8_t is_rx)
2469 {
2470         return 0;
2471 }
2472
2473 RTE_LOG_REGISTER(virtio_logtype_init, pmd.net.virtio.init, NOTICE);
2474 RTE_LOG_REGISTER(virtio_logtype_driver, pmd.net.virtio.driver, NOTICE);