dpdk.git: drivers/net/virtio/virtio_ethdev.c (commit 9690eb4fb2cbb33b98f78e96b98d3c0e7905c2f4)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10
11 #include <ethdev_driver.h>
12 #include <rte_memcpy.h>
13 #include <rte_string_fns.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_branch_prediction.h>
17 #include <rte_ether.h>
18 #include <rte_ip.h>
19 #include <rte_arp.h>
20 #include <rte_common.h>
21 #include <rte_errno.h>
22 #include <rte_cpuflags.h>
23 #include <rte_vect.h>
24 #include <rte_memory.h>
25 #include <rte_eal_paging.h>
26 #include <rte_eal.h>
27 #include <rte_dev.h>
28 #include <rte_cycles.h>
29 #include <rte_kvargs.h>
30
31 #include "virtio_ethdev.h"
32 #include "virtio.h"
33 #include "virtio_logs.h"
34 #include "virtqueue.h"
35 #include "virtio_rxtx.h"
36 #include "virtio_rxtx_simple.h"
37 #include "virtio_user/virtio_user_dev.h"
38
39 static int  virtio_dev_configure(struct rte_eth_dev *dev);
40 static int  virtio_dev_start(struct rte_eth_dev *dev);
41 static int virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
42 static int virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
43 static int virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
44 static int virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
45 static uint32_t virtio_dev_speed_capa_get(uint32_t speed);
46 static int virtio_dev_devargs_parse(struct rte_devargs *devargs,
47         uint32_t *speed,
48         int *vectorized);
49 static int virtio_dev_info_get(struct rte_eth_dev *dev,
50                                 struct rte_eth_dev_info *dev_info);
51 static int virtio_dev_link_update(struct rte_eth_dev *dev,
52         int wait_to_complete);
53 static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
54
55 static void virtio_set_hwaddr(struct virtio_hw *hw);
56 static void virtio_get_hwaddr(struct virtio_hw *hw);
57
58 static int virtio_dev_stats_get(struct rte_eth_dev *dev,
59                                  struct rte_eth_stats *stats);
60 static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
61                                  struct rte_eth_xstat *xstats, unsigned n);
62 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
63                                        struct rte_eth_xstat_name *xstats_names,
64                                        unsigned limit);
65 static int virtio_dev_stats_reset(struct rte_eth_dev *dev);
66 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
67 static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
68                                 uint16_t vlan_id, int on);
69 static int virtio_mac_addr_add(struct rte_eth_dev *dev,
70                                 struct rte_ether_addr *mac_addr,
71                                 uint32_t index, uint32_t vmdq);
72 static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
73 static int virtio_mac_addr_set(struct rte_eth_dev *dev,
74                                 struct rte_ether_addr *mac_addr);
75
76 static int virtio_intr_disable(struct rte_eth_dev *dev);
77
78 static int virtio_dev_queue_stats_mapping_set(
79         struct rte_eth_dev *eth_dev,
80         uint16_t queue_id,
81         uint8_t stat_idx,
82         uint8_t is_rx);
83
84 static void virtio_notify_peers(struct rte_eth_dev *dev);
85 static void virtio_ack_link_announce(struct rte_eth_dev *dev);
86
87 struct rte_virtio_xstats_name_off {
88         char name[RTE_ETH_XSTATS_NAME_SIZE];
89         unsigned offset;
90 };
91
92 /* [rt]x_qX_ is prepended to the name string here */
93 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
94         {"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
95         {"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
96         {"errors",                 offsetof(struct virtnet_rx, stats.errors)},
97         {"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
98         {"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
99         {"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
100         {"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
101         {"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
102         {"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
103         {"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
104         {"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
105         {"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
106         {"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
107 };
108
109 /* [rt]x_qX_ is prepended to the name string here */
110 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
111         {"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
112         {"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
113         {"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
114         {"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
115         {"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
116         {"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
117         {"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
118         {"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
119         {"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
120         {"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
121         {"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
122         {"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
123 };
124
125 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
126                             sizeof(rte_virtio_rxq_stat_strings[0]))
127 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
128                             sizeof(rte_virtio_txq_stat_strings[0]))
129
130 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
131
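/*
 * Post a control command on a packed control virtqueue and busy-wait for
 * completion: one descriptor for the control header, one per data argument
 * in dlen[], and a final device-writable descriptor for the status byte.
 * All buffers live in the control queue's header memzone, so the caller
 * must have copied the command there beforehand (see virtio_send_command()).
 */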
132 static struct virtio_pmd_ctrl *
133 virtio_send_command_packed(struct virtnet_ctl *cvq,
134                            struct virtio_pmd_ctrl *ctrl,
135                            int *dlen, int pkt_num)
136 {
137         struct virtqueue *vq = virtnet_cq_to_vq(cvq);
138         int head;
139         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
140         struct virtio_pmd_ctrl *result;
141         uint16_t flags;
142         int sum = 0;
143         int nb_descs = 0;
144         int k;
145
146         /*
147          * Format is enforced in qemu code:
148          * One TX packet for header;
149          * At least one TX packet per argument;
150          * One RX packet for ACK.
151          */
152         head = vq->vq_avail_idx;
153         flags = vq->vq_packed.cached_flags;
154         desc[head].addr = cvq->virtio_net_hdr_mem;
155         desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
156         vq->vq_free_cnt--;
157         nb_descs++;
158         if (++vq->vq_avail_idx >= vq->vq_nentries) {
159                 vq->vq_avail_idx -= vq->vq_nentries;
160                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
161         }
162
163         for (k = 0; k < pkt_num; k++) {
164                 desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
165                         + sizeof(struct virtio_net_ctrl_hdr)
166                         + sizeof(ctrl->status) + sizeof(uint8_t) * sum;
167                 desc[vq->vq_avail_idx].len = dlen[k];
168                 desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
169                         vq->vq_packed.cached_flags;
170                 sum += dlen[k];
171                 vq->vq_free_cnt--;
172                 nb_descs++;
173                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
174                         vq->vq_avail_idx -= vq->vq_nentries;
175                         vq->vq_packed.cached_flags ^=
176                                 VRING_PACKED_DESC_F_AVAIL_USED;
177                 }
178         }
179
180         desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
181                 + sizeof(struct virtio_net_ctrl_hdr);
182         desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
183         desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
184                 vq->vq_packed.cached_flags;
185         vq->vq_free_cnt--;
186         nb_descs++;
187         if (++vq->vq_avail_idx >= vq->vq_nentries) {
188                 vq->vq_avail_idx -= vq->vq_nentries;
189                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
190         }
191
192         virtqueue_store_flags_packed(&desc[head], VRING_DESC_F_NEXT | flags,
193                         vq->hw->weak_barriers);
194
195         virtio_wmb(vq->hw->weak_barriers);
196         virtqueue_notify(vq);
197
198         /* wait for used desc in virtqueue
199          * desc_is_used has a load-acquire or rte_io_rmb inside
200          */
201         while (!desc_is_used(&desc[head], vq))
202                 usleep(100);
203
204         /* now get used descriptors */
205         vq->vq_free_cnt += nb_descs;
206         vq->vq_used_cons_idx += nb_descs;
207         if (vq->vq_used_cons_idx >= vq->vq_nentries) {
208                 vq->vq_used_cons_idx -= vq->vq_nentries;
209                 vq->vq_packed.used_wrap_counter ^= 1;
210         }
211
212         PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
213                         "vq->vq_avail_idx=%d\n"
214                         "vq->vq_used_cons_idx=%d\n"
215                         "vq->vq_packed.cached_flags=0x%x\n"
216                         "vq->vq_packed.used_wrap_counter=%d\n",
217                         vq->vq_free_cnt,
218                         vq->vq_avail_idx,
219                         vq->vq_used_cons_idx,
220                         vq->vq_packed.cached_flags,
221                         vq->vq_packed.used_wrap_counter);
222
223         result = cvq->virtio_net_hdr_mz->addr;
224         return result;
225 }
226
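/*
 * Split-ring counterpart of virtio_send_command_packed(): chain the header,
 * data and status descriptors starting at vq_desc_head_idx, notify the
 * device, then poll the used ring and reclaim the descriptors once the
 * device has consumed the chain.
 */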
227 static struct virtio_pmd_ctrl *
228 virtio_send_command_split(struct virtnet_ctl *cvq,
229                           struct virtio_pmd_ctrl *ctrl,
230                           int *dlen, int pkt_num)
231 {
232         struct virtio_pmd_ctrl *result;
233         struct virtqueue *vq = virtnet_cq_to_vq(cvq);
234         uint32_t head, i;
235         int k, sum = 0;
236
237         head = vq->vq_desc_head_idx;
238
239         /*
240          * Format is enforced in qemu code:
241          * One TX packet for header;
242          * At least one TX packet per argument;
243          * One RX packet for ACK.
244          */
245         vq->vq_split.ring.desc[head].flags = VRING_DESC_F_NEXT;
246         vq->vq_split.ring.desc[head].addr = cvq->virtio_net_hdr_mem;
247         vq->vq_split.ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
248         vq->vq_free_cnt--;
249         i = vq->vq_split.ring.desc[head].next;
250
251         for (k = 0; k < pkt_num; k++) {
252                 vq->vq_split.ring.desc[i].flags = VRING_DESC_F_NEXT;
253                 vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
254                         + sizeof(struct virtio_net_ctrl_hdr)
255                         + sizeof(ctrl->status) + sizeof(uint8_t)*sum;
256                 vq->vq_split.ring.desc[i].len = dlen[k];
257                 sum += dlen[k];
258                 vq->vq_free_cnt--;
259                 i = vq->vq_split.ring.desc[i].next;
260         }
261
262         vq->vq_split.ring.desc[i].flags = VRING_DESC_F_WRITE;
263         vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
264                         + sizeof(struct virtio_net_ctrl_hdr);
265         vq->vq_split.ring.desc[i].len = sizeof(ctrl->status);
266         vq->vq_free_cnt--;
267
268         vq->vq_desc_head_idx = vq->vq_split.ring.desc[i].next;
269
270         vq_update_avail_ring(vq, head);
271         vq_update_avail_idx(vq);
272
273         PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
274
275         virtqueue_notify(vq);
276
277         while (virtqueue_nused(vq) == 0)
278                 usleep(100);
279
280         while (virtqueue_nused(vq)) {
281                 uint32_t idx, desc_idx, used_idx;
282                 struct vring_used_elem *uep;
283
284                 used_idx = (uint32_t)(vq->vq_used_cons_idx
285                                 & (vq->vq_nentries - 1));
286                 uep = &vq->vq_split.ring.used->ring[used_idx];
287                 idx = (uint32_t) uep->id;
288                 desc_idx = idx;
289
290                 while (vq->vq_split.ring.desc[desc_idx].flags &
291                                 VRING_DESC_F_NEXT) {
292                         desc_idx = vq->vq_split.ring.desc[desc_idx].next;
293                         vq->vq_free_cnt++;
294                 }
295
296                 vq->vq_split.ring.desc[desc_idx].next = vq->vq_desc_head_idx;
297                 vq->vq_desc_head_idx = idx;
298
299                 vq->vq_used_cons_idx++;
300                 vq->vq_free_cnt++;
301         }
302
303         PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
304                         vq->vq_free_cnt, vq->vq_desc_head_idx);
305
306         result = cvq->virtio_net_hdr_mz->addr;
307         return result;
308 }
309
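/*
 * Send a control-queue command and return the status written back by the
 * device (or -1 on local failure). The command header, arguments and status
 * are staged in the control queue's header memzone; the cvq lock serializes
 * concurrent callers. See virtio_set_multiple_queues() below for a typical
 * caller.
 */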
310 static int
311 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
312                     int *dlen, int pkt_num)
313 {
314         virtio_net_ctrl_ack status = ~0;
315         struct virtio_pmd_ctrl *result;
316         struct virtqueue *vq;
317
318         ctrl->status = status;
319
320         if (!cvq) {
321                 PMD_INIT_LOG(ERR, "Control queue is not supported.");
322                 return -1;
323         }
324
325         rte_spinlock_lock(&cvq->lock);
326         vq = virtnet_cq_to_vq(cvq);
327
328         PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
329                 "vq->hw->cvq = %p vq = %p",
330                 vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
331
332         if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
333                 rte_spinlock_unlock(&cvq->lock);
334                 return -1;
335         }
336
337         memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
338                 sizeof(struct virtio_pmd_ctrl));
339
340         if (virtio_with_packed_queue(vq->hw))
341                 result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num);
342         else
343                 result = virtio_send_command_split(cvq, ctrl, dlen, pkt_num);
344
345         rte_spinlock_unlock(&cvq->lock);
346         return result->status;
347 }
348
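/*
 * Tell the device how many queue pairs to enable via the
 * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET control command.
 */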
349 static int
350 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
351 {
352         struct virtio_hw *hw = dev->data->dev_private;
353         struct virtio_pmd_ctrl ctrl;
354         int dlen[1];
355         int ret;
356
357         ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
358         ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
359         memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
360
361         dlen[0] = sizeof(uint16_t);
362
363         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
364         if (ret) {
365                 PMD_INIT_LOG(ERR, "Multiqueue configured but sending the "
366                           "control command failed");
367                 return -EINVAL;
368         }
369
370         return 0;
371 }
372
373 static void
374 virtio_dev_queue_release(void *queue __rte_unused)
375 {
376         /* do nothing */
377 }
378
379 static uint16_t
380 virtio_get_nr_vq(struct virtio_hw *hw)
381 {
382         uint16_t nr_vq = hw->max_queue_pairs * 2;
383
384         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
385                 nr_vq += 1;
386
387         return nr_vq;
388 }
389
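/*
 * Reset the virtqueue bookkeeping (indices, free count, descriptor extras)
 * and (re)initialize the vring layout in the ring memzone, packed or split
 * depending on the negotiated features. Interrupts are disabled here and
 * re-enabled per queue on demand (see virtio_dev_rx_queue_intr_enable()).
 */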
390 static void
391 virtio_init_vring(struct virtqueue *vq)
392 {
393         int size = vq->vq_nentries;
394         uint8_t *ring_mem = vq->vq_ring_virt_mem;
395
396         PMD_INIT_FUNC_TRACE();
397
398         memset(ring_mem, 0, vq->vq_ring_size);
399
400         vq->vq_used_cons_idx = 0;
401         vq->vq_desc_head_idx = 0;
402         vq->vq_avail_idx = 0;
403         vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
404         vq->vq_free_cnt = vq->vq_nentries;
405         memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
406         if (virtio_with_packed_queue(vq->hw)) {
407                 vring_init_packed(&vq->vq_packed.ring, ring_mem,
408                                   VIRTIO_VRING_ALIGN, size);
409                 vring_desc_init_packed(vq, size);
410         } else {
411                 struct vring *vr = &vq->vq_split.ring;
412
413                 vring_init_split(vr, ring_mem, VIRTIO_VRING_ALIGN, size);
414                 vring_desc_init_split(vr->desc, size);
415         }
416         /*
417          * Disable the device (host) from interrupting the guest
418          */
419         virtqueue_disable_intr(vq);
420 }
421
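/*
 * Allocate and initialize one virtqueue: read its size from the device,
 * reserve an IOVA-contiguous memzone for the vring, reserve a header
 * memzone for Tx regions or control-queue buffers, and, for Rx queues,
 * allocate the SW ring plus a fake mbuf placeholder. Finally hand the
 * queue to the transport-specific setup_queue() op. On error, every
 * resource allocated so far is released.
 */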
422 static int
423 virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
424 {
425         char vq_name[VIRTQUEUE_MAX_NAME_SZ];
426         char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
427         const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
428         unsigned int vq_size, size;
429         struct virtio_hw *hw = dev->data->dev_private;
430         struct virtnet_rx *rxvq = NULL;
431         struct virtnet_tx *txvq = NULL;
432         struct virtnet_ctl *cvq = NULL;
433         struct virtqueue *vq;
434         size_t sz_hdr_mz = 0;
435         void *sw_ring = NULL;
436         int queue_type = virtio_get_queue_type(hw, queue_idx);
437         int ret;
438         int numa_node = dev->device->numa_node;
439         struct rte_mbuf *fake_mbuf = NULL;
440
441         PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
442                         queue_idx, numa_node);
443
444         /*
445          * Read the virtqueue size from the Queue Size field.
446          * It is always a power of 2; a size of 0 means the virtqueue does not exist.
447          */
448         vq_size = VIRTIO_OPS(hw)->get_queue_num(hw, queue_idx);
449         PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
450         if (vq_size == 0) {
451                 PMD_INIT_LOG(ERR, "virtqueue does not exist");
452                 return -EINVAL;
453         }
454
455         if (!virtio_with_packed_queue(hw) && !rte_is_power_of_2(vq_size)) {
456                 PMD_INIT_LOG(ERR, "split virtqueue size is not power of 2");
457                 return -EINVAL;
458         }
459
460         snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
461                  dev->data->port_id, queue_idx);
462
463         size = RTE_ALIGN_CEIL(sizeof(*vq) +
464                                 vq_size * sizeof(struct vq_desc_extra),
465                                 RTE_CACHE_LINE_SIZE);
466         if (queue_type == VTNET_TQ) {
467                 /*
468                  * For each xmit packet, allocate a virtio_net_hdr
469                  * and indirect ring elements
470                  */
471                 sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
472         } else if (queue_type == VTNET_CQ) {
473                 /* Allocate a page for control vq command, data and status */
474                 sz_hdr_mz = rte_mem_page_size();
475         }
476
477         vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
478                                 numa_node);
479         if (vq == NULL) {
480                 PMD_INIT_LOG(ERR, "can not allocate vq");
481                 return -ENOMEM;
482         }
483         hw->vqs[queue_idx] = vq;
484
485         vq->hw = hw;
486         vq->vq_queue_index = queue_idx;
487         vq->vq_nentries = vq_size;
488         if (virtio_with_packed_queue(hw)) {
489                 vq->vq_packed.used_wrap_counter = 1;
490                 vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL;
491                 vq->vq_packed.event_flags_shadow = 0;
492                 if (queue_type == VTNET_RQ)
493                         vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE;
494         }
495
496         /*
497          * Reserve a memzone for vring elements
498          */
499         size = vring_size(hw, vq_size, VIRTIO_VRING_ALIGN);
500         vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_VRING_ALIGN);
501         PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
502                      size, vq->vq_ring_size);
503
504         mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
505                         numa_node, RTE_MEMZONE_IOVA_CONTIG,
506                         VIRTIO_VRING_ALIGN);
507         if (mz == NULL) {
508                 if (rte_errno == EEXIST)
509                         mz = rte_memzone_lookup(vq_name);
510                 if (mz == NULL) {
511                         ret = -ENOMEM;
512                         goto free_vq;
513                 }
514         }
515
516         memset(mz->addr, 0, mz->len);
517
518         vq->vq_ring_mem = mz->iova;
519         vq->vq_ring_virt_mem = mz->addr;
520         PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem:      0x%" PRIx64,
521                      (uint64_t)mz->iova);
522         PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64,
523                      (uint64_t)(uintptr_t)mz->addr);
524
525         virtio_init_vring(vq);
526
527         if (sz_hdr_mz) {
528                 snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
529                          dev->data->port_id, queue_idx);
530                 hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
531                                 numa_node, RTE_MEMZONE_IOVA_CONTIG,
532                                 RTE_CACHE_LINE_SIZE);
533                 if (hdr_mz == NULL) {
534                         if (rte_errno == EEXIST)
535                                 hdr_mz = rte_memzone_lookup(vq_hdr_name);
536                         if (hdr_mz == NULL) {
537                                 ret = -ENOMEM;
538                                 goto free_mz;
539                         }
540                 }
541         }
542
543         if (queue_type == VTNET_RQ) {
544                 size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
545                                sizeof(vq->sw_ring[0]);
546
547                 sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
548                                 RTE_CACHE_LINE_SIZE, numa_node);
549                 if (!sw_ring) {
550                         PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
551                         ret = -ENOMEM;
552                         goto free_hdr_mz;
553                 }
554
555                 fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
556                                 RTE_CACHE_LINE_SIZE, numa_node);
557                 if (!fake_mbuf) {
558                         PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
559                         ret = -ENOMEM;
560                         goto free_sw_ring;
561                 }
562
563                 vq->sw_ring = sw_ring;
564                 rxvq = &vq->rxq;
565                 rxvq->port_id = dev->data->port_id;
566                 rxvq->mz = mz;
567                 rxvq->fake_mbuf = fake_mbuf;
568         } else if (queue_type == VTNET_TQ) {
569                 txvq = &vq->txq;
570                 txvq->port_id = dev->data->port_id;
571                 txvq->mz = mz;
572                 txvq->virtio_net_hdr_mz = hdr_mz;
573                 txvq->virtio_net_hdr_mem = hdr_mz->iova;
574         } else if (queue_type == VTNET_CQ) {
575                 cvq = &vq->cq;
576                 cvq->mz = mz;
577                 cvq->virtio_net_hdr_mz = hdr_mz;
578                 cvq->virtio_net_hdr_mem = hdr_mz->iova;
579                 memset(cvq->virtio_net_hdr_mz->addr, 0, rte_mem_page_size());
580
581                 hw->cvq = cvq;
582         }
583
584         if (queue_type == VTNET_TQ) {
585                 struct virtio_tx_region *txr;
586                 unsigned int i;
587
588                 txr = hdr_mz->addr;
589                 memset(txr, 0, vq_size * sizeof(*txr));
590                 for (i = 0; i < vq_size; i++) {
591                         /* first indirect descriptor is always the tx header */
592                         if (!virtio_with_packed_queue(hw)) {
593                                 struct vring_desc *start_dp = txr[i].tx_indir;
594                                 vring_desc_init_split(start_dp,
595                                                       RTE_DIM(txr[i].tx_indir));
596                                 start_dp->addr = txvq->virtio_net_hdr_mem
597                                         + i * sizeof(*txr)
598                                         + offsetof(struct virtio_tx_region,
599                                                    tx_hdr);
600                                 start_dp->len = hw->vtnet_hdr_size;
601                                 start_dp->flags = VRING_DESC_F_NEXT;
602                         } else {
603                                 struct vring_packed_desc *start_dp =
604                                         txr[i].tx_packed_indir;
605                                 vring_desc_init_indirect_packed(start_dp,
606                                       RTE_DIM(txr[i].tx_packed_indir));
607                                 start_dp->addr = txvq->virtio_net_hdr_mem
608                                         + i * sizeof(*txr)
609                                         + offsetof(struct virtio_tx_region,
610                                                    tx_hdr);
611                                 start_dp->len = hw->vtnet_hdr_size;
612                         }
613                 }
614         }
615
616         if (VIRTIO_OPS(hw)->setup_queue(hw, vq) < 0) {
617                 PMD_INIT_LOG(ERR, "setup_queue failed");
618                 ret = -EINVAL;
619                 goto clean_vq;
620         }
621
622         return 0;
623
624 clean_vq:
625         hw->cvq = NULL;
626         rte_free(fake_mbuf);
627 free_sw_ring:
628         rte_free(sw_ring);
629 free_hdr_mz:
630         rte_memzone_free(hdr_mz);
631 free_mz:
632         rte_memzone_free(mz);
633 free_vq:
634         rte_free(vq);
635
636         return ret;
637 }
638
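/*
 * Release everything allocated by virtio_init_queue() for every virtqueue:
 * SW ring and fake mbuf for Rx, ring and header memzones, the virtqueue
 * structure itself, and finally the hw->vqs array.
 */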
639 static void
640 virtio_free_queues(struct virtio_hw *hw)
641 {
642         uint16_t nr_vq = virtio_get_nr_vq(hw);
643         struct virtqueue *vq;
644         int queue_type;
645         uint16_t i;
646
647         if (hw->vqs == NULL)
648                 return;
649
650         for (i = 0; i < nr_vq; i++) {
651                 vq = hw->vqs[i];
652                 if (!vq)
653                         continue;
654
655                 queue_type = virtio_get_queue_type(hw, i);
656                 if (queue_type == VTNET_RQ) {
657                         rte_free(vq->rxq.fake_mbuf);
658                         rte_free(vq->sw_ring);
659                         rte_memzone_free(vq->rxq.mz);
660                 } else if (queue_type == VTNET_TQ) {
661                         rte_memzone_free(vq->txq.mz);
662                         rte_memzone_free(vq->txq.virtio_net_hdr_mz);
663                 } else {
664                         rte_memzone_free(vq->cq.mz);
665                         rte_memzone_free(vq->cq.virtio_net_hdr_mz);
666                 }
667
668                 rte_free(vq);
669                 hw->vqs[i] = NULL;
670         }
671
672         rte_free(hw->vqs);
673         hw->vqs = NULL;
674 }
675
676 static int
677 virtio_alloc_queues(struct rte_eth_dev *dev)
678 {
679         struct virtio_hw *hw = dev->data->dev_private;
680         uint16_t nr_vq = virtio_get_nr_vq(hw);
681         uint16_t i;
682         int ret;
683
684         hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
685         if (!hw->vqs) {
686                 PMD_INIT_LOG(ERR, "failed to allocate vqs");
687                 return -ENOMEM;
688         }
689
690         for (i = 0; i < nr_vq; i++) {
691                 ret = virtio_init_queue(dev, i);
692                 if (ret < 0) {
693                         virtio_free_queues(hw);
694                         return ret;
695                 }
696         }
697
698         return 0;
699 }
700
701 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);
702
703 int
704 virtio_dev_close(struct rte_eth_dev *dev)
705 {
706         struct virtio_hw *hw = dev->data->dev_private;
707         struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
708
709         PMD_INIT_LOG(DEBUG, "virtio_dev_close");
710         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
711                 return 0;
712
713         if (!hw->opened)
714                 return 0;
715         hw->opened = 0;
716
717         /* reset the NIC */
718         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
719                 VIRTIO_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
720         if (intr_conf->rxq)
721                 virtio_queues_unbind_intr(dev);
722
723         if (intr_conf->lsc || intr_conf->rxq) {
724                 virtio_intr_disable(dev);
725                 rte_intr_efd_disable(dev->intr_handle);
726                 rte_free(dev->intr_handle->intr_vec);
727                 dev->intr_handle->intr_vec = NULL;
728         }
729
730         virtio_reset(hw);
731         virtio_dev_free_mbufs(dev);
732         virtio_free_queues(hw);
733
734         return VIRTIO_OPS(hw)->dev_close(hw);
735 }
736
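/*
 * The four Rx-mode handlers below toggle promiscuous and all-multicast
 * reception through VIRTIO_NET_CTRL_RX commands; they require the
 * VIRTIO_NET_F_CTRL_RX feature and return -ENOTSUP otherwise.
 */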
737 static int
738 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
739 {
740         struct virtio_hw *hw = dev->data->dev_private;
741         struct virtio_pmd_ctrl ctrl;
742         int dlen[1];
743         int ret;
744
745         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
746                 PMD_INIT_LOG(INFO, "host does not support rx control");
747                 return -ENOTSUP;
748         }
749
750         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
751         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
752         ctrl.data[0] = 1;
753         dlen[0] = 1;
754
755         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
756         if (ret) {
757                 PMD_INIT_LOG(ERR, "Failed to enable promisc");
758                 return -EAGAIN;
759         }
760
761         return 0;
762 }
763
764 static int
765 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
766 {
767         struct virtio_hw *hw = dev->data->dev_private;
768         struct virtio_pmd_ctrl ctrl;
769         int dlen[1];
770         int ret;
771
772         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
773                 PMD_INIT_LOG(INFO, "host does not support rx control");
774                 return -ENOTSUP;
775         }
776
777         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
778         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
779         ctrl.data[0] = 0;
780         dlen[0] = 1;
781
782         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
783         if (ret) {
784                 PMD_INIT_LOG(ERR, "Failed to disable promisc");
785                 return -EAGAIN;
786         }
787
788         return 0;
789 }
790
791 static int
792 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
793 {
794         struct virtio_hw *hw = dev->data->dev_private;
795         struct virtio_pmd_ctrl ctrl;
796         int dlen[1];
797         int ret;
798
799         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
800                 PMD_INIT_LOG(INFO, "host does not support rx control");
801                 return -ENOTSUP;
802         }
803
804         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
805         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
806         ctrl.data[0] = 1;
807         dlen[0] = 1;
808
809         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
810         if (ret) {
811                 PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
812                 return -EAGAIN;
813         }
814
815         return 0;
816 }
817
818 static int
819 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
820 {
821         struct virtio_hw *hw = dev->data->dev_private;
822         struct virtio_pmd_ctrl ctrl;
823         int dlen[1];
824         int ret;
825
826         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
827                 PMD_INIT_LOG(INFO, "host does not support rx control");
828                 return -ENOTSUP;
829         }
830
831         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
832         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
833         ctrl.data[0] = 0;
834         dlen[0] = 1;
835
836         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
837         if (ret) {
838                 PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
839                 return -EAGAIN;
840         }
841
842         return 0;
843 }
844
845 uint16_t
846 virtio_rx_mem_pool_buf_size(struct rte_mempool *mp)
847 {
848         return rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
849 }
850
851 bool
852 virtio_rx_check_scatter(uint16_t max_rx_pkt_len, uint16_t rx_buf_size,
853                         bool rx_scatter_enabled, const char **error)
854 {
855         if (!rx_scatter_enabled && max_rx_pkt_len > rx_buf_size) {
856                 *error = "Rx scatter is disabled and RxQ mbuf pool object size is too small";
857                 return false;
858         }
859
860         return true;
861 }
862
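/*
 * Check that a frame of the given size can be received on every configured
 * Rx queue: when Rx scatter is disabled, each queue's mbuf buffer (data room
 * minus headroom) must be large enough to hold the whole frame.
 */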
863 static bool
864 virtio_check_scatter_on_all_rx_queues(struct rte_eth_dev *dev,
865                                       uint16_t frame_size)
866 {
867         struct virtio_hw *hw = dev->data->dev_private;
868         struct virtnet_rx *rxvq;
869         struct virtqueue *vq;
870         unsigned int qidx;
871         uint16_t buf_size;
872         const char *error;
873
874         if (hw->vqs == NULL)
875                 return true;
876
877         for (qidx = 0; (vq = hw->vqs[2 * qidx + VTNET_SQ_RQ_QUEUE_IDX]) != NULL;
878              qidx++) {
879                 rxvq = &vq->rxq;
880                 if (rxvq->mpool == NULL)
881                         continue;
882                 buf_size = virtio_rx_mem_pool_buf_size(rxvq->mpool);
883
884                 if (!virtio_rx_check_scatter(frame_size, buf_size,
885                                              hw->rx_ol_scatter, &error)) {
886                         PMD_INIT_LOG(ERR, "MTU check for RxQ %u failed: %s",
887                                      qidx, error);
888                         return false;
889                 }
890         }
891
892         return true;
893 }
894
895 #define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
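/*
 * Validate and apply a new MTU. The resulting frame size includes the
 * Ethernet header, a VLAN tag and the virtio-net header, and is capped by
 * both the device's max_mtu and VIRTIO_MAX_RX_PKTLEN. Illustration only:
 * with the 12-byte mergeable-buffer virtio-net header, an MTU of 1500
 * yields a frame size of 1500 + 14 + 4 + 12 = 1530 bytes.
 */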
896 static int
897 virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
898 {
899         struct virtio_hw *hw = dev->data->dev_private;
900         uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
901                                  hw->vtnet_hdr_size;
902         uint32_t frame_size = mtu + ether_hdr_len;
903         uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;
904
905         max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);
906
907         if (mtu < RTE_ETHER_MIN_MTU || frame_size > max_frame_size) {
908                 PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
909                         RTE_ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
910                 return -EINVAL;
911         }
912
913         if (!virtio_check_scatter_on_all_rx_queues(dev, frame_size)) {
914                 PMD_INIT_LOG(ERR, "MTU vs Rx scatter and Rx buffers check failed");
915                 return -EINVAL;
916         }
917
918         hw->max_rx_pkt_len = frame_size;
919         dev->data->dev_conf.rxmode.max_rx_pkt_len = hw->max_rx_pkt_len;
920
921         return 0;
922 }
923
924 static int
925 virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
926 {
927         struct virtio_hw *hw = dev->data->dev_private;
928         struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
929         struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
930
931         virtqueue_enable_intr(vq);
932         virtio_mb(hw->weak_barriers);
933         return 0;
934 }
935
936 static int
937 virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
938 {
939         struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
940         struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
941
942         virtqueue_disable_intr(vq);
943         return 0;
944 }
945
946 /*
947  * dev_ops for virtio, bare necessities for basic operation
948  */
949 static const struct eth_dev_ops virtio_eth_dev_ops = {
950         .dev_configure           = virtio_dev_configure,
951         .dev_start               = virtio_dev_start,
952         .dev_stop                = virtio_dev_stop,
953         .dev_close               = virtio_dev_close,
954         .promiscuous_enable      = virtio_dev_promiscuous_enable,
955         .promiscuous_disable     = virtio_dev_promiscuous_disable,
956         .allmulticast_enable     = virtio_dev_allmulticast_enable,
957         .allmulticast_disable    = virtio_dev_allmulticast_disable,
958         .mtu_set                 = virtio_mtu_set,
959         .dev_infos_get           = virtio_dev_info_get,
960         .stats_get               = virtio_dev_stats_get,
961         .xstats_get              = virtio_dev_xstats_get,
962         .xstats_get_names        = virtio_dev_xstats_get_names,
963         .stats_reset             = virtio_dev_stats_reset,
964         .xstats_reset            = virtio_dev_stats_reset,
965         .link_update             = virtio_dev_link_update,
966         .vlan_offload_set        = virtio_dev_vlan_offload_set,
967         .rx_queue_setup          = virtio_dev_rx_queue_setup,
968         .rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
969         .rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
970         .rx_queue_release        = virtio_dev_queue_release,
971         .tx_queue_setup          = virtio_dev_tx_queue_setup,
972         .tx_queue_release        = virtio_dev_queue_release,
973         /* collect stats per queue */
974         .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
975         .vlan_filter_set         = virtio_vlan_filter_set,
976         .mac_addr_add            = virtio_mac_addr_add,
977         .mac_addr_remove         = virtio_mac_addr_remove,
978         .mac_addr_set            = virtio_mac_addr_set,
979 };
980
981 /*
982  * dev_ops for virtio-user in secondary processes; only limited
983  * functionality is supported currently.
984  */
985 const struct eth_dev_ops virtio_user_secondary_eth_dev_ops = {
986         .dev_infos_get           = virtio_dev_info_get,
987         .stats_get               = virtio_dev_stats_get,
988         .xstats_get              = virtio_dev_xstats_get,
989         .xstats_get_names        = virtio_dev_xstats_get_names,
990         .stats_reset             = virtio_dev_stats_reset,
991         .xstats_reset            = virtio_dev_stats_reset,
992         /* collect stats per queue */
993         .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
994 };
995
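/*
 * Aggregate the per-queue software counters into the rte_eth_stats
 * structure; per-queue fields are only filled for the first
 * RTE_ETHDEV_QUEUE_STAT_CNTRS queues.
 */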
996 static void
997 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
998 {
999         unsigned i;
1000
1001         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1002                 const struct virtnet_tx *txvq = dev->data->tx_queues[i];
1003                 if (txvq == NULL)
1004                         continue;
1005
1006                 stats->opackets += txvq->stats.packets;
1007                 stats->obytes += txvq->stats.bytes;
1008
1009                 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1010                         stats->q_opackets[i] = txvq->stats.packets;
1011                         stats->q_obytes[i] = txvq->stats.bytes;
1012                 }
1013         }
1014
1015         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1016                 const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1017                 if (rxvq == NULL)
1018                         continue;
1019
1020                 stats->ipackets += rxvq->stats.packets;
1021                 stats->ibytes += rxvq->stats.bytes;
1022                 stats->ierrors += rxvq->stats.errors;
1023
1024                 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1025                         stats->q_ipackets[i] = rxvq->stats.packets;
1026                         stats->q_ibytes[i] = rxvq->stats.bytes;
1027                 }
1028         }
1029
1030         stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
1031 }
1032
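/*
 * Report the extended statistics names ("rx_qN_..." / "tx_qN_...") built
 * from the per-queue tables above. When xstats_names is NULL only the
 * number of entries is returned.
 */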
1033 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
1034                                        struct rte_eth_xstat_name *xstats_names,
1035                                        __rte_unused unsigned limit)
1036 {
1037         unsigned i;
1038         unsigned count = 0;
1039         unsigned t;
1040
1041         unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1042                 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1043
1044         if (xstats_names != NULL) {
1045                 /* Note: limit checked in rte_eth_xstats_names() */
1046
1047                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1048                         struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1049                         if (rxvq == NULL)
1050                                 continue;
1051                         for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1052                                 snprintf(xstats_names[count].name,
1053                                         sizeof(xstats_names[count].name),
1054                                         "rx_q%u_%s", i,
1055                                         rte_virtio_rxq_stat_strings[t].name);
1056                                 count++;
1057                         }
1058                 }
1059
1060                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1061                         struct virtnet_tx *txvq = dev->data->tx_queues[i];
1062                         if (txvq == NULL)
1063                                 continue;
1064                         for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1065                                 snprintf(xstats_names[count].name,
1066                                         sizeof(xstats_names[count].name),
1067                                         "tx_q%u_%s", i,
1068                                         rte_virtio_txq_stat_strings[t].name);
1069                                 count++;
1070                         }
1071                 }
1072                 return count;
1073         }
1074         return nstats;
1075 }
1076
1077 static int
1078 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1079                       unsigned n)
1080 {
1081         unsigned i;
1082         unsigned count = 0;
1083
1084         unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1085                 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1086
1087         if (n < nstats)
1088                 return nstats;
1089
1090         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1091                 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1092
1093                 if (rxvq == NULL)
1094                         continue;
1095
1096                 unsigned t;
1097
1098                 for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1099                         xstats[count].value = *(uint64_t *)(((char *)rxvq) +
1100                                 rte_virtio_rxq_stat_strings[t].offset);
1101                         xstats[count].id = count;
1102                         count++;
1103                 }
1104         }
1105
1106         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1107                 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1108
1109                 if (txvq == NULL)
1110                         continue;
1111
1112                 unsigned t;
1113
1114                 for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1115                         xstats[count].value = *(uint64_t *)(((char *)txvq) +
1116                                 rte_virtio_txq_stat_strings[t].offset);
1117                         xstats[count].id = count;
1118                         count++;
1119                 }
1120         }
1121
1122         return count;
1123 }
1124
1125 static int
1126 virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1127 {
1128         virtio_update_stats(dev, stats);
1129
1130         return 0;
1131 }
1132
1133 static int
1134 virtio_dev_stats_reset(struct rte_eth_dev *dev)
1135 {
1136         unsigned int i;
1137
1138         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1139                 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1140                 if (txvq == NULL)
1141                         continue;
1142
1143                 txvq->stats.packets = 0;
1144                 txvq->stats.bytes = 0;
1145                 txvq->stats.multicast = 0;
1146                 txvq->stats.broadcast = 0;
1147                 memset(txvq->stats.size_bins, 0,
1148                        sizeof(txvq->stats.size_bins[0]) * 8);
1149         }
1150
1151         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1152                 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1153                 if (rxvq == NULL)
1154                         continue;
1155
1156                 rxvq->stats.packets = 0;
1157                 rxvq->stats.bytes = 0;
1158                 rxvq->stats.errors = 0;
1159                 rxvq->stats.multicast = 0;
1160                 rxvq->stats.broadcast = 0;
1161                 memset(rxvq->stats.size_bins, 0,
1162                        sizeof(rxvq->stats.size_bins[0]) * 8);
1163         }
1164
1165         return 0;
1166 }
1167
1168 static void
1169 virtio_set_hwaddr(struct virtio_hw *hw)
1170 {
1171         virtio_write_dev_config(hw,
1172                         offsetof(struct virtio_net_config, mac),
1173                         &hw->mac_addr, RTE_ETHER_ADDR_LEN);
1174 }
1175
1176 static void
1177 virtio_get_hwaddr(struct virtio_hw *hw)
1178 {
1179         if (virtio_with_feature(hw, VIRTIO_NET_F_MAC)) {
1180                 virtio_read_dev_config(hw,
1181                         offsetof(struct virtio_net_config, mac),
1182                         &hw->mac_addr, RTE_ETHER_ADDR_LEN);
1183         } else {
1184                 rte_eth_random_addr(&hw->mac_addr[0]);
1185                 virtio_set_hwaddr(hw);
1186         }
1187 }
1188
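/*
 * Program the device's unicast and multicast MAC filter tables in a single
 * VIRTIO_NET_CTRL_MAC_TABLE_SET command carrying both tables; requires the
 * VIRTIO_NET_F_CTRL_MAC_ADDR feature.
 */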
1189 static int
1190 virtio_mac_table_set(struct virtio_hw *hw,
1191                      const struct virtio_net_ctrl_mac *uc,
1192                      const struct virtio_net_ctrl_mac *mc)
1193 {
1194         struct virtio_pmd_ctrl ctrl;
1195         int err, len[2];
1196
1197         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1198                 PMD_DRV_LOG(INFO, "host does not support mac table");
1199                 return -1;
1200         }
1201
1202         ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1203         ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1204
1205         len[0] = uc->entries * RTE_ETHER_ADDR_LEN + sizeof(uc->entries);
1206         memcpy(ctrl.data, uc, len[0]);
1207
1208         len[1] = mc->entries * RTE_ETHER_ADDR_LEN + sizeof(mc->entries);
1209         memcpy(ctrl.data + len[0], mc, len[1]);
1210
1211         err = virtio_send_command(hw->cvq, &ctrl, len, 2);
1212         if (err != 0)
1213                 PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
1214         return err;
1215 }
1216
1217 static int
1218 virtio_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1219                     uint32_t index, uint32_t vmdq __rte_unused)
1220 {
1221         struct virtio_hw *hw = dev->data->dev_private;
1222         const struct rte_ether_addr *addrs = dev->data->mac_addrs;
1223         unsigned int i;
1224         struct virtio_net_ctrl_mac *uc, *mc;
1225
1226         if (index >= VIRTIO_MAX_MAC_ADDRS) {
1227                 PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1228                 return -EINVAL;
1229         }
1230
1231         uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1232                 sizeof(uc->entries));
1233         uc->entries = 0;
1234         mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1235                 sizeof(mc->entries));
1236         mc->entries = 0;
1237
1238         for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1239                 const struct rte_ether_addr *addr
1240                         = (i == index) ? mac_addr : addrs + i;
1241                 struct virtio_net_ctrl_mac *tbl
1242                         = rte_is_multicast_ether_addr(addr) ? mc : uc;
1243
1244                 memcpy(&tbl->macs[tbl->entries++], addr, RTE_ETHER_ADDR_LEN);
1245         }
1246
1247         return virtio_mac_table_set(hw, uc, mc);
1248 }
1249
1250 static void
1251 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
1252 {
1253         struct virtio_hw *hw = dev->data->dev_private;
1254         struct rte_ether_addr *addrs = dev->data->mac_addrs;
1255         struct virtio_net_ctrl_mac *uc, *mc;
1256         unsigned int i;
1257
1258         if (index >= VIRTIO_MAX_MAC_ADDRS) {
1259                 PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1260                 return;
1261         }
1262
1263         uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1264                 sizeof(uc->entries));
1265         uc->entries = 0;
1266         mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1267                 sizeof(mc->entries));
1268         mc->entries = 0;
1269
1270         for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1271                 struct virtio_net_ctrl_mac *tbl;
1272
1273                 if (i == index || rte_is_zero_ether_addr(addrs + i))
1274                         continue;
1275
1276                 tbl = rte_is_multicast_ether_addr(addrs + i) ? mc : uc;
1277                 memcpy(&tbl->macs[tbl->entries++], addrs + i,
1278                         RTE_ETHER_ADDR_LEN);
1279         }
1280
1281         virtio_mac_table_set(hw, uc, mc);
1282 }
1283
1284 static int
1285 virtio_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1286 {
1287         struct virtio_hw *hw = dev->data->dev_private;
1288
1289         memcpy(hw->mac_addr, mac_addr, RTE_ETHER_ADDR_LEN);
1290
1291         /* Use atomic update if available */
1292         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1293                 struct virtio_pmd_ctrl ctrl;
1294                 int len = RTE_ETHER_ADDR_LEN;
1295
1296                 ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1297                 ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
1298
1299                 memcpy(ctrl.data, mac_addr, RTE_ETHER_ADDR_LEN);
1300                 return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1301         }
1302
1303         if (!virtio_with_feature(hw, VIRTIO_NET_F_MAC))
1304                 return -ENOTSUP;
1305
1306         virtio_set_hwaddr(hw);
1307         return 0;
1308 }
1309
1310 static int
1311 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1312 {
1313         struct virtio_hw *hw = dev->data->dev_private;
1314         struct virtio_pmd_ctrl ctrl;
1315         int len;
1316
1317         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1318                 return -ENOTSUP;
1319
1320         ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1321         ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1322         memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1323         len = sizeof(vlan_id);
1324
1325         return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1326 }
1327
1328 static int
1329 virtio_intr_unmask(struct rte_eth_dev *dev)
1330 {
1331         struct virtio_hw *hw = dev->data->dev_private;
1332
1333         if (rte_intr_ack(dev->intr_handle) < 0)
1334                 return -1;
1335
1336         if (VIRTIO_OPS(hw)->intr_detect)
1337                 VIRTIO_OPS(hw)->intr_detect(hw);
1338
1339         return 0;
1340 }
1341
1342 static int
1343 virtio_intr_enable(struct rte_eth_dev *dev)
1344 {
1345         struct virtio_hw *hw = dev->data->dev_private;
1346
1347         if (rte_intr_enable(dev->intr_handle) < 0)
1348                 return -1;
1349
1350         if (VIRTIO_OPS(hw)->intr_detect)
1351                 VIRTIO_OPS(hw)->intr_detect(hw);
1352
1353         return 0;
1354 }
1355
1356 static int
1357 virtio_intr_disable(struct rte_eth_dev *dev)
1358 {
1359         struct virtio_hw *hw = dev->data->dev_private;
1360
1361         if (rte_intr_disable(dev->intr_handle) < 0)
1362                 return -1;
1363
1364         if (VIRTIO_OPS(hw)->intr_detect)
1365                 VIRTIO_OPS(hw)->intr_detect(hw);
1366
1367         return 0;
1368 }
1369
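/*
 * Feature negotiation: read the device feature bits, drop VIRTIO_NET_F_MTU
 * if the advertised MTU is invalid, write back the accepted subset as the
 * guest features, and for VIRTIO 1.0+ devices confirm the result by setting
 * and re-reading the FEATURES_OK status bit.
 */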
1370 static int
1371 virtio_ethdev_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1372 {
1373         uint64_t host_features;
1374
1375         /* Prepare guest_features: feature that driver wants to support */
1376         PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1377                 req_features);
1378
1379         /* Read device(host) feature bits */
1380         host_features = VIRTIO_OPS(hw)->get_features(hw);
1381         PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1382                 host_features);
1383
1384         /* If supported, ensure MTU value is valid before acknowledging it. */
1385         if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1386                 struct virtio_net_config config;
1387
1388                 virtio_read_dev_config(hw,
1389                         offsetof(struct virtio_net_config, mtu),
1390                         &config.mtu, sizeof(config.mtu));
1391
1392                 if (config.mtu < RTE_ETHER_MIN_MTU)
1393                         req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1394         }
1395
1396         /*
1397          * Negotiate features: the subset of device feature bits accepted
1398          * by the driver is written back as the guest feature bits.
1399          */
1400         hw->guest_features = req_features;
1401         hw->guest_features = virtio_negotiate_features(hw, host_features);
1402         PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1403                 hw->guest_features);
1404
1405         if (VIRTIO_OPS(hw)->features_ok(hw) < 0)
1406                 return -1;
1407
1408         if (virtio_with_feature(hw, VIRTIO_F_VERSION_1)) {
1409                 virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1410
1411                 if (!(virtio_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1412                         PMD_INIT_LOG(ERR, "Failed to set FEATURES_OK status!");
1413                         return -1;
1414                 }
1415         }
1416
1417         hw->req_guest_features = req_features;
1418
1419         return 0;
1420 }
1421
1422 int
1423 virtio_dev_pause(struct rte_eth_dev *dev)
1424 {
1425         struct virtio_hw *hw = dev->data->dev_private;
1426
1427         rte_spinlock_lock(&hw->state_lock);
1428
1429         if (hw->started == 0) {
1430                 /* Device is just stopped. */
1431                 rte_spinlock_unlock(&hw->state_lock);
1432                 return -1;
1433         }
1434         hw->started = 0;
1435         /*
1436          * Prevent the worker threads from touching queues to avoid contention;
1437          * 1 ms should be enough for the ongoing Tx function to finish.
1438          */
1439         rte_delay_ms(1);
1440         return 0;
1441 }
1442
1443 /*
1444  * Recover hw state to let the worker threads continue.
1445  */
1446 void
1447 virtio_dev_resume(struct rte_eth_dev *dev)
1448 {
1449         struct virtio_hw *hw = dev->data->dev_private;
1450
1451         hw->started = 1;
1452         rte_spinlock_unlock(&hw->state_lock);
1453 }
1454
1455 /*
1456  * Should be called only after device is paused.
1457  */
1458 int
1459 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1460                 int nb_pkts)
1461 {
1462         struct virtio_hw *hw = dev->data->dev_private;
1463         struct virtnet_tx *txvq = dev->data->tx_queues[0];
1464         int ret;
1465
1466         hw->inject_pkts = tx_pkts;
1467         ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1468         hw->inject_pkts = NULL;
1469
1470         return ret;
1471 }
1472
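/*
 * Announce the local MAC address to peers by building a gratuitous RARP
 * packet from Rx queue 0's mempool and injecting it on Tx queue 0, using
 * the pause/inject/resume sequence above so the data path is quiesced
 * while the packet is sent.
 */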
1473 static void
1474 virtio_notify_peers(struct rte_eth_dev *dev)
1475 {
1476         struct virtio_hw *hw = dev->data->dev_private;
1477         struct virtnet_rx *rxvq;
1478         struct rte_mbuf *rarp_mbuf;
1479
1480         if (!dev->data->rx_queues)
1481                 return;
1482
1483         rxvq = dev->data->rx_queues[0];
1484         if (!rxvq)
1485                 return;
1486
1487         rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
1488                         (struct rte_ether_addr *)hw->mac_addr);
1489         if (rarp_mbuf == NULL) {
1490                 PMD_DRV_LOG(ERR, "failed to make RARP packet.");
1491                 return;
1492         }
1493
1494         /* If virtio port just stopped, no need to send RARP */
1495         if (virtio_dev_pause(dev) < 0) {
1496                 rte_pktmbuf_free(rarp_mbuf);
1497                 return;
1498         }
1499
1500         virtio_inject_pkts(dev, &rarp_mbuf, 1);
1501         virtio_dev_resume(dev);
1502 }
1503
1504 static void
1505 virtio_ack_link_announce(struct rte_eth_dev *dev)
1506 {
1507         struct virtio_hw *hw = dev->data->dev_private;
1508         struct virtio_pmd_ctrl ctrl;
1509
1510         ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
1511         ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
1512
1513         virtio_send_command(hw->cvq, &ctrl, NULL, 0);
1514 }
1515
1516 /*
1517  * Process the virtio config-changed interrupt. Invoke the LSC callback
1518  * if the link state changed, and generate a gratuitous RARP packet if
1519  * the status indicates an ANNOUNCE.
1520  */
1521 void
1522 virtio_interrupt_handler(void *param)
1523 {
1524         struct rte_eth_dev *dev = param;
1525         struct virtio_hw *hw = dev->data->dev_private;
1526         uint8_t isr;
1527         uint16_t status;
1528
1529         /* Read interrupt status which clears interrupt */
1530         isr = virtio_get_isr(hw);
1531         PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
1532
1533         if (virtio_intr_unmask(dev) < 0)
1534                 PMD_DRV_LOG(ERR, "interrupt enable failed");
1535
1536         if (isr & VIRTIO_ISR_CONFIG) {
1537                 if (virtio_dev_link_update(dev, 0) == 0)
1538                         rte_eth_dev_callback_process(dev,
1539                                                      RTE_ETH_EVENT_INTR_LSC,
1540                                                      NULL);
1541
1542                 if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1543                         virtio_read_dev_config(hw,
1544                                 offsetof(struct virtio_net_config, status),
1545                                 &status, sizeof(status));
1546                         if (status & VIRTIO_NET_S_ANNOUNCE) {
1547                                 virtio_notify_peers(dev);
1548                                 if (hw->cvq)
1549                                         virtio_ack_link_announce(dev);
1550                         }
1551                 }
1552         }
1553 }
1554
1555 /* set rx and tx handlers according to what is supported */
1556 static void
1557 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1558 {
1559         struct virtio_hw *hw = eth_dev->data->dev_private;
1560
1561         eth_dev->tx_pkt_prepare = virtio_xmit_pkts_prepare;
1562         if (virtio_with_packed_queue(hw)) {
1563                 PMD_INIT_LOG(INFO,
1564                         "virtio: using packed ring %s Tx path on port %u",
1565                         hw->use_vec_tx ? "vectorized" : "standard",
1566                         eth_dev->data->port_id);
1567                 if (hw->use_vec_tx)
1568                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
1569                 else
1570                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
1571         } else {
1572                 if (hw->use_inorder_tx) {
1573                         PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
1574                                 eth_dev->data->port_id);
1575                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
1576                 } else {
1577                         PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1578                                 eth_dev->data->port_id);
1579                         eth_dev->tx_pkt_burst = virtio_xmit_pkts;
1580                 }
1581         }
1582
1583         if (virtio_with_packed_queue(hw)) {
1584                 if (hw->use_vec_rx) {
1585                         PMD_INIT_LOG(INFO,
1586                                 "virtio: using packed ring vectorized Rx path on port %u",
1587                                 eth_dev->data->port_id);
1588                         eth_dev->rx_pkt_burst =
1589                                 &virtio_recv_pkts_packed_vec;
1590                 } else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1591                         PMD_INIT_LOG(INFO,
1592                                 "virtio: using packed ring mergeable buffer Rx path on port %u",
1593                                 eth_dev->data->port_id);
1594                         eth_dev->rx_pkt_burst =
1595                                 &virtio_recv_mergeable_pkts_packed;
1596                 } else {
1597                         PMD_INIT_LOG(INFO,
1598                                 "virtio: using packed ring standard Rx path on port %u",
1599                                 eth_dev->data->port_id);
1600                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
1601                 }
1602         } else {
1603                 if (hw->use_vec_rx) {
1604                         PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
1605                                 eth_dev->data->port_id);
1606                         eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1607                 } else if (hw->use_inorder_rx) {
1608                         PMD_INIT_LOG(INFO,
1609                                 "virtio: using inorder Rx path on port %u",
1610                                 eth_dev->data->port_id);
1611                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_inorder;
1612                 } else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1613                         PMD_INIT_LOG(INFO,
1614                                 "virtio: using mergeable buffer Rx path on port %u",
1615                                 eth_dev->data->port_id);
1616                         eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1617                 } else {
1618                         PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1619                                 eth_dev->data->port_id);
1620                         eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1621                 }
1622         }
1623
1624 }
1625
1626 /* Only 1:1 queue/interrupt mapping is supported so far.
1627  * TODO: support n:1 queue/interrupt mapping when there is a limited number
1628  * of interrupt vectors (< N + 1).
1629  */
1630 static int
1631 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1632 {
1633         uint32_t i;
1634         struct virtio_hw *hw = dev->data->dev_private;
1635
1636         PMD_INIT_LOG(INFO, "queue/interrupt binding");
1637         for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1638                 dev->intr_handle->intr_vec[i] = i + 1;
1639                 if (VIRTIO_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1640                                                  VIRTIO_MSI_NO_VECTOR) {
1641                         PMD_DRV_LOG(ERR, "failed to set queue vector");
1642                         return -EBUSY;
1643                 }
1644         }
1645
1646         return 0;
1647 }
1648
1649 static void
1650 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1651 {
1652         uint32_t i;
1653         struct virtio_hw *hw = dev->data->dev_private;
1654
1655         PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
1656         for (i = 0; i < dev->data->nb_rx_queues; ++i)
1657                 VIRTIO_OPS(hw)->set_queue_irq(hw,
1658                                              hw->vqs[i * VTNET_CQ],
1659                                              VIRTIO_MSI_NO_VECTOR);
1660 }
1661
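/*
 * Set up per-queue Rx interrupts: create the eventfds, allocate the
 * intr_vec table, re-register the config-change callback, enable MSI-X
 * and bind one vector per Rx queue (vector 0 stays reserved for the
 * config/link interrupt).
 */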
1662 static int
1663 virtio_configure_intr(struct rte_eth_dev *dev)
1664 {
1665         struct virtio_hw *hw = dev->data->dev_private;
1666
1667         if (!rte_intr_cap_multiple(dev->intr_handle)) {
1668                 PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
1669                 return -ENOTSUP;
1670         }
1671
1672         if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1673                 PMD_INIT_LOG(ERR, "Fail to create eventfd");
1674                 return -1;
1675         }
1676
1677         if (!dev->intr_handle->intr_vec) {
1678                 dev->intr_handle->intr_vec =
1679                         rte_zmalloc("intr_vec",
1680                                     hw->max_queue_pairs * sizeof(int), 0);
1681                 if (!dev->intr_handle->intr_vec) {
1682                         PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1683                                      hw->max_queue_pairs);
1684                         return -ENOMEM;
1685                 }
1686         }
1687
1688         /* Re-register callback to update max_intr */
1689         rte_intr_callback_unregister(dev->intr_handle,
1690                                      virtio_interrupt_handler,
1691                                      dev);
1692         rte_intr_callback_register(dev->intr_handle,
1693                                    virtio_interrupt_handler,
1694                                    dev);
1695
1696         /* DO NOT try to remove this! This call enables MSI-X; otherwise QEMU
1697          * will encounter SIGSEGV when DRIVER_OK is sent.
1698          * For legacy devices, this must also be done before queue/vector binding
1699          * so that the config size grows from 20 to 24 bytes; otherwise writes to
1700          * VIRTIO_MSI_QUEUE_VECTOR (22) will be ignored.
1701          */
1702         if (virtio_intr_enable(dev) < 0) {
1703                 PMD_DRV_LOG(ERR, "interrupt enable failed");
1704                 return -1;
1705         }
1706
1707         if (virtio_queues_bind_intr(dev) < 0) {
1708                 PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1709                 return -1;
1710         }
1711
1712         return 0;
1713 }
1714 #define DUPLEX_UNKNOWN   0xff
1715 /* reset device and renegotiate features if needed */
1716 static int
1717 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
1718 {
1719         struct virtio_hw *hw = eth_dev->data->dev_private;
1720         struct virtio_net_config *config;
1721         struct virtio_net_config local_config;
1722         int ret;
1723
1724         /* Reset the device, although this is not strictly necessary at startup */
1725         virtio_reset(hw);
1726
1727         if (hw->vqs) {
1728                 virtio_dev_free_mbufs(eth_dev);
1729                 virtio_free_queues(hw);
1730         }
1731
1732         /* Tell the host we've noticed this device. */
1733         virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
1734
1735         /* Tell the host we know how to drive the device. */
1736         virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
1737         if (virtio_ethdev_negotiate_features(hw, req_features) < 0)
1738                 return -1;
1739
1740         hw->weak_barriers = !virtio_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);
1741
1742         /* If host does not support both status and MSI-X then disable LSC */
1743         if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS) && hw->intr_lsc)
1744                 eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
1745         else
1746                 eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
1747
1748         eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1749
1750         /* Setting up rx_header size for the device */
1751         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
1752             virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
1753             virtio_with_packed_queue(hw))
1754                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1755         else
1756                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
1757
1758         /* Copy the permanent MAC address into virtio_hw */
1759         virtio_get_hwaddr(hw);
1760         rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
1761                         &eth_dev->data->mac_addrs[0]);
1762         PMD_INIT_LOG(DEBUG,
1763                      "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
1764                      hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
1765                      hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
1766
1767         if (hw->speed == ETH_SPEED_NUM_UNKNOWN) {
1768                 if (virtio_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX)) {
1769                         config = &local_config;
1770                         virtio_read_dev_config(hw,
1771                                 offsetof(struct virtio_net_config, speed),
1772                                 &config->speed, sizeof(config->speed));
1773                         virtio_read_dev_config(hw,
1774                                 offsetof(struct virtio_net_config, duplex),
1775                                 &config->duplex, sizeof(config->duplex));
1776                         hw->speed = config->speed;
1777                         hw->duplex = config->duplex;
1778                 }
1779         }
1780         if (hw->duplex == DUPLEX_UNKNOWN)
1781                 hw->duplex = ETH_LINK_FULL_DUPLEX;
1782         PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
1783                 hw->speed, hw->duplex);
1784         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
1785                 config = &local_config;
1786
1787                 virtio_read_dev_config(hw,
1788                         offsetof(struct virtio_net_config, mac),
1789                         &config->mac, sizeof(config->mac));
1790
1791                 if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1792                         virtio_read_dev_config(hw,
1793                                 offsetof(struct virtio_net_config, status),
1794                                 &config->status, sizeof(config->status));
1795                 } else {
1796                         PMD_INIT_LOG(DEBUG,
1797                                      "VIRTIO_NET_F_STATUS is not supported");
1798                         config->status = 0;
1799                 }
1800
1801                 if (virtio_with_feature(hw, VIRTIO_NET_F_MQ)) {
1802                         virtio_read_dev_config(hw,
1803                                 offsetof(struct virtio_net_config, max_virtqueue_pairs),
1804                                 &config->max_virtqueue_pairs,
1805                                 sizeof(config->max_virtqueue_pairs));
1806                 } else {
1807                         PMD_INIT_LOG(DEBUG,
1808                                      "VIRTIO_NET_F_MQ is not supported");
1809                         config->max_virtqueue_pairs = 1;
1810                 }
1811
1812                 hw->max_queue_pairs = config->max_virtqueue_pairs;
1813
1814                 if (virtio_with_feature(hw, VIRTIO_NET_F_MTU)) {
1815                         virtio_read_dev_config(hw,
1816                                 offsetof(struct virtio_net_config, mtu),
1817                                 &config->mtu,
1818                                 sizeof(config->mtu));
1819
1820                         /*
1821                          * MTU value has already been checked at negotiation
1822                          * time, but check again in case it has changed since
1823                          * then, which should not happen.
1824                          */
1825                         if (config->mtu < RTE_ETHER_MIN_MTU) {
1826                                 PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
1827                                                 config->mtu);
1828                                 return -1;
1829                         }
1830
1831                         hw->max_mtu = config->mtu;
1832                         /* Set the initial MTU to the maximum supported by vhost */
1833                         eth_dev->data->mtu = config->mtu;
1834
1835                 } else {
1836                         hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
1837                                 VLAN_TAG_LEN - hw->vtnet_hdr_size;
1838                 }
1839
1840                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
1841                                 config->max_virtqueue_pairs);
1842                 PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
1843                 PMD_INIT_LOG(DEBUG,
1844                                 "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
1845                                 config->mac[0], config->mac[1],
1846                                 config->mac[2], config->mac[3],
1847                                 config->mac[4], config->mac[5]);
1848         } else {
1849                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
1850                 hw->max_queue_pairs = 1;
1851                 hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
1852                         VLAN_TAG_LEN - hw->vtnet_hdr_size;
1853         }
1854
1855         ret = virtio_alloc_queues(eth_dev);
1856         if (ret < 0)
1857                 return ret;
1858
1859         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1860                 if (virtio_configure_intr(eth_dev) < 0) {
1861                         PMD_INIT_LOG(ERR, "failed to configure interrupt");
1862                         virtio_free_queues(hw);
1863                         return -1;
1864                 }
1865         }
1866
1867         virtio_reinit_complete(hw);
1868
1869         return 0;
1870 }
1871
1872 /*
1873  * This function is based on the probe() function in virtio_pci.c.
1874  * It returns 0 on success.
1875  */
1876 int
1877 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
1878 {
1879         struct virtio_hw *hw = eth_dev->data->dev_private;
1880         uint32_t speed = ETH_SPEED_NUM_UNKNOWN;
1881         int vectorized = 0;
1882         int ret;
1883
1884         if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) {
1885                 PMD_INIT_LOG(ERR,
1886                         "Not sufficient headroom required = %d, avail = %d",
1887                         (int)sizeof(struct virtio_net_hdr_mrg_rxbuf),
1888                         RTE_PKTMBUF_HEADROOM);
1889
1890                 return -1;
1891         }
1892
1893         eth_dev->dev_ops = &virtio_eth_dev_ops;
1894         eth_dev->rx_descriptor_done = virtio_dev_rx_queue_done;
1895
1896         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1897                 set_rxtx_funcs(eth_dev);
1898                 return 0;
1899         }
1900
1901         ret = virtio_dev_devargs_parse(eth_dev->device->devargs, &speed, &vectorized);
1902         if (ret < 0)
1903                 return ret;
1904         hw->speed = speed;
1905         hw->duplex = DUPLEX_UNKNOWN;
1906
1907         /* Allocate memory for storing MAC addresses */
1908         eth_dev->data->mac_addrs = rte_zmalloc("virtio",
1909                                 VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN, 0);
1910         if (eth_dev->data->mac_addrs == NULL) {
1911                 PMD_INIT_LOG(ERR,
1912                         "Failed to allocate %d bytes needed to store MAC addresses",
1913                         VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN);
1914                 return -ENOMEM;
1915         }
1916
1917         rte_spinlock_init(&hw->state_lock);
1918
1919         /* reset device and negotiate default features */
1920         ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
1921         if (ret < 0)
1922                 goto err_virtio_init;
1923
1924         if (vectorized) {
1925                 if (!virtio_with_packed_queue(hw)) {
1926                         hw->use_vec_rx = 1;
1927                 } else {
1928 #if defined(CC_AVX512_SUPPORT) || defined(RTE_ARCH_ARM)
1929                         hw->use_vec_rx = 1;
1930                         hw->use_vec_tx = 1;
1931 #else
1932                         PMD_DRV_LOG(INFO,
1933                                 "build environment does not support packed ring vectorized path");
1934 #endif
1935                 }
1936         }
1937
1938         hw->opened = 1;
1939
1940         return 0;
1941
1942 err_virtio_init:
1943         rte_free(eth_dev->data->mac_addrs);
1944         eth_dev->data->mac_addrs = NULL;
1945         return ret;
1946 }
1947
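/*
 * Map a numeric link speed (ETH_SPEED_NUM_*) to the matching
 * ETH_LINK_SPEED_* capability flag; returns 0 for unknown speeds.
 */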
1948 static uint32_t
1949 virtio_dev_speed_capa_get(uint32_t speed)
1950 {
1951         switch (speed) {
1952         case ETH_SPEED_NUM_10G:
1953                 return ETH_LINK_SPEED_10G;
1954         case ETH_SPEED_NUM_20G:
1955                 return ETH_LINK_SPEED_20G;
1956         case ETH_SPEED_NUM_25G:
1957                 return ETH_LINK_SPEED_25G;
1958         case ETH_SPEED_NUM_40G:
1959                 return ETH_LINK_SPEED_40G;
1960         case ETH_SPEED_NUM_50G:
1961                 return ETH_LINK_SPEED_50G;
1962         case ETH_SPEED_NUM_56G:
1963                 return ETH_LINK_SPEED_56G;
1964         case ETH_SPEED_NUM_100G:
1965                 return ETH_LINK_SPEED_100G;
1966         case ETH_SPEED_NUM_200G:
1967                 return ETH_LINK_SPEED_200G;
1968         default:
1969                 return 0;
1970         }
1971 }
1972
1973 static int vectorized_check_handler(__rte_unused const char *key,
1974                 const char *value, void *ret_val)
1975 {
1976         if (strcmp(value, "1") == 0)
1977                 *(int *)ret_val = 1;
1978         else
1979                 *(int *)ret_val = 0;
1980
1981         return 0;
1982 }
1983
1984 #define VIRTIO_ARG_SPEED      "speed"
1985 #define VIRTIO_ARG_VECTORIZED "vectorized"
1986
1987 static int
1988 link_speed_handler(const char *key __rte_unused,
1989                 const char *value, void *ret_val)
1990 {
1991         uint32_t val;
1992         if (!value || !ret_val)
1993                 return -EINVAL;
1994         val = strtoul(value, NULL, 0);
1995         /* validate input */
1996         if (virtio_dev_speed_capa_get(val) == 0)
1997                 return -EINVAL;
1998         *(uint32_t *)ret_val = val;
1999
2000         return 0;
2001 }
2002
2003
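/*
 * Illustrative devargs usage (the PCI address below is hypothetical),
 * passed via the EAL allow-list option:
 *   -a 0000:00:04.0,speed=10000,vectorized=1
 * "speed" sets the link speed reported by the driver (otherwise it is
 * read from the device when VIRTIO_NET_F_SPEED_DUPLEX is available);
 * "vectorized" requests the vectorized Rx/Tx datapaths when the build
 * and CPU support them.
 */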
2004 static int
2005 virtio_dev_devargs_parse(struct rte_devargs *devargs, uint32_t *speed, int *vectorized)
2006 {
2007         struct rte_kvargs *kvlist;
2008         int ret = 0;
2009
2010         if (devargs == NULL)
2011                 return 0;
2012
2013         kvlist = rte_kvargs_parse(devargs->args, NULL);
2014         if (kvlist == NULL) {
2015                 PMD_INIT_LOG(ERR, "error when parsing param");
2016                 return 0;
2017         }
2018
2019         if (speed && rte_kvargs_count(kvlist, VIRTIO_ARG_SPEED) == 1) {
2020                 ret = rte_kvargs_process(kvlist,
2021                                         VIRTIO_ARG_SPEED,
2022                                         link_speed_handler, speed);
2023                 if (ret < 0) {
2024                         PMD_INIT_LOG(ERR, "Failed to parse %s",
2025                                         VIRTIO_ARG_SPEED);
2026                         goto exit;
2027                 }
2028         }
2029
2030         if (vectorized &&
2031                 rte_kvargs_count(kvlist, VIRTIO_ARG_VECTORIZED) == 1) {
2032                 ret = rte_kvargs_process(kvlist,
2033                                 VIRTIO_ARG_VECTORIZED,
2034                                 vectorized_check_handler, vectorized);
2035                 if (ret < 0) {
2036                         PMD_INIT_LOG(ERR, "Failed to parse %s",
2037                                         VIRTIO_ARG_VECTORIZED);
2038                         goto exit;
2039                 }
2040         }
2041
2042 exit:
2043         rte_kvargs_free(kvlist);
2044         return ret;
2045 }
2046
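/* True if any guest (Rx) checksum/TSO offload feature was negotiated. */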
2047 static uint8_t
2048 rx_offload_enabled(struct virtio_hw *hw)
2049 {
2050         return virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
2051                 virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2052                 virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
2053 }
2054
2055 static uint8_t
2056 tx_offload_enabled(struct virtio_hw *hw)
2057 {
2058         return virtio_with_feature(hw, VIRTIO_NET_F_CSUM) ||
2059                 virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
2060                 virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
2061 }
2062
2063 /*
2064  * Configure virtio device
2065  * It returns 0 on success.
2066  */
2067 static int
2068 virtio_dev_configure(struct rte_eth_dev *dev)
2069 {
2070         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2071         const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
2072         struct virtio_hw *hw = dev->data->dev_private;
2073         uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
2074                 hw->vtnet_hdr_size;
2075         uint64_t rx_offloads = rxmode->offloads;
2076         uint64_t tx_offloads = txmode->offloads;
2077         uint64_t req_features;
2078         int ret;
2079
2080         PMD_INIT_LOG(DEBUG, "configure");
2081         req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
2082
2083         if (rxmode->mq_mode != ETH_MQ_RX_NONE) {
2084                 PMD_DRV_LOG(ERR,
2085                         "Unsupported Rx multi queue mode %d",
2086                         rxmode->mq_mode);
2087                 return -EINVAL;
2088         }
2089
2090         if (txmode->mq_mode != ETH_MQ_TX_NONE) {
2091                 PMD_DRV_LOG(ERR,
2092                         "Unsupported Tx multi queue mode %d",
2093                         txmode->mq_mode);
2094                 return -EINVAL;
2095         }
2096
2097         if (dev->data->dev_conf.intr_conf.rxq) {
2098                 ret = virtio_init_device(dev, hw->req_guest_features);
2099                 if (ret < 0)
2100                         return ret;
2101         }
2102
2103         if (rxmode->max_rx_pkt_len > hw->max_mtu + ether_hdr_len)
2104                 req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
2105
2106         hw->max_rx_pkt_len = rxmode->max_rx_pkt_len;
2107
2108         if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2109                            DEV_RX_OFFLOAD_TCP_CKSUM))
2110                 req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
2111
2112         if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO)
2113                 req_features |=
2114                         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2115                         (1ULL << VIRTIO_NET_F_GUEST_TSO6);
2116
2117         if (tx_offloads & (DEV_TX_OFFLOAD_UDP_CKSUM |
2118                            DEV_TX_OFFLOAD_TCP_CKSUM))
2119                 req_features |= (1ULL << VIRTIO_NET_F_CSUM);
2120
2121         if (tx_offloads & DEV_TX_OFFLOAD_TCP_TSO)
2122                 req_features |=
2123                         (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2124                         (1ULL << VIRTIO_NET_F_HOST_TSO6);
2125
2126         /* if the requested features changed, reinit the device */
2127         if (req_features != hw->req_guest_features) {
2128                 ret = virtio_init_device(dev, req_features);
2129                 if (ret < 0)
2130                         return ret;
2131         }
2132
2133         if ((rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2134                             DEV_RX_OFFLOAD_TCP_CKSUM)) &&
2135                 !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2136                 PMD_DRV_LOG(ERR,
2137                         "rx checksum not available on this host");
2138                 return -ENOTSUP;
2139         }
2140
2141         if ((rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) &&
2142                 (!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2143                  !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2144                 PMD_DRV_LOG(ERR,
2145                         "Large Receive Offload not available on this host");
2146                 return -ENOTSUP;
2147         }
2148
2149         /* start control queue */
2150         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2151                 virtio_dev_cq_start(dev);
2152
2153         if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2154                 hw->vlan_strip = 1;
2155
2156         hw->rx_ol_scatter = (rx_offloads & DEV_RX_OFFLOAD_SCATTER);
2157
2158         if ((rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
2159                         !virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2160                 PMD_DRV_LOG(ERR,
2161                             "vlan filtering not available on this host");
2162                 return -ENOTSUP;
2163         }
2164
2165         hw->has_tx_offload = tx_offload_enabled(hw);
2166         hw->has_rx_offload = rx_offload_enabled(hw);
2167
2168         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2169                 /* Enable vector (0) for Link State Interrupt */
2170                 if (VIRTIO_OPS(hw)->set_config_irq(hw, 0) ==
2171                                 VIRTIO_MSI_NO_VECTOR) {
2172                         PMD_DRV_LOG(ERR, "failed to set config vector");
2173                         return -EBUSY;
2174                 }
2175
2176         if (virtio_with_packed_queue(hw)) {
2177 #if defined(RTE_ARCH_X86_64) && defined(CC_AVX512_SUPPORT)
2178                 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2179                     (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
2180                      !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2181                      !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2182                      rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512)) {
2183                         PMD_DRV_LOG(INFO,
2184                                 "disabled packed ring vectorized path for requirements not met");
2185                         hw->use_vec_rx = 0;
2186                         hw->use_vec_tx = 0;
2187                 }
2188 #elif defined(RTE_ARCH_ARM)
2189                 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2190                     (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) ||
2191                      !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2192                      !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2193                      rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)) {
2194                         PMD_DRV_LOG(INFO,
2195                                 "disabled packed ring vectorized path for requirements not met");
2196                         hw->use_vec_rx = 0;
2197                         hw->use_vec_tx = 0;
2198                 }
2199 #else
2200                 hw->use_vec_rx = 0;
2201                 hw->use_vec_tx = 0;
2202 #endif
2203
2204                 if (hw->use_vec_rx) {
2205                         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2206                                 PMD_DRV_LOG(INFO,
2207                                         "disabled packed ring vectorized rx for mrg_rxbuf enabled");
2208                                 hw->use_vec_rx = 0;
2209                         }
2210
2211                         if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) {
2212                                 PMD_DRV_LOG(INFO,
2213                                         "disabled packed ring vectorized rx for TCP_LRO enabled");
2214                                 hw->use_vec_rx = 0;
2215                         }
2216                 }
2217         } else {
2218                 if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2219                         hw->use_inorder_tx = 1;
2220                         hw->use_inorder_rx = 1;
2221                         hw->use_vec_rx = 0;
2222                 }
2223
2224                 if (hw->use_vec_rx) {
2225 #if defined RTE_ARCH_ARM
2226                         if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2227                                 PMD_DRV_LOG(INFO,
2228                                         "disabled split ring vectorized path for requirement not met");
2229                                 hw->use_vec_rx = 0;
2230                         }
2231 #endif
2232                         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2233                                 PMD_DRV_LOG(INFO,
2234                                         "disabled split ring vectorized rx for mrg_rxbuf enabled");
2235                                 hw->use_vec_rx = 0;
2236                         }
2237
2238                         if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2239                                            DEV_RX_OFFLOAD_TCP_CKSUM |
2240                                            DEV_RX_OFFLOAD_TCP_LRO |
2241                                            DEV_RX_OFFLOAD_VLAN_STRIP)) {
2242                                 PMD_DRV_LOG(INFO,
2243                                         "disabled split ring vectorized rx for offloading enabled");
2244                                 hw->use_vec_rx = 0;
2245                         }
2246
2247                         if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
2248                                 PMD_DRV_LOG(INFO,
2249                                         "disabled split ring vectorized rx, max SIMD bitwidth too low");
2250                                 hw->use_vec_rx = 0;
2251                         }
2252                 }
2253         }
2254
2255         return 0;
2256 }
2257
2258
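/*
 * Start the device: finish Rx/Tx queue setup, (re)enable interrupts when
 * LSC or Rx interrupts are configured, set the number of active queue
 * pairs when more than one is used, notify the backend for every queue
 * and select the Rx/Tx burst functions.
 */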
2259 static int
2260 virtio_dev_start(struct rte_eth_dev *dev)
2261 {
2262         uint16_t nb_queues, i;
2263         struct virtqueue *vq;
2264         struct virtio_hw *hw = dev->data->dev_private;
2265         int ret;
2266
2267         /* Finish the initialization of the queues */
2268         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2269                 ret = virtio_dev_rx_queue_setup_finish(dev, i);
2270                 if (ret < 0)
2271                         return ret;
2272         }
2273         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2274                 ret = virtio_dev_tx_queue_setup_finish(dev, i);
2275                 if (ret < 0)
2276                         return ret;
2277         }
2278
2279         /* check if lsc interrupt feature is enabled */
2280         if (dev->data->dev_conf.intr_conf.lsc) {
2281                 if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2282                         PMD_DRV_LOG(ERR, "link status not supported by host");
2283                         return -ENOTSUP;
2284                 }
2285         }
2286
2287         /* Enable uio/vfio intr/eventfd mapping: although we already did that
2288          * in device configure, it could have been unmapped when the device was
2289          * stopped.
2290          */
2291         if (dev->data->dev_conf.intr_conf.lsc ||
2292             dev->data->dev_conf.intr_conf.rxq) {
2293                 virtio_intr_disable(dev);
2294
2295                 /* Setup interrupt callback  */
2296                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2297                         rte_intr_callback_register(dev->intr_handle,
2298                                                    virtio_interrupt_handler,
2299                                                    dev);
2300
2301                 if (virtio_intr_enable(dev) < 0) {
2302                         PMD_DRV_LOG(ERR, "interrupt enable failed");
2303                         return -EIO;
2304                 }
2305         }
2306
2307         /* Notify the backend.
2308          * Otherwise the tap backend might already have stopped its queue due to
2309          * fullness, and the vhost backend would have no chance to be woken up.
2310          */
2311         nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2312         if (hw->max_queue_pairs > 1) {
2313                 if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2314                         return -EINVAL;
2315         }
2316
2317         PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
2318
2319         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2320                 vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2321                 /* Flush the old packets */
2322                 virtqueue_rxvq_flush(vq);
2323                 virtqueue_notify(vq);
2324         }
2325
2326         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2327                 vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2328                 virtqueue_notify(vq);
2329         }
2330
2331         PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
2332
2333         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2334                 vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2335                 VIRTQUEUE_DUMP(vq);
2336         }
2337
2338         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2339                 vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2340                 VIRTQUEUE_DUMP(vq);
2341         }
2342
2343         set_rxtx_funcs(dev);
2344         hw->started = 1;
2345
2346         /* Initialize Link state */
2347         virtio_dev_link_update(dev, 0);
2348
2349         return 0;
2350 }
2351
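/* Detach and free all mbufs still attached to the Rx/Tx virtqueues. */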
2352 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2353 {
2354         struct virtio_hw *hw = dev->data->dev_private;
2355         uint16_t nr_vq = virtio_get_nr_vq(hw);
2356         const char *type __rte_unused;
2357         unsigned int i, mbuf_num = 0;
2358         struct virtqueue *vq;
2359         struct rte_mbuf *buf;
2360         int queue_type;
2361
2362         if (hw->vqs == NULL)
2363                 return;
2364
2365         for (i = 0; i < nr_vq; i++) {
2366                 vq = hw->vqs[i];
2367                 if (!vq)
2368                         continue;
2369
2370                 queue_type = virtio_get_queue_type(hw, i);
2371                 if (queue_type == VTNET_RQ)
2372                         type = "rxq";
2373                 else if (queue_type == VTNET_TQ)
2374                         type = "txq";
2375                 else
2376                         continue;
2377
2378                 PMD_INIT_LOG(DEBUG,
2379                         "Before freeing %s[%d] used and unused buf",
2380                         type, i);
2381                 VIRTQUEUE_DUMP(vq);
2382
2383                 while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2384                         rte_pktmbuf_free(buf);
2385                         mbuf_num++;
2386                 }
2387
2388                 PMD_INIT_LOG(DEBUG,
2389                         "After freeing %s[%d] used and unused buf",
2390                         type, i);
2391                 VIRTQUEUE_DUMP(vq);
2392         }
2393
2394         PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2395 }
2396
2397 /*
2398  * Stop device: disable interrupt and mark link down
2399  */
2400 int
2401 virtio_dev_stop(struct rte_eth_dev *dev)
2402 {
2403         struct virtio_hw *hw = dev->data->dev_private;
2404         struct rte_eth_link link;
2405         struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2406
2407         PMD_INIT_LOG(DEBUG, "stop");
2408         dev->data->dev_started = 0;
2409
2410         rte_spinlock_lock(&hw->state_lock);
2411         if (!hw->started)
2412                 goto out_unlock;
2413         hw->started = 0;
2414
2415         if (intr_conf->lsc || intr_conf->rxq) {
2416                 virtio_intr_disable(dev);
2417
2418                 /* Reset interrupt callback  */
2419                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2420                         rte_intr_callback_unregister(dev->intr_handle,
2421                                                      virtio_interrupt_handler,
2422                                                      dev);
2423                 }
2424         }
2425
2426         memset(&link, 0, sizeof(link));
2427         rte_eth_linkstatus_set(dev, &link);
2428 out_unlock:
2429         rte_spinlock_unlock(&hw->state_lock);
2430
2431         return 0;
2432 }
2433
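/*
 * Report link status: down when the port is stopped, otherwise read from
 * the device config if VIRTIO_NET_F_STATUS was negotiated, else assume
 * the link is up.
 */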
2434 static int
2435 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2436 {
2437         struct rte_eth_link link;
2438         uint16_t status;
2439         struct virtio_hw *hw = dev->data->dev_private;
2440
2441         memset(&link, 0, sizeof(link));
2442         link.link_duplex = hw->duplex;
2443         link.link_speed  = hw->speed;
2444         link.link_autoneg = ETH_LINK_AUTONEG;
2445
2446         if (!hw->started) {
2447                 link.link_status = ETH_LINK_DOWN;
2448                 link.link_speed = ETH_SPEED_NUM_NONE;
2449         } else if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2450                 PMD_INIT_LOG(DEBUG, "Get link status from hw");
2451                 virtio_read_dev_config(hw,
2452                                 offsetof(struct virtio_net_config, status),
2453                                 &status, sizeof(status));
2454                 if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2455                         link.link_status = ETH_LINK_DOWN;
2456                         link.link_speed = ETH_SPEED_NUM_NONE;
2457                         PMD_INIT_LOG(DEBUG, "Port %d is down",
2458                                      dev->data->port_id);
2459                 } else {
2460                         link.link_status = ETH_LINK_UP;
2461                         PMD_INIT_LOG(DEBUG, "Port %d is up",
2462                                      dev->data->port_id);
2463                 }
2464         } else {
2465                 link.link_status = ETH_LINK_UP;
2466         }
2467
2468         return rte_eth_linkstatus_set(dev, &link);
2469 }
2470
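/*
 * Apply VLAN offload changes: VLAN filtering requires
 * VIRTIO_NET_F_CTRL_VLAN on the host; VLAN stripping is recorded in
 * hw->vlan_strip for the Rx path.
 */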
2471 static int
2472 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2473 {
2474         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2475         struct virtio_hw *hw = dev->data->dev_private;
2476         uint64_t offloads = rxmode->offloads;
2477
2478         if (mask & ETH_VLAN_FILTER_MASK) {
2479                 if ((offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
2480                                 !virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2481
2482                         PMD_DRV_LOG(NOTICE,
2483                                 "vlan filtering not available on this host");
2484
2485                         return -ENOTSUP;
2486                 }
2487         }
2488
2489         if (mask & ETH_VLAN_STRIP_MASK)
2490                 hw->vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
2491
2492         return 0;
2493 }
2494
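/*
 * Report device capabilities: queue limits, MTU, and the Rx/Tx offload
 * and descriptor-count limits derived from the host feature bits.
 */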
2495 static int
2496 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2497 {
2498         uint64_t tso_mask, host_features;
2499         struct virtio_hw *hw = dev->data->dev_private;
2500         dev_info->speed_capa = virtio_dev_speed_capa_get(hw->speed);
2501
2502         dev_info->max_rx_queues =
2503                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
2504         dev_info->max_tx_queues =
2505                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
2506         dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
2507         dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
2508         dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
2509         dev_info->max_mtu = hw->max_mtu;
2510
2511         host_features = VIRTIO_OPS(hw)->get_features(hw);
2512         dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2513         dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME;
2514         if (host_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
2515                 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_SCATTER;
2516         if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
2517                 dev_info->rx_offload_capa |=
2518                         DEV_RX_OFFLOAD_TCP_CKSUM |
2519                         DEV_RX_OFFLOAD_UDP_CKSUM;
2520         }
2521         if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
2522                 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_VLAN_FILTER;
2523         tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2524                 (1ULL << VIRTIO_NET_F_GUEST_TSO6);
2525         if ((host_features & tso_mask) == tso_mask)
2526                 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;
2527
2528         dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
2529                                     DEV_TX_OFFLOAD_VLAN_INSERT;
2530         if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
2531                 dev_info->tx_offload_capa |=
2532                         DEV_TX_OFFLOAD_UDP_CKSUM |
2533                         DEV_TX_OFFLOAD_TCP_CKSUM;
2534         }
2535         tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2536                 (1ULL << VIRTIO_NET_F_HOST_TSO6);
2537         if ((host_features & tso_mask) == tso_mask)
2538                 dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
2539
2540         if (host_features & (1ULL << VIRTIO_F_RING_PACKED)) {
2541                 /*
2542                  * According to 2.7 Packed Virtqueues,
2543                  * 2.7.10.1 Structure Size and Alignment:
2544                  * The Queue Size value does not have to be a power of 2.
2545                  */
2546                 dev_info->rx_desc_lim.nb_max = UINT16_MAX;
2547         } else {
2548                 /*
2549                  * According to 2.6 Split Virtqueues:
2550                  * Queue Size value is always a power of 2. The maximum Queue
2551                  * Size value is 32768.
2552                  */
2553                 dev_info->rx_desc_lim.nb_max = 32768;
2554         }
2555         /*
2556          * The actual minimum is not the same for virtqueues of different kinds,
2557          * but to avoid tangling the code with separate branches, rely on the
2558          * default thresholds, since the descriptor count must be at least their size.
2559          */
2560         dev_info->rx_desc_lim.nb_min = RTE_MAX(DEFAULT_RX_FREE_THRESH,
2561                                                RTE_VIRTIO_VPMD_RX_REARM_THRESH);
2562         dev_info->rx_desc_lim.nb_align = 1;
2563
2564         return 0;
2565 }
2566
2567 /*
2568  * It enables testpmd to collect per queue stats.
2569  */
2570 static int
2571 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
2572 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
2573 __rte_unused uint8_t is_rx)
2574 {
2575         return 0;
2576 }
2577
2578 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_init, init, NOTICE);
2579 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_driver, driver, NOTICE);