drivers/net/virtio/virtio_ethdev.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10
11 #include <ethdev_driver.h>
12 #include <rte_memcpy.h>
13 #include <rte_string_fns.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_branch_prediction.h>
17 #include <rte_ether.h>
18 #include <rte_ip.h>
19 #include <rte_arp.h>
20 #include <rte_common.h>
21 #include <rte_errno.h>
22 #include <rte_cpuflags.h>
23 #include <rte_vect.h>
24 #include <rte_memory.h>
25 #include <rte_eal_paging.h>
26 #include <rte_eal.h>
27 #include <rte_dev.h>
28 #include <rte_cycles.h>
29 #include <rte_kvargs.h>
30
31 #include "virtio_ethdev.h"
32 #include "virtio.h"
33 #include "virtio_logs.h"
34 #include "virtqueue.h"
35 #include "virtio_rxtx.h"
36 #include "virtio_rxtx_simple.h"
37 #include "virtio_user/virtio_user_dev.h"
38
39 static int  virtio_dev_configure(struct rte_eth_dev *dev);
40 static int  virtio_dev_start(struct rte_eth_dev *dev);
41 static int virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
42 static int virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
43 static int virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
44 static int virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
45 static uint32_t virtio_dev_speed_capa_get(uint32_t speed);
46 static int virtio_dev_devargs_parse(struct rte_devargs *devargs,
47         uint32_t *speed,
48         int *vectorized);
49 static int virtio_dev_info_get(struct rte_eth_dev *dev,
50                                 struct rte_eth_dev_info *dev_info);
51 static int virtio_dev_link_update(struct rte_eth_dev *dev,
52         int wait_to_complete);
53 static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
54
55 static void virtio_set_hwaddr(struct virtio_hw *hw);
56 static void virtio_get_hwaddr(struct virtio_hw *hw);
57
58 static int virtio_dev_stats_get(struct rte_eth_dev *dev,
59                                  struct rte_eth_stats *stats);
60 static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
61                                  struct rte_eth_xstat *xstats, unsigned n);
62 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
63                                        struct rte_eth_xstat_name *xstats_names,
64                                        unsigned limit);
65 static int virtio_dev_stats_reset(struct rte_eth_dev *dev);
66 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
67 static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
68                                 uint16_t vlan_id, int on);
69 static int virtio_mac_addr_add(struct rte_eth_dev *dev,
70                                 struct rte_ether_addr *mac_addr,
71                                 uint32_t index, uint32_t vmdq);
72 static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
73 static int virtio_mac_addr_set(struct rte_eth_dev *dev,
74                                 struct rte_ether_addr *mac_addr);
75
76 static int virtio_intr_disable(struct rte_eth_dev *dev);
77
78 static int virtio_dev_queue_stats_mapping_set(
79         struct rte_eth_dev *eth_dev,
80         uint16_t queue_id,
81         uint8_t stat_idx,
82         uint8_t is_rx);
83
84 static void virtio_notify_peers(struct rte_eth_dev *dev);
85 static void virtio_ack_link_announce(struct rte_eth_dev *dev);
86
87 struct rte_virtio_xstats_name_off {
88         char name[RTE_ETH_XSTATS_NAME_SIZE];
89         unsigned offset;
90 };
91
92 /* [rt]x_qX_ is prepended to the name string here */
93 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
94         {"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
95         {"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
96         {"errors",                 offsetof(struct virtnet_rx, stats.errors)},
97         {"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
98         {"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
99         {"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
100         {"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
101         {"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
102         {"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
103         {"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
104         {"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
105         {"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
106         {"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
107 };
108
109 /* [rt]x_qX_ is prepended to the name string here */
110 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
111         {"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
112         {"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
113         {"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
114         {"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
115         {"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
116         {"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
117         {"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
118         {"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
119         {"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
120         {"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
121         {"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
122         {"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
123 };
124
125 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
126                             sizeof(rte_virtio_rxq_stat_strings[0]))
127 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
128                             sizeof(rte_virtio_txq_stat_strings[0]))
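/*
 * Illustrative example (names derived from the tables above): the exported
 * xstats are the per-queue strings prefixed with direction and queue index,
 * e.g. for Rx queue 0:
 *
 *   rx_q0_good_packets    -> virtnet_rx stats.packets
 *   rx_q0_size_64_packets -> virtnet_rx stats.size_bins[1]
 *
 * and symmetrically "tx_qN_..." for Tx queues; see
 * virtio_dev_xstats_get_names() further below.
 */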
129
130 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
131
132 static struct virtio_pmd_ctrl *
133 virtio_send_command_packed(struct virtnet_ctl *cvq,
134                            struct virtio_pmd_ctrl *ctrl,
135                            int *dlen, int pkt_num)
136 {
137         struct virtqueue *vq = virtnet_cq_to_vq(cvq);
138         int head;
139         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
140         struct virtio_pmd_ctrl *result;
141         uint16_t flags;
142         int sum = 0;
143         int nb_descs = 0;
144         int k;
145
146         /*
147          * Format is enforced by the QEMU backend:
148          * one device-readable descriptor for the header;
149          * at least one device-readable descriptor per data argument;
150          * one device-writable descriptor for the ACK/status.
151          */
152         head = vq->vq_avail_idx;
153         flags = vq->vq_packed.cached_flags;
154         desc[head].addr = cvq->virtio_net_hdr_mem;
155         desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
156         vq->vq_free_cnt--;
157         nb_descs++;
158         if (++vq->vq_avail_idx >= vq->vq_nentries) {
159                 vq->vq_avail_idx -= vq->vq_nentries;
160                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
161         }
162
163         for (k = 0; k < pkt_num; k++) {
164                 desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
165                         + sizeof(struct virtio_net_ctrl_hdr)
166                         + sizeof(ctrl->status) + sizeof(uint8_t) * sum;
167                 desc[vq->vq_avail_idx].len = dlen[k];
168                 desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
169                         vq->vq_packed.cached_flags;
170                 sum += dlen[k];
171                 vq->vq_free_cnt--;
172                 nb_descs++;
173                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
174                         vq->vq_avail_idx -= vq->vq_nentries;
175                         vq->vq_packed.cached_flags ^=
176                                 VRING_PACKED_DESC_F_AVAIL_USED;
177                 }
178         }
179
180         desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
181                 + sizeof(struct virtio_net_ctrl_hdr);
182         desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
183         desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
184                 vq->vq_packed.cached_flags;
185         vq->vq_free_cnt--;
186         nb_descs++;
187         if (++vq->vq_avail_idx >= vq->vq_nentries) {
188                 vq->vq_avail_idx -= vq->vq_nentries;
189                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
190         }
191
192         virtqueue_store_flags_packed(&desc[head], VRING_DESC_F_NEXT | flags,
193                         vq->hw->weak_barriers);
194
195         virtio_wmb(vq->hw->weak_barriers);
196         virtqueue_notify(vq);
197
198         /* wait for used desc in virtqueue
199          * desc_is_used has a load-acquire or rte_io_rmb inside
200          */
201         while (!desc_is_used(&desc[head], vq))
202                 usleep(100);
203
204         /* now get used descriptors */
205         vq->vq_free_cnt += nb_descs;
206         vq->vq_used_cons_idx += nb_descs;
207         if (vq->vq_used_cons_idx >= vq->vq_nentries) {
208                 vq->vq_used_cons_idx -= vq->vq_nentries;
209                 vq->vq_packed.used_wrap_counter ^= 1;
210         }
211
212         PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
213                         "vq->vq_avail_idx=%d\n"
214                         "vq->vq_used_cons_idx=%d\n"
215                         "vq->vq_packed.cached_flags=0x%x\n"
216                         "vq->vq_packed.used_wrap_counter=%d",
217                         vq->vq_free_cnt,
218                         vq->vq_avail_idx,
219                         vq->vq_used_cons_idx,
220                         vq->vq_packed.cached_flags,
221                         vq->vq_packed.used_wrap_counter);
222
223         result = cvq->virtio_net_hdr_mz->addr;
224         return result;
225 }
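/*
 * Illustrative descriptor layout for a command with a single data argument
 * (ring indices wrap as handled above):
 *
 *   head desc -> virtio_net_ctrl_hdr (class/cmd), device-readable
 *   next desc -> dlen[0] bytes of command data,   device-readable
 *   last desc -> ctrl->status (1 byte),           VRING_DESC_F_WRITE
 *
 * The avail/used flags of the head descriptor are stored last, after which
 * the device is notified and the driver polls desc_is_used() on the head.
 */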
226
227 static struct virtio_pmd_ctrl *
228 virtio_send_command_split(struct virtnet_ctl *cvq,
229                           struct virtio_pmd_ctrl *ctrl,
230                           int *dlen, int pkt_num)
231 {
232         struct virtio_pmd_ctrl *result;
233         struct virtqueue *vq = virtnet_cq_to_vq(cvq);
234         uint32_t head, i;
235         int k, sum = 0;
236
237         head = vq->vq_desc_head_idx;
238
239         /*
240          * Format is enforced by the QEMU backend:
241          * one device-readable descriptor for the header;
242          * at least one device-readable descriptor per data argument;
243          * one device-writable descriptor for the ACK/status.
244          */
245         vq->vq_split.ring.desc[head].flags = VRING_DESC_F_NEXT;
246         vq->vq_split.ring.desc[head].addr = cvq->virtio_net_hdr_mem;
247         vq->vq_split.ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
248         vq->vq_free_cnt--;
249         i = vq->vq_split.ring.desc[head].next;
250
251         for (k = 0; k < pkt_num; k++) {
252                 vq->vq_split.ring.desc[i].flags = VRING_DESC_F_NEXT;
253                 vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
254                         + sizeof(struct virtio_net_ctrl_hdr)
255                         + sizeof(ctrl->status) + sizeof(uint8_t)*sum;
256                 vq->vq_split.ring.desc[i].len = dlen[k];
257                 sum += dlen[k];
258                 vq->vq_free_cnt--;
259                 i = vq->vq_split.ring.desc[i].next;
260         }
261
262         vq->vq_split.ring.desc[i].flags = VRING_DESC_F_WRITE;
263         vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
264                         + sizeof(struct virtio_net_ctrl_hdr);
265         vq->vq_split.ring.desc[i].len = sizeof(ctrl->status);
266         vq->vq_free_cnt--;
267
268         vq->vq_desc_head_idx = vq->vq_split.ring.desc[i].next;
269
270         vq_update_avail_ring(vq, head);
271         vq_update_avail_idx(vq);
272
273         PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
274
275         virtqueue_notify(vq);
276
277         while (virtqueue_nused(vq) == 0)
278                 usleep(100);
279
280         while (virtqueue_nused(vq)) {
281                 uint32_t idx, desc_idx, used_idx;
282                 struct vring_used_elem *uep;
283
284                 used_idx = (uint32_t)(vq->vq_used_cons_idx
285                                 & (vq->vq_nentries - 1));
286                 uep = &vq->vq_split.ring.used->ring[used_idx];
287                 idx = (uint32_t) uep->id;
288                 desc_idx = idx;
289
290                 while (vq->vq_split.ring.desc[desc_idx].flags &
291                                 VRING_DESC_F_NEXT) {
292                         desc_idx = vq->vq_split.ring.desc[desc_idx].next;
293                         vq->vq_free_cnt++;
294                 }
295
296                 vq->vq_split.ring.desc[desc_idx].next = vq->vq_desc_head_idx;
297                 vq->vq_desc_head_idx = idx;
298
299                 vq->vq_used_cons_idx++;
300                 vq->vq_free_cnt++;
301         }
302
303         PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
304                         vq->vq_free_cnt, vq->vq_desc_head_idx);
305
306         result = cvq->virtio_net_hdr_mz->addr;
307         return result;
308 }
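/*
 * Summary of the split-ring path above: the command uses the same
 * header / data / writable-status chain, linked through desc[].next;
 * completion is detected by polling virtqueue_nused(), and the reclaim loop
 * walks each used chain via VRING_DESC_F_NEXT to splice it back onto the
 * descriptor free list.
 */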
309
310 static int
311 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
312                     int *dlen, int pkt_num)
313 {
314         virtio_net_ctrl_ack status = ~0;
315         struct virtio_pmd_ctrl *result;
316         struct virtqueue *vq;
317
318         ctrl->status = status;
319
320         if (!cvq) {
321                 PMD_INIT_LOG(ERR, "Control queue is not supported.");
322                 return -1;
323         }
324
325         rte_spinlock_lock(&cvq->lock);
326         vq = virtnet_cq_to_vq(cvq);
327
328         PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
329                 "vq->hw->cvq = %p vq = %p",
330                 vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
331
332         if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
333                 rte_spinlock_unlock(&cvq->lock);
334                 return -1;
335         }
336
337         memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
338                 sizeof(struct virtio_pmd_ctrl));
339
340         if (virtio_with_packed_queue(vq->hw))
341                 result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num);
342         else
343                 result = virtio_send_command_split(cvq, ctrl, dlen, pkt_num);
344
345         rte_spinlock_unlock(&cvq->lock);
346         return result->status;
347 }
348
349 static int
350 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
351 {
352         struct virtio_hw *hw = dev->data->dev_private;
353         struct virtio_pmd_ctrl ctrl;
354         int dlen[1];
355         int ret;
356
357         ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
358         ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
359         memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
360
361         dlen[0] = sizeof(uint16_t);
362
363         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
364         if (ret) {
365                 PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
366                           "failed, this is too late now...");
367                 return -EINVAL;
368         }
369
370         return 0;
371 }
372
373 static void
374 virtio_dev_queue_release(void *queue __rte_unused)
375 {
376         /* do nothing */
377 }
378
379 static uint16_t
380 virtio_get_nr_vq(struct virtio_hw *hw)
381 {
382         uint16_t nr_vq = hw->max_queue_pairs * 2;
383
384         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
385                 nr_vq += 1;
386
387         return nr_vq;
388 }
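/*
 * Queue index layout used by this driver: for queue pair q, the Rx
 * virtqueue is at index 2*q (2*q + VTNET_SQ_RQ_QUEUE_IDX) and the Tx
 * virtqueue at 2*q + 1; when VIRTIO_NET_F_CTRL_VQ is negotiated the control
 * virtqueue occupies the last index, 2 * max_queue_pairs.
 * Example: 2 queue pairs plus a control queue -> nr_vq = 5
 * (rxq0=0, txq0=1, rxq1=2, txq1=3, cvq=4).
 */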
389
390 static void
391 virtio_init_vring(struct virtqueue *vq)
392 {
393         int size = vq->vq_nentries;
394         uint8_t *ring_mem = vq->vq_ring_virt_mem;
395
396         PMD_INIT_FUNC_TRACE();
397
398         memset(ring_mem, 0, vq->vq_ring_size);
399
400         vq->vq_used_cons_idx = 0;
401         vq->vq_desc_head_idx = 0;
402         vq->vq_avail_idx = 0;
403         vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
404         vq->vq_free_cnt = vq->vq_nentries;
405         memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
406         if (virtio_with_packed_queue(vq->hw)) {
407                 vring_init_packed(&vq->vq_packed.ring, ring_mem,
408                                   VIRTIO_VRING_ALIGN, size);
409                 vring_desc_init_packed(vq, size);
410         } else {
411                 struct vring *vr = &vq->vq_split.ring;
412
413                 vring_init_split(vr, ring_mem, VIRTIO_VRING_ALIGN, size);
414                 vring_desc_init_split(vr->desc, size);
415         }
416         /*
417          * Disable the device (host) from interrupting the guest
418          */
419         virtqueue_disable_intr(vq);
420 }
421
422 static int
423 virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
424 {
425         char vq_name[VIRTQUEUE_MAX_NAME_SZ];
426         char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
427         const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
428         unsigned int vq_size, size;
429         struct virtio_hw *hw = dev->data->dev_private;
430         struct virtnet_rx *rxvq = NULL;
431         struct virtnet_tx *txvq = NULL;
432         struct virtnet_ctl *cvq = NULL;
433         struct virtqueue *vq;
434         size_t sz_hdr_mz = 0;
435         void *sw_ring = NULL;
436         int queue_type = virtio_get_queue_type(hw, queue_idx);
437         int ret;
438         int numa_node = dev->device->numa_node;
439         struct rte_mbuf *fake_mbuf = NULL;
440
441         PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
442                         queue_idx, numa_node);
443
444         /*
445          * Read the virtqueue size from the Queue Size field
446          * Always a power of 2; if 0, the virtqueue does not exist
447          */
448         vq_size = VIRTIO_OPS(hw)->get_queue_num(hw, queue_idx);
449         PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
450         if (vq_size == 0) {
451                 PMD_INIT_LOG(ERR, "virtqueue does not exist");
452                 return -EINVAL;
453         }
454
455         if (!virtio_with_packed_queue(hw) && !rte_is_power_of_2(vq_size)) {
456                 PMD_INIT_LOG(ERR, "split virtqueue size is not power of 2");
457                 return -EINVAL;
458         }
459
460         snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
461                  dev->data->port_id, queue_idx);
462
463         size = RTE_ALIGN_CEIL(sizeof(*vq) +
464                                 vq_size * sizeof(struct vq_desc_extra),
465                                 RTE_CACHE_LINE_SIZE);
466         if (queue_type == VTNET_TQ) {
467                 /*
468                  * For each xmit packet, allocate a virtio_net_hdr
469                  * and indirect ring elements
470                  */
471                 sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
472         } else if (queue_type == VTNET_CQ) {
473                 /* Allocate a page for control vq command, data and status */
474                 sz_hdr_mz = rte_mem_page_size();
475         }
476
477         vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
478                                 numa_node);
479         if (vq == NULL) {
480                 PMD_INIT_LOG(ERR, "can not allocate vq");
481                 return -ENOMEM;
482         }
483         hw->vqs[queue_idx] = vq;
484
485         vq->hw = hw;
486         vq->vq_queue_index = queue_idx;
487         vq->vq_nentries = vq_size;
488         if (virtio_with_packed_queue(hw)) {
489                 vq->vq_packed.used_wrap_counter = 1;
490                 vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL;
491                 vq->vq_packed.event_flags_shadow = 0;
492                 if (queue_type == VTNET_RQ)
493                         vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE;
494         }
495
496         /*
497          * Reserve a memzone for vring elements
498          */
499         size = vring_size(hw, vq_size, VIRTIO_VRING_ALIGN);
500         vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_VRING_ALIGN);
501         PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
502                      size, vq->vq_ring_size);
503
504         mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
505                         numa_node, RTE_MEMZONE_IOVA_CONTIG,
506                         VIRTIO_VRING_ALIGN);
507         if (mz == NULL) {
508                 if (rte_errno == EEXIST)
509                         mz = rte_memzone_lookup(vq_name);
510                 if (mz == NULL) {
511                         ret = -ENOMEM;
512                         goto free_vq;
513                 }
514         }
515
516         memset(mz->addr, 0, mz->len);
517
518         if (hw->use_va)
519                 vq->vq_ring_mem = (uintptr_t)mz->addr;
520         else
521                 vq->vq_ring_mem = mz->iova;
522
523         vq->vq_ring_virt_mem = mz->addr;
524         PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64, vq->vq_ring_mem);
525         PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: %p", vq->vq_ring_virt_mem);
526
527         virtio_init_vring(vq);
528
529         if (sz_hdr_mz) {
530                 snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
531                          dev->data->port_id, queue_idx);
532                 hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
533                                 numa_node, RTE_MEMZONE_IOVA_CONTIG,
534                                 RTE_CACHE_LINE_SIZE);
535                 if (hdr_mz == NULL) {
536                         if (rte_errno == EEXIST)
537                                 hdr_mz = rte_memzone_lookup(vq_hdr_name);
538                         if (hdr_mz == NULL) {
539                                 ret = -ENOMEM;
540                                 goto free_mz;
541                         }
542                 }
543         }
544
545         if (queue_type == VTNET_RQ) {
546                 size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
547                                sizeof(vq->sw_ring[0]);
548
549                 sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
550                                 RTE_CACHE_LINE_SIZE, numa_node);
551                 if (!sw_ring) {
552                         PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
553                         ret = -ENOMEM;
554                         goto free_hdr_mz;
555                 }
556
557                 fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
558                                 RTE_CACHE_LINE_SIZE, numa_node);
559                 if (!fake_mbuf) {
560                         PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
561                         ret = -ENOMEM;
562                         goto free_sw_ring;
563                 }
564
565                 vq->sw_ring = sw_ring;
566                 rxvq = &vq->rxq;
567                 rxvq->port_id = dev->data->port_id;
568                 rxvq->mz = mz;
569                 rxvq->fake_mbuf = fake_mbuf;
570         } else if (queue_type == VTNET_TQ) {
571                 txvq = &vq->txq;
572                 txvq->port_id = dev->data->port_id;
573                 txvq->mz = mz;
574                 txvq->virtio_net_hdr_mz = hdr_mz;
575                 if (hw->use_va)
576                         txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
577                 else
578                         txvq->virtio_net_hdr_mem = hdr_mz->iova;
579         } else if (queue_type == VTNET_CQ) {
580                 cvq = &vq->cq;
581                 cvq->mz = mz;
582                 cvq->virtio_net_hdr_mz = hdr_mz;
583                 if (hw->use_va)
584                         cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
585                 else
586                         cvq->virtio_net_hdr_mem = hdr_mz->iova;
587                 memset(cvq->virtio_net_hdr_mz->addr, 0, rte_mem_page_size());
588
589                 hw->cvq = cvq;
590         }
591
592         if (hw->use_va)
593                 vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_addr);
594         else
595                 vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_iova);
596
597         if (queue_type == VTNET_TQ) {
598                 struct virtio_tx_region *txr;
599                 unsigned int i;
600
601                 txr = hdr_mz->addr;
602                 memset(txr, 0, vq_size * sizeof(*txr));
603                 for (i = 0; i < vq_size; i++) {
604                         /* first indirect descriptor is always the tx header */
605                         if (!virtio_with_packed_queue(hw)) {
606                                 struct vring_desc *start_dp = txr[i].tx_indir;
607                                 vring_desc_init_split(start_dp,
608                                                       RTE_DIM(txr[i].tx_indir));
609                                 start_dp->addr = txvq->virtio_net_hdr_mem
610                                         + i * sizeof(*txr)
611                                         + offsetof(struct virtio_tx_region,
612                                                    tx_hdr);
613                                 start_dp->len = hw->vtnet_hdr_size;
614                                 start_dp->flags = VRING_DESC_F_NEXT;
615                         } else {
616                                 struct vring_packed_desc *start_dp =
617                                         txr[i].tx_packed_indir;
618                                 vring_desc_init_indirect_packed(start_dp,
619                                       RTE_DIM(txr[i].tx_packed_indir));
620                                 start_dp->addr = txvq->virtio_net_hdr_mem
621                                         + i * sizeof(*txr)
622                                         + offsetof(struct virtio_tx_region,
623                                                    tx_hdr);
624                                 start_dp->len = hw->vtnet_hdr_size;
625                         }
626                 }
627         }
628
629         if (VIRTIO_OPS(hw)->setup_queue(hw, vq) < 0) {
630                 PMD_INIT_LOG(ERR, "setup_queue failed");
631                 ret = -EINVAL;
632                 goto clean_vq;
633         }
634
635         return 0;
636
637 clean_vq:
638         hw->cvq = NULL;
639         rte_free(fake_mbuf);
640 free_sw_ring:
641         rte_free(sw_ring);
642 free_hdr_mz:
643         rte_memzone_free(hdr_mz);
644 free_mz:
645         rte_memzone_free(mz);
646 free_vq:
647         rte_free(vq);
648         hw->vqs[queue_idx] = NULL;
649
650         return ret;
651 }
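/*
 * Illustrative example of the per-queue allocations done above for port 0,
 * queue index 3 (a Tx queue): a "port0_vq3" memzone holds the vring itself
 * and a "port0_vq3_hdr" memzone holds one struct virtio_tx_region (tx header
 * plus indirect descriptors) per ring entry. Rx queues additionally get an
 * sw_ring and a fake_mbuf, and the control queue a one-page header memzone.
 */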
652
653 static void
654 virtio_free_queues(struct virtio_hw *hw)
655 {
656         uint16_t nr_vq = virtio_get_nr_vq(hw);
657         struct virtqueue *vq;
658         int queue_type;
659         uint16_t i;
660
661         if (hw->vqs == NULL)
662                 return;
663
664         for (i = 0; i < nr_vq; i++) {
665                 vq = hw->vqs[i];
666                 if (!vq)
667                         continue;
668
669                 queue_type = virtio_get_queue_type(hw, i);
670                 if (queue_type == VTNET_RQ) {
671                         rte_free(vq->rxq.fake_mbuf);
672                         rte_free(vq->sw_ring);
673                         rte_memzone_free(vq->rxq.mz);
674                 } else if (queue_type == VTNET_TQ) {
675                         rte_memzone_free(vq->txq.mz);
676                         rte_memzone_free(vq->txq.virtio_net_hdr_mz);
677                 } else {
678                         rte_memzone_free(vq->cq.mz);
679                         rte_memzone_free(vq->cq.virtio_net_hdr_mz);
680                 }
681
682                 rte_free(vq);
683                 hw->vqs[i] = NULL;
684         }
685
686         rte_free(hw->vqs);
687         hw->vqs = NULL;
688 }
689
690 static int
691 virtio_alloc_queues(struct rte_eth_dev *dev)
692 {
693         struct virtio_hw *hw = dev->data->dev_private;
694         uint16_t nr_vq = virtio_get_nr_vq(hw);
695         uint16_t i;
696         int ret;
697
698         hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
699         if (!hw->vqs) {
700                 PMD_INIT_LOG(ERR, "failed to allocate vqs");
701                 return -ENOMEM;
702         }
703
704         for (i = 0; i < nr_vq; i++) {
705                 ret = virtio_init_queue(dev, i);
706                 if (ret < 0) {
707                         virtio_free_queues(hw);
708                         return ret;
709                 }
710         }
711
712         return 0;
713 }
714
715 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);
716
717 int
718 virtio_dev_close(struct rte_eth_dev *dev)
719 {
720         struct virtio_hw *hw = dev->data->dev_private;
721         struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
722
723         PMD_INIT_LOG(DEBUG, "virtio_dev_close");
724         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
725                 return 0;
726
727         if (!hw->opened)
728                 return 0;
729         hw->opened = 0;
730
731         /* reset the NIC */
732         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
733                 VIRTIO_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
734         if (intr_conf->rxq)
735                 virtio_queues_unbind_intr(dev);
736
737         if (intr_conf->lsc || intr_conf->rxq) {
738                 virtio_intr_disable(dev);
739                 rte_intr_efd_disable(dev->intr_handle);
740                 rte_free(dev->intr_handle->intr_vec);
741                 dev->intr_handle->intr_vec = NULL;
742         }
743
744         virtio_reset(hw);
745         virtio_dev_free_mbufs(dev);
746         virtio_free_queues(hw);
747
748         return VIRTIO_OPS(hw)->dev_close(hw);
749 }
750
751 static int
752 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
753 {
754         struct virtio_hw *hw = dev->data->dev_private;
755         struct virtio_pmd_ctrl ctrl;
756         int dlen[1];
757         int ret;
758
759         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
760                 PMD_INIT_LOG(INFO, "host does not support rx control");
761                 return -ENOTSUP;
762         }
763
764         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
765         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
766         ctrl.data[0] = 1;
767         dlen[0] = 1;
768
769         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
770         if (ret) {
771                 PMD_INIT_LOG(ERR, "Failed to enable promisc");
772                 return -EAGAIN;
773         }
774
775         return 0;
776 }
777
778 static int
779 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
780 {
781         struct virtio_hw *hw = dev->data->dev_private;
782         struct virtio_pmd_ctrl ctrl;
783         int dlen[1];
784         int ret;
785
786         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
787                 PMD_INIT_LOG(INFO, "host does not support rx control");
788                 return -ENOTSUP;
789         }
790
791         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
792         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
793         ctrl.data[0] = 0;
794         dlen[0] = 1;
795
796         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
797         if (ret) {
798                 PMD_INIT_LOG(ERR, "Failed to disable promisc");
799                 return -EAGAIN;
800         }
801
802         return 0;
803 }
804
805 static int
806 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
807 {
808         struct virtio_hw *hw = dev->data->dev_private;
809         struct virtio_pmd_ctrl ctrl;
810         int dlen[1];
811         int ret;
812
813         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
814                 PMD_INIT_LOG(INFO, "host does not support rx control");
815                 return -ENOTSUP;
816         }
817
818         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
819         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
820         ctrl.data[0] = 1;
821         dlen[0] = 1;
822
823         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
824         if (ret) {
825                 PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
826                 return -EAGAIN;
827         }
828
829         return 0;
830 }
831
832 static int
833 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
834 {
835         struct virtio_hw *hw = dev->data->dev_private;
836         struct virtio_pmd_ctrl ctrl;
837         int dlen[1];
838         int ret;
839
840         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
841                 PMD_INIT_LOG(INFO, "host does not support rx control");
842                 return -ENOTSUP;
843         }
844
845         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
846         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
847         ctrl.data[0] = 0;
848         dlen[0] = 1;
849
850         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
851         if (ret) {
852                 PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
853                 return -EAGAIN;
854         }
855
856         return 0;
857 }
858
859 uint16_t
860 virtio_rx_mem_pool_buf_size(struct rte_mempool *mp)
861 {
862         return rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
863 }
864
865 bool
866 virtio_rx_check_scatter(uint16_t max_rx_pkt_len, uint16_t rx_buf_size,
867                         bool rx_scatter_enabled, const char **error)
868 {
869         if (!rx_scatter_enabled && max_rx_pkt_len > rx_buf_size) {
870                 *error = "Rx scatter is disabled and RxQ mbuf pool object size is too small";
871                 return false;
872         }
873
874         return true;
875 }
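/*
 * Worked example (values are only illustrative): with the default
 * RTE_PKTMBUF_HEADROOM of 128 bytes and a mempool data room of 2048 bytes,
 * virtio_rx_mem_pool_buf_size() returns 1920, so with Rx scatter disabled
 * any max_rx_pkt_len above 1920 makes virtio_rx_check_scatter() fail.
 */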
876
877 static bool
878 virtio_check_scatter_on_all_rx_queues(struct rte_eth_dev *dev,
879                                       uint16_t frame_size)
880 {
881         struct virtio_hw *hw = dev->data->dev_private;
882         struct virtnet_rx *rxvq;
883         struct virtqueue *vq;
884         unsigned int qidx;
885         uint16_t buf_size;
886         const char *error;
887
888         if (hw->vqs == NULL)
889                 return true;
890
891         for (qidx = 0; (vq = hw->vqs[2 * qidx + VTNET_SQ_RQ_QUEUE_IDX]) != NULL;
892              qidx++) {
893                 rxvq = &vq->rxq;
894                 if (rxvq->mpool == NULL)
895                         continue;
896                 buf_size = virtio_rx_mem_pool_buf_size(rxvq->mpool);
897
898                 if (!virtio_rx_check_scatter(frame_size, buf_size,
899                                              hw->rx_ol_scatter, &error)) {
900                         PMD_INIT_LOG(ERR, "MTU check for RxQ %u failed: %s",
901                                      qidx, error);
902                         return false;
903                 }
904         }
905
906         return true;
907 }
908
909 #define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
910 static int
911 virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
912 {
913         struct virtio_hw *hw = dev->data->dev_private;
914         uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
915                                  hw->vtnet_hdr_size;
916         uint32_t frame_size = mtu + ether_hdr_len;
917         uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;
918
919         max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);
920
921         if (mtu < RTE_ETHER_MIN_MTU || frame_size > max_frame_size) {
922                 PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
923                         RTE_ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
924                 return -EINVAL;
925         }
926
927         if (!virtio_check_scatter_on_all_rx_queues(dev, frame_size)) {
928                 PMD_INIT_LOG(ERR, "MTU vs Rx scatter and Rx buffers check failed");
929                 return -EINVAL;
930         }
931
932         hw->max_rx_pkt_len = frame_size;
933         dev->data->dev_conf.rxmode.max_rx_pkt_len = hw->max_rx_pkt_len;
934
935         return 0;
936 }
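/*
 * Frame size arithmetic used above, as a worked example: ether_hdr_len is
 * RTE_ETHER_HDR_LEN (14) + VLAN_TAG_LEN (4) + hw->vtnet_hdr_size (10 or 12
 * bytes depending on the negotiated features), so an MTU of 1500 with a
 * 12-byte virtio-net header gives frame_size = 1500 + 14 + 4 + 12 = 1530.
 */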
937
938 static int
939 virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
940 {
941         struct virtio_hw *hw = dev->data->dev_private;
942         struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
943         struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
944
945         virtqueue_enable_intr(vq);
946         virtio_mb(hw->weak_barriers);
947         return 0;
948 }
949
950 static int
951 virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
952 {
953         struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
954         struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
955
956         virtqueue_disable_intr(vq);
957         return 0;
958 }
959
960 /*
961  * dev_ops for virtio, bare necessities for basic operation
962  */
963 static const struct eth_dev_ops virtio_eth_dev_ops = {
964         .dev_configure           = virtio_dev_configure,
965         .dev_start               = virtio_dev_start,
966         .dev_stop                = virtio_dev_stop,
967         .dev_close               = virtio_dev_close,
968         .promiscuous_enable      = virtio_dev_promiscuous_enable,
969         .promiscuous_disable     = virtio_dev_promiscuous_disable,
970         .allmulticast_enable     = virtio_dev_allmulticast_enable,
971         .allmulticast_disable    = virtio_dev_allmulticast_disable,
972         .mtu_set                 = virtio_mtu_set,
973         .dev_infos_get           = virtio_dev_info_get,
974         .stats_get               = virtio_dev_stats_get,
975         .xstats_get              = virtio_dev_xstats_get,
976         .xstats_get_names        = virtio_dev_xstats_get_names,
977         .stats_reset             = virtio_dev_stats_reset,
978         .xstats_reset            = virtio_dev_stats_reset,
979         .link_update             = virtio_dev_link_update,
980         .vlan_offload_set        = virtio_dev_vlan_offload_set,
981         .rx_queue_setup          = virtio_dev_rx_queue_setup,
982         .rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
983         .rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
984         .rx_queue_release        = virtio_dev_queue_release,
985         .tx_queue_setup          = virtio_dev_tx_queue_setup,
986         .tx_queue_release        = virtio_dev_queue_release,
987         /* collect stats per queue */
988         .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
989         .vlan_filter_set         = virtio_vlan_filter_set,
990         .mac_addr_add            = virtio_mac_addr_add,
991         .mac_addr_remove         = virtio_mac_addr_remove,
992         .mac_addr_set            = virtio_mac_addr_set,
993 };
994
995 /*
996  * dev_ops for virtio-user in secondary processes, as only a limited
997  * set of operations is currently supported.
998  */
999 const struct eth_dev_ops virtio_user_secondary_eth_dev_ops = {
1000         .dev_infos_get           = virtio_dev_info_get,
1001         .stats_get               = virtio_dev_stats_get,
1002         .xstats_get              = virtio_dev_xstats_get,
1003         .xstats_get_names        = virtio_dev_xstats_get_names,
1004         .stats_reset             = virtio_dev_stats_reset,
1005         .xstats_reset            = virtio_dev_stats_reset,
1006         /* collect stats per queue */
1007         .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
1008 };
1009
1010 static void
1011 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1012 {
1013         unsigned i;
1014
1015         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1016                 const struct virtnet_tx *txvq = dev->data->tx_queues[i];
1017                 if (txvq == NULL)
1018                         continue;
1019
1020                 stats->opackets += txvq->stats.packets;
1021                 stats->obytes += txvq->stats.bytes;
1022
1023                 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1024                         stats->q_opackets[i] = txvq->stats.packets;
1025                         stats->q_obytes[i] = txvq->stats.bytes;
1026                 }
1027         }
1028
1029         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1030                 const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1031                 if (rxvq == NULL)
1032                         continue;
1033
1034                 stats->ipackets += rxvq->stats.packets;
1035                 stats->ibytes += rxvq->stats.bytes;
1036                 stats->ierrors += rxvq->stats.errors;
1037
1038                 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1039                         stats->q_ipackets[i] = rxvq->stats.packets;
1040                         stats->q_ibytes[i] = rxvq->stats.bytes;
1041                 }
1042         }
1043
1044         stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
1045 }
1046
1047 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
1048                                        struct rte_eth_xstat_name *xstats_names,
1049                                        __rte_unused unsigned limit)
1050 {
1051         unsigned i;
1052         unsigned count = 0;
1053         unsigned t;
1054
1055         unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1056                 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1057
1058         if (xstats_names != NULL) {
1059                 /* Note: limit checked in rte_eth_xstats_get_names() */
1060
1061                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1062                         struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1063                         if (rxvq == NULL)
1064                                 continue;
1065                         for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1066                                 snprintf(xstats_names[count].name,
1067                                         sizeof(xstats_names[count].name),
1068                                         "rx_q%u_%s", i,
1069                                         rte_virtio_rxq_stat_strings[t].name);
1070                                 count++;
1071                         }
1072                 }
1073
1074                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1075                         struct virtnet_tx *txvq = dev->data->tx_queues[i];
1076                         if (txvq == NULL)
1077                                 continue;
1078                         for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1079                                 snprintf(xstats_names[count].name,
1080                                         sizeof(xstats_names[count].name),
1081                                         "tx_q%u_%s", i,
1082                                         rte_virtio_txq_stat_strings[t].name);
1083                                 count++;
1084                         }
1085                 }
1086                 return count;
1087         }
1088         return nstats;
1089 }
1090
1091 static int
1092 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1093                       unsigned n)
1094 {
1095         unsigned i;
1096         unsigned count = 0;
1097
1098         unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1099                 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1100
1101         if (n < nstats)
1102                 return nstats;
1103
1104         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1105                 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1106
1107                 if (rxvq == NULL)
1108                         continue;
1109
1110                 unsigned t;
1111
1112                 for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1113                         xstats[count].value = *(uint64_t *)(((char *)rxvq) +
1114                                 rte_virtio_rxq_stat_strings[t].offset);
1115                         xstats[count].id = count;
1116                         count++;
1117                 }
1118         }
1119
1120         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1121                 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1122
1123                 if (txvq == NULL)
1124                         continue;
1125
1126                 unsigned t;
1127
1128                 for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1129                         xstats[count].value = *(uint64_t *)(((char *)txvq) +
1130                                 rte_virtio_txq_stat_strings[t].offset);
1131                         xstats[count].id = count;
1132                         count++;
1133                 }
1134         }
1135
1136         return count;
1137 }
1138
1139 static int
1140 virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1141 {
1142         virtio_update_stats(dev, stats);
1143
1144         return 0;
1145 }
1146
1147 static int
1148 virtio_dev_stats_reset(struct rte_eth_dev *dev)
1149 {
1150         unsigned int i;
1151
1152         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1153                 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1154                 if (txvq == NULL)
1155                         continue;
1156
1157                 txvq->stats.packets = 0;
1158                 txvq->stats.bytes = 0;
1159                 txvq->stats.multicast = 0;
1160                 txvq->stats.broadcast = 0;
1161                 memset(txvq->stats.size_bins, 0,
1162                        sizeof(txvq->stats.size_bins[0]) * 8);
1163         }
1164
1165         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1166                 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1167                 if (rxvq == NULL)
1168                         continue;
1169
1170                 rxvq->stats.packets = 0;
1171                 rxvq->stats.bytes = 0;
1172                 rxvq->stats.errors = 0;
1173                 rxvq->stats.multicast = 0;
1174                 rxvq->stats.broadcast = 0;
1175                 memset(rxvq->stats.size_bins, 0,
1176                        sizeof(rxvq->stats.size_bins[0]) * 8);
1177         }
1178
1179         return 0;
1180 }
1181
1182 static void
1183 virtio_set_hwaddr(struct virtio_hw *hw)
1184 {
1185         virtio_write_dev_config(hw,
1186                         offsetof(struct virtio_net_config, mac),
1187                         &hw->mac_addr, RTE_ETHER_ADDR_LEN);
1188 }
1189
1190 static void
1191 virtio_get_hwaddr(struct virtio_hw *hw)
1192 {
1193         if (virtio_with_feature(hw, VIRTIO_NET_F_MAC)) {
1194                 virtio_read_dev_config(hw,
1195                         offsetof(struct virtio_net_config, mac),
1196                         &hw->mac_addr, RTE_ETHER_ADDR_LEN);
1197         } else {
1198                 rte_eth_random_addr(&hw->mac_addr[0]);
1199                 virtio_set_hwaddr(hw);
1200         }
1201 }
1202
1203 static int
1204 virtio_mac_table_set(struct virtio_hw *hw,
1205                      const struct virtio_net_ctrl_mac *uc,
1206                      const struct virtio_net_ctrl_mac *mc)
1207 {
1208         struct virtio_pmd_ctrl ctrl;
1209         int err, len[2];
1210
1211         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1212                 PMD_DRV_LOG(INFO, "host does not support mac table");
1213                 return -1;
1214         }
1215
1216         ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1217         ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1218
1219         len[0] = uc->entries * RTE_ETHER_ADDR_LEN + sizeof(uc->entries);
1220         memcpy(ctrl.data, uc, len[0]);
1221
1222         len[1] = mc->entries * RTE_ETHER_ADDR_LEN + sizeof(mc->entries);
1223         memcpy(ctrl.data + len[0], mc, len[1]);
1224
1225         err = virtio_send_command(hw->cvq, &ctrl, len, 2);
1226         if (err != 0)
1227                 PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
1228         return err;
1229 }
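/*
 * Illustrative layout of the control message built above: class
 * VIRTIO_NET_CTRL_MAC, cmd VIRTIO_NET_CTRL_MAC_TABLE_SET, followed by two
 * data buffers: first the unicast table (an entry count, then that many
 * 6-byte addresses), then the multicast table in the same format, passed to
 * virtio_send_command() as pkt_num = 2 with len[] holding the two lengths.
 */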
1230
1231 static int
1232 virtio_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1233                     uint32_t index, uint32_t vmdq __rte_unused)
1234 {
1235         struct virtio_hw *hw = dev->data->dev_private;
1236         const struct rte_ether_addr *addrs = dev->data->mac_addrs;
1237         unsigned int i;
1238         struct virtio_net_ctrl_mac *uc, *mc;
1239
1240         if (index >= VIRTIO_MAX_MAC_ADDRS) {
1241                 PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1242                 return -EINVAL;
1243         }
1244
1245         uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1246                 sizeof(uc->entries));
1247         uc->entries = 0;
1248         mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1249                 sizeof(mc->entries));
1250         mc->entries = 0;
1251
1252         for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1253                 const struct rte_ether_addr *addr
1254                         = (i == index) ? mac_addr : addrs + i;
1255                 struct virtio_net_ctrl_mac *tbl
1256                         = rte_is_multicast_ether_addr(addr) ? mc : uc;
1257
1258                 memcpy(&tbl->macs[tbl->entries++], addr, RTE_ETHER_ADDR_LEN);
1259         }
1260
1261         return virtio_mac_table_set(hw, uc, mc);
1262 }
1263
1264 static void
1265 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
1266 {
1267         struct virtio_hw *hw = dev->data->dev_private;
1268         struct rte_ether_addr *addrs = dev->data->mac_addrs;
1269         struct virtio_net_ctrl_mac *uc, *mc;
1270         unsigned int i;
1271
1272         if (index >= VIRTIO_MAX_MAC_ADDRS) {
1273                 PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1274                 return;
1275         }
1276
1277         uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1278                 sizeof(uc->entries));
1279         uc->entries = 0;
1280         mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1281                 sizeof(mc->entries));
1282         mc->entries = 0;
1283
1284         for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1285                 struct virtio_net_ctrl_mac *tbl;
1286
1287                 if (i == index || rte_is_zero_ether_addr(addrs + i))
1288                         continue;
1289
1290                 tbl = rte_is_multicast_ether_addr(addrs + i) ? mc : uc;
1291                 memcpy(&tbl->macs[tbl->entries++], addrs + i,
1292                         RTE_ETHER_ADDR_LEN);
1293         }
1294
1295         virtio_mac_table_set(hw, uc, mc);
1296 }
1297
1298 static int
1299 virtio_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1300 {
1301         struct virtio_hw *hw = dev->data->dev_private;
1302
1303         memcpy(hw->mac_addr, mac_addr, RTE_ETHER_ADDR_LEN);
1304
1305         /* Use atomic update if available */
1306         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1307                 struct virtio_pmd_ctrl ctrl;
1308                 int len = RTE_ETHER_ADDR_LEN;
1309
1310                 ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1311                 ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
1312
1313                 memcpy(ctrl.data, mac_addr, RTE_ETHER_ADDR_LEN);
1314                 return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1315         }
1316
1317         if (!virtio_with_feature(hw, VIRTIO_NET_F_MAC))
1318                 return -ENOTSUP;
1319
1320         virtio_set_hwaddr(hw);
1321         return 0;
1322 }
1323
1324 static int
1325 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1326 {
1327         struct virtio_hw *hw = dev->data->dev_private;
1328         struct virtio_pmd_ctrl ctrl;
1329         int len;
1330
1331         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1332                 return -ENOTSUP;
1333
1334         ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1335         ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1336         memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1337         len = sizeof(vlan_id);
1338
1339         return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1340 }
1341
1342 static int
1343 virtio_intr_unmask(struct rte_eth_dev *dev)
1344 {
1345         struct virtio_hw *hw = dev->data->dev_private;
1346
1347         if (rte_intr_ack(dev->intr_handle) < 0)
1348                 return -1;
1349
1350         if (VIRTIO_OPS(hw)->intr_detect)
1351                 VIRTIO_OPS(hw)->intr_detect(hw);
1352
1353         return 0;
1354 }
1355
1356 static int
1357 virtio_intr_enable(struct rte_eth_dev *dev)
1358 {
1359         struct virtio_hw *hw = dev->data->dev_private;
1360
1361         if (rte_intr_enable(dev->intr_handle) < 0)
1362                 return -1;
1363
1364         if (VIRTIO_OPS(hw)->intr_detect)
1365                 VIRTIO_OPS(hw)->intr_detect(hw);
1366
1367         return 0;
1368 }
1369
1370 static int
1371 virtio_intr_disable(struct rte_eth_dev *dev)
1372 {
1373         struct virtio_hw *hw = dev->data->dev_private;
1374
1375         if (rte_intr_disable(dev->intr_handle) < 0)
1376                 return -1;
1377
1378         if (VIRTIO_OPS(hw)->intr_detect)
1379                 VIRTIO_OPS(hw)->intr_detect(hw);
1380
1381         return 0;
1382 }
1383
1384 static int
1385 virtio_ethdev_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1386 {
1387         uint64_t host_features;
1388
1389         /* Prepare guest_features: features that the driver wants to support */
1390         PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1391                 req_features);
1392
1393         /* Read device(host) feature bits */
1394         host_features = VIRTIO_OPS(hw)->get_features(hw);
1395         PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1396                 host_features);
1397
1398         /* If supported, ensure MTU value is valid before acknowledging it. */
1399         if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1400                 struct virtio_net_config config;
1401
1402                 virtio_read_dev_config(hw,
1403                         offsetof(struct virtio_net_config, mtu),
1404                         &config.mtu, sizeof(config.mtu));
1405
1406                 if (config.mtu < RTE_ETHER_MIN_MTU)
1407                         req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1408         }
1409
1410         /*
1411          * Negotiate features: the subset of device feature bits accepted by
1412          * the driver is written back as the guest feature bits.
1413          */
1414         hw->guest_features = req_features;
1415         hw->guest_features = virtio_negotiate_features(hw, host_features);
1416         PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1417                 hw->guest_features);
1418
1419         if (VIRTIO_OPS(hw)->features_ok(hw) < 0)
1420                 return -1;
1421
1422         if (virtio_with_feature(hw, VIRTIO_F_VERSION_1)) {
1423                 virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1424
1425                 if (!(virtio_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1426                         PMD_INIT_LOG(ERR, "Failed to set FEATURES_OK status!");
1427                         return -1;
1428                 }
1429         }
1430
1431         hw->req_guest_features = req_features;
1432
1433         return 0;
1434 }
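/*
 * Negotiation example (feature bits are only illustrative): if the driver
 * requests CSUM | GUEST_CSUM | MTU but the device only offers CSUM | MTU,
 * the negotiated guest_features become CSUM | MTU, i.e. essentially the
 * intersection of the requested and offered bits; FEATURES_OK is then set
 * and re-read for VIRTIO_F_VERSION_1 devices.
 */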
1435
1436 int
1437 virtio_dev_pause(struct rte_eth_dev *dev)
1438 {
1439         struct virtio_hw *hw = dev->data->dev_private;
1440
1441         rte_spinlock_lock(&hw->state_lock);
1442
1443         if (hw->started == 0) {
1444                 /* Device is already stopped. */
1445                 rte_spinlock_unlock(&hw->state_lock);
1446                 return -1;
1447         }
1448         hw->started = 0;
1449         /*
1450          * Prevent the worker threads from touching queues to avoid contention;
1451          * 1 ms should be enough for the ongoing Tx function to finish.
1452          */
1453         rte_delay_ms(1);
1454         return 0;
1455 }
1456
1457 /*
1458  * Recover hw state to let the worker threads continue.
1459  */
1460 void
1461 virtio_dev_resume(struct rte_eth_dev *dev)
1462 {
1463         struct virtio_hw *hw = dev->data->dev_private;
1464
1465         hw->started = 1;
1466         rte_spinlock_unlock(&hw->state_lock);
1467 }
1468
1469 /*
1470  * Should be called only after device is paused.
1471  */
1472 int
1473 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1474                 int nb_pkts)
1475 {
1476         struct virtio_hw *hw = dev->data->dev_private;
1477         struct virtnet_tx *txvq = dev->data->tx_queues[0];
1478         int ret;
1479
1480         hw->inject_pkts = tx_pkts;
1481         ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1482         hw->inject_pkts = NULL;
1483
1484         return ret;
1485 }
1486
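/*
 * Build a RARP packet from the port MAC address and inject it on Tx queue 0
 * while the datapath is paused, so that peers and switches refresh their
 * forwarding entries (typically after a live migration).
 */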
1487 static void
1488 virtio_notify_peers(struct rte_eth_dev *dev)
1489 {
1490         struct virtio_hw *hw = dev->data->dev_private;
1491         struct virtnet_rx *rxvq;
1492         struct rte_mbuf *rarp_mbuf;
1493
1494         if (!dev->data->rx_queues)
1495                 return;
1496
1497         rxvq = dev->data->rx_queues[0];
1498         if (!rxvq)
1499                 return;
1500
1501         rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
1502                         (struct rte_ether_addr *)hw->mac_addr);
1503         if (rarp_mbuf == NULL) {
1504                 PMD_DRV_LOG(ERR, "failed to make RARP packet.");
1505                 return;
1506         }
1507
1508         /* If virtio port just stopped, no need to send RARP */
1509         if (virtio_dev_pause(dev) < 0) {
1510                 rte_pktmbuf_free(rarp_mbuf);
1511                 return;
1512         }
1513
1514         virtio_inject_pkts(dev, &rarp_mbuf, 1);
1515         virtio_dev_resume(dev);
1516 }
1517
1518 static void
1519 virtio_ack_link_announce(struct rte_eth_dev *dev)
1520 {
1521         struct virtio_hw *hw = dev->data->dev_private;
1522         struct virtio_pmd_ctrl ctrl;
1523
1524         ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
1525         ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
1526
1527         virtio_send_command(hw->cvq, &ctrl, NULL, 0);
1528 }
1529
1530 /*
1531  * Process a virtio config-changed interrupt. Call the LSC callback
1532  * if the link state changed, and generate a gratuitous RARP packet
1533  * if the status indicates an ANNOUNCE.
1534  */
1535 void
1536 virtio_interrupt_handler(void *param)
1537 {
1538         struct rte_eth_dev *dev = param;
1539         struct virtio_hw *hw = dev->data->dev_private;
1540         uint8_t isr;
1541         uint16_t status;
1542
1543         /* Read the interrupt status; reading it also clears the interrupt */
1544         isr = virtio_get_isr(hw);
1545         PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
1546
1547         if (virtio_intr_unmask(dev) < 0)
1548                 PMD_DRV_LOG(ERR, "interrupt enable failed");
1549
1550         if (isr & VIRTIO_ISR_CONFIG) {
1551                 if (virtio_dev_link_update(dev, 0) == 0)
1552                         rte_eth_dev_callback_process(dev,
1553                                                      RTE_ETH_EVENT_INTR_LSC,
1554                                                      NULL);
1555
1556                 if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1557                         virtio_read_dev_config(hw,
1558                                 offsetof(struct virtio_net_config, status),
1559                                 &status, sizeof(status));
1560                         if (status & VIRTIO_NET_S_ANNOUNCE) {
1561                                 virtio_notify_peers(dev);
1562                                 if (hw->cvq)
1563                                         virtio_ack_link_announce(dev);
1564                         }
1565                 }
1566         }
1567 }
1568
1569 /* Set the Rx and Tx handlers according to what is supported */
1570 static void
1571 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1572 {
1573         struct virtio_hw *hw = eth_dev->data->dev_private;
1574
1575         eth_dev->tx_pkt_prepare = virtio_xmit_pkts_prepare;
1576         if (virtio_with_packed_queue(hw)) {
1577                 PMD_INIT_LOG(INFO,
1578                         "virtio: using packed ring %s Tx path on port %u",
1579                         hw->use_vec_tx ? "vectorized" : "standard",
1580                         eth_dev->data->port_id);
1581                 if (hw->use_vec_tx)
1582                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
1583                 else
1584                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
1585         } else {
1586                 if (hw->use_inorder_tx) {
1587                         PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
1588                                 eth_dev->data->port_id);
1589                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
1590                 } else {
1591                         PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1592                                 eth_dev->data->port_id);
1593                         eth_dev->tx_pkt_burst = virtio_xmit_pkts;
1594                 }
1595         }
1596
1597         if (virtio_with_packed_queue(hw)) {
1598                 if (hw->use_vec_rx) {
1599                         PMD_INIT_LOG(INFO,
1600                                 "virtio: using packed ring vectorized Rx path on port %u",
1601                                 eth_dev->data->port_id);
1602                         eth_dev->rx_pkt_burst =
1603                                 &virtio_recv_pkts_packed_vec;
1604                 } else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1605                         PMD_INIT_LOG(INFO,
1606                                 "virtio: using packed ring mergeable buffer Rx path on port %u",
1607                                 eth_dev->data->port_id);
1608                         eth_dev->rx_pkt_burst =
1609                                 &virtio_recv_mergeable_pkts_packed;
1610                 } else {
1611                         PMD_INIT_LOG(INFO,
1612                                 "virtio: using packed ring standard Rx path on port %u",
1613                                 eth_dev->data->port_id);
1614                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
1615                 }
1616         } else {
1617                 if (hw->use_vec_rx) {
1618                         PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
1619                                 eth_dev->data->port_id);
1620                         eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1621                 } else if (hw->use_inorder_rx) {
1622                         PMD_INIT_LOG(INFO,
1623                                 "virtio: using inorder Rx path on port %u",
1624                                 eth_dev->data->port_id);
1625                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_inorder;
1626                 } else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1627                         PMD_INIT_LOG(INFO,
1628                                 "virtio: using mergeable buffer Rx path on port %u",
1629                                 eth_dev->data->port_id);
1630                         eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1631                 } else {
1632                         PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1633                                 eth_dev->data->port_id);
1634                         eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1635                 }
1636         }
1637
1638 }
1639
1640 /* Only 1:1 queue/interrupt mapping is supported so far.
1641  * TODO: support n:1 queue/interrupt mapping when there is a limited number of
1642  * interrupt vectors (<N+1).
1643  */
1644 static int
1645 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1646 {
1647         uint32_t i;
1648         struct virtio_hw *hw = dev->data->dev_private;
1649
1650         PMD_INIT_LOG(INFO, "queue/interrupt binding");
1651         for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1652                 dev->intr_handle->intr_vec[i] = i + 1;
1653                 if (VIRTIO_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1654                                                  VIRTIO_MSI_NO_VECTOR) {
1655                         PMD_DRV_LOG(ERR, "failed to set queue vector");
1656                         return -EBUSY;
1657                 }
1658         }
1659
1660         return 0;
1661 }
1662
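/*
 * Clear the per-queue MSI-X vector assignments set up by
 * virtio_queues_bind_intr(). VTNET_CQ (assumed to be 2) is used here as the
 * stride between queue pairs, so hw->vqs[i * VTNET_CQ] is the Rx queue of
 * pair i.
 */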
1663 static void
1664 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1665 {
1666         uint32_t i;
1667         struct virtio_hw *hw = dev->data->dev_private;
1668
1669         PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
1670         for (i = 0; i < dev->data->nb_rx_queues; ++i)
1671                 VIRTIO_OPS(hw)->set_queue_irq(hw,
1672                                              hw->vqs[i * VTNET_CQ],
1673                                              VIRTIO_MSI_NO_VECTOR);
1674 }
1675
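/*
 * Set up Rx queue interrupts: allocate one eventfd per Rx queue, leave
 * vector 0 for config/link-state events and bind Rx queue i to vector i + 1
 * (see virtio_queues_bind_intr() above).
 */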
1676 static int
1677 virtio_configure_intr(struct rte_eth_dev *dev)
1678 {
1679         struct virtio_hw *hw = dev->data->dev_private;
1680
1681         if (!rte_intr_cap_multiple(dev->intr_handle)) {
1682                 PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
1683                 return -ENOTSUP;
1684         }
1685
1686         if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1687                 PMD_INIT_LOG(ERR, "Fail to create eventfd");
1688                 return -1;
1689         }
1690
1691         if (!dev->intr_handle->intr_vec) {
1692                 dev->intr_handle->intr_vec =
1693                         rte_zmalloc("intr_vec",
1694                                     hw->max_queue_pairs * sizeof(int), 0);
1695                 if (!dev->intr_handle->intr_vec) {
1696                         PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1697                                      hw->max_queue_pairs);
1698                         return -ENOMEM;
1699                 }
1700         }
1701
1702         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1703                 /* Re-register callback to update max_intr */
1704                 rte_intr_callback_unregister(dev->intr_handle,
1705                                              virtio_interrupt_handler,
1706                                              dev);
1707                 rte_intr_callback_register(dev->intr_handle,
1708                                            virtio_interrupt_handler,
1709                                            dev);
1710         }
1711
1712         /* DO NOT remove this! This call enables MSI-X; without it QEMU
1713          * hits a SIGSEGV when DRIVER_OK is sent.
1714          * For legacy devices it must also be done before queue/vector binding,
1715          * so the config size grows from 20 to 24 bytes; otherwise the
1716          * VIRTIO_MSI_QUEUE_VECTOR field (offset 22) is ignored.
1717          */
1718         if (virtio_intr_enable(dev) < 0) {
1719                 PMD_DRV_LOG(ERR, "interrupt enable failed");
1720                 return -1;
1721         }
1722
1723         if (virtio_queues_bind_intr(dev) < 0) {
1724                 PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1725                 return -1;
1726         }
1727
1728         return 0;
1729 }
1730 #define DUPLEX_UNKNOWN   0xff
1731 /* reset device and renegotiate features if needed */
1732 static int
1733 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
1734 {
1735         struct virtio_hw *hw = eth_dev->data->dev_private;
1736         struct virtio_net_config *config;
1737         struct virtio_net_config local_config;
1738         int ret;
1739
1740         /* Reset the device (not strictly necessary at startup) */
1741         virtio_reset(hw);
1742
1743         if (hw->vqs) {
1744                 virtio_dev_free_mbufs(eth_dev);
1745                 virtio_free_queues(hw);
1746         }
1747
1748         /* Tell the host we've noticed this device. */
1749         virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
1750
1751         /* Tell the host we know how to drive the device. */
1752         virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
1753         if (virtio_ethdev_negotiate_features(hw, req_features) < 0)
1754                 return -1;
1755
1756         hw->weak_barriers = !virtio_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);
1757
1758         /* If the host does not support both the status feature and MSI-X, disable LSC */
1759         if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS) && hw->intr_lsc)
1760                 eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
1761         else
1762                 eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
1763
1764         eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1765
1766         /* Set up the Rx header size for the device */
1767         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
1768             virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
1769             virtio_with_packed_queue(hw))
1770                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1771         else
1772                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
1773
1774         /* Copy the permanent MAC address into virtio_hw */
1775         virtio_get_hwaddr(hw);
1776         rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
1777                         &eth_dev->data->mac_addrs[0]);
1778         PMD_INIT_LOG(DEBUG,
1779                      "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
1780                      hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
1781                      hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
1782
1783         if (hw->speed == ETH_SPEED_NUM_UNKNOWN) {
1784                 if (virtio_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX)) {
1785                         config = &local_config;
1786                         virtio_read_dev_config(hw,
1787                                 offsetof(struct virtio_net_config, speed),
1788                                 &config->speed, sizeof(config->speed));
1789                         virtio_read_dev_config(hw,
1790                                 offsetof(struct virtio_net_config, duplex),
1791                                 &config->duplex, sizeof(config->duplex));
1792                         hw->speed = config->speed;
1793                         hw->duplex = config->duplex;
1794                 }
1795         }
1796         if (hw->duplex == DUPLEX_UNKNOWN)
1797                 hw->duplex = ETH_LINK_FULL_DUPLEX;
1798         PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
1799                 hw->speed, hw->duplex);
1800         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
1801                 config = &local_config;
1802
1803                 virtio_read_dev_config(hw,
1804                         offsetof(struct virtio_net_config, mac),
1805                         &config->mac, sizeof(config->mac));
1806
1807                 if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1808                         virtio_read_dev_config(hw,
1809                                 offsetof(struct virtio_net_config, status),
1810                                 &config->status, sizeof(config->status));
1811                 } else {
1812                         PMD_INIT_LOG(DEBUG,
1813                                      "VIRTIO_NET_F_STATUS is not supported");
1814                         config->status = 0;
1815                 }
1816
1817                 if (virtio_with_feature(hw, VIRTIO_NET_F_MQ)) {
1818                         virtio_read_dev_config(hw,
1819                                 offsetof(struct virtio_net_config, max_virtqueue_pairs),
1820                                 &config->max_virtqueue_pairs,
1821                                 sizeof(config->max_virtqueue_pairs));
1822                 } else {
1823                         PMD_INIT_LOG(DEBUG,
1824                                      "VIRTIO_NET_F_MQ is not supported");
1825                         config->max_virtqueue_pairs = 1;
1826                 }
1827
1828                 hw->max_queue_pairs = config->max_virtqueue_pairs;
1829
1830                 if (virtio_with_feature(hw, VIRTIO_NET_F_MTU)) {
1831                         virtio_read_dev_config(hw,
1832                                 offsetof(struct virtio_net_config, mtu),
1833                                 &config->mtu,
1834                                 sizeof(config->mtu));
1835
1836                         /*
1837                          * MTU value has already been checked at negotiation
1838                          * time, but check again in case it has changed since
1839                          * then, which should not happen.
1840                          */
1841                         if (config->mtu < RTE_ETHER_MIN_MTU) {
1842                                 PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
1843                                                 config->mtu);
1844                                 return -1;
1845                         }
1846
1847                         hw->max_mtu = config->mtu;
1848                         /* Set the initial MTU to the maximum one supported by the host */
1849                         eth_dev->data->mtu = config->mtu;
1850
1851                 } else {
1852                         hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
1853                                 VLAN_TAG_LEN - hw->vtnet_hdr_size;
1854                 }
1855
1856                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
1857                                 config->max_virtqueue_pairs);
1858                 PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
1859                 PMD_INIT_LOG(DEBUG,
1860                                 "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
1861                                 config->mac[0], config->mac[1],
1862                                 config->mac[2], config->mac[3],
1863                                 config->mac[4], config->mac[5]);
1864         } else {
1865                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
1866                 hw->max_queue_pairs = 1;
1867                 hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
1868                         VLAN_TAG_LEN - hw->vtnet_hdr_size;
1869         }
1870
1871         ret = virtio_alloc_queues(eth_dev);
1872         if (ret < 0)
1873                 return ret;
1874
1875         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1876                 if (virtio_configure_intr(eth_dev) < 0) {
1877                         PMD_INIT_LOG(ERR, "failed to configure interrupt");
1878                         virtio_free_queues(hw);
1879                         return -1;
1880                 }
1881         }
1882
1883         virtio_reinit_complete(hw);
1884
1885         return 0;
1886 }
1887
1888 /*
1889  * This function is based on the probe() function in virtio_pci.c.
1890  * It returns 0 on success.
1891  */
1892 int
1893 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
1894 {
1895         struct virtio_hw *hw = eth_dev->data->dev_private;
1896         uint32_t speed = ETH_SPEED_NUM_UNKNOWN;
1897         int vectorized = 0;
1898         int ret;
1899
1900         if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) {
1901                 PMD_INIT_LOG(ERR,
1902                         "Insufficient headroom: required = %d, avail = %d",
1903                         (int)sizeof(struct virtio_net_hdr_mrg_rxbuf),
1904                         RTE_PKTMBUF_HEADROOM);
1905
1906                 return -1;
1907         }
1908
1909         eth_dev->dev_ops = &virtio_eth_dev_ops;
1910         eth_dev->rx_descriptor_done = virtio_dev_rx_queue_done;
1911
1912         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1913                 set_rxtx_funcs(eth_dev);
1914                 return 0;
1915         }
1916
1917         ret = virtio_dev_devargs_parse(eth_dev->device->devargs, &speed, &vectorized);
1918         if (ret < 0)
1919                 return ret;
1920         hw->speed = speed;
1921         hw->duplex = DUPLEX_UNKNOWN;
1922
1923         /* Allocate memory for storing MAC addresses */
1924         eth_dev->data->mac_addrs = rte_zmalloc("virtio",
1925                                 VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN, 0);
1926         if (eth_dev->data->mac_addrs == NULL) {
1927                 PMD_INIT_LOG(ERR,
1928                         "Failed to allocate %d bytes needed to store MAC addresses",
1929                         VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN);
1930                 return -ENOMEM;
1931         }
1932
1933         rte_spinlock_init(&hw->state_lock);
1934
1935         /* reset device and negotiate default features */
1936         ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
1937         if (ret < 0)
1938                 goto err_virtio_init;
1939
1940         if (vectorized) {
1941                 if (!virtio_with_packed_queue(hw)) {
1942                         hw->use_vec_rx = 1;
1943                 } else {
1944 #if defined(CC_AVX512_SUPPORT) || defined(RTE_ARCH_ARM)
1945                         hw->use_vec_rx = 1;
1946                         hw->use_vec_tx = 1;
1947 #else
1948                         PMD_DRV_LOG(INFO,
1949                                 "building environment does not support the packed ring vectorized path");
1950 #endif
1951                 }
1952         }
1953
1954         hw->opened = 1;
1955
1956         return 0;
1957
1958 err_virtio_init:
1959         rte_free(eth_dev->data->mac_addrs);
1960         eth_dev->data->mac_addrs = NULL;
1961         return ret;
1962 }
1963
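/*
 * Map an ETH_SPEED_NUM_* value to the matching ETH_LINK_SPEED_* capability
 * bit; returning 0 for unknown values lets callers use this to validate the
 * "speed" devarg.
 */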
1964 static uint32_t
1965 virtio_dev_speed_capa_get(uint32_t speed)
1966 {
1967         switch (speed) {
1968         case ETH_SPEED_NUM_10G:
1969                 return ETH_LINK_SPEED_10G;
1970         case ETH_SPEED_NUM_20G:
1971                 return ETH_LINK_SPEED_20G;
1972         case ETH_SPEED_NUM_25G:
1973                 return ETH_LINK_SPEED_25G;
1974         case ETH_SPEED_NUM_40G:
1975                 return ETH_LINK_SPEED_40G;
1976         case ETH_SPEED_NUM_50G:
1977                 return ETH_LINK_SPEED_50G;
1978         case ETH_SPEED_NUM_56G:
1979                 return ETH_LINK_SPEED_56G;
1980         case ETH_SPEED_NUM_100G:
1981                 return ETH_LINK_SPEED_100G;
1982         case ETH_SPEED_NUM_200G:
1983                 return ETH_LINK_SPEED_200G;
1984         default:
1985                 return 0;
1986         }
1987 }
1988
1989 static int vectorized_check_handler(__rte_unused const char *key,
1990                 const char *value, void *ret_val)
1991 {
1992         if (strcmp(value, "1") == 0)
1993                 *(int *)ret_val = 1;
1994         else
1995                 *(int *)ret_val = 0;
1996
1997         return 0;
1998 }
1999
2000 #define VIRTIO_ARG_SPEED      "speed"
2001 #define VIRTIO_ARG_VECTORIZED "vectorized"
2002
2003 static int
2004 link_speed_handler(const char *key __rte_unused,
2005                 const char *value, void *ret_val)
2006 {
2007         uint32_t val;
2008         if (!value || !ret_val)
2009                 return -EINVAL;
2010         val = strtoul(value, NULL, 0);
2011         /* validate input */
2012         if (virtio_dev_speed_capa_get(val) == 0)
2013                 return -EINVAL;
2014         *(uint32_t *)ret_val = val;
2015
2016         return 0;
2017 }
2018
2019
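/*
 * Parse the driver-specific devargs: "speed=<ETH_SPEED_NUM_* value>" forces
 * the reported link speed, and "vectorized=1" requests the vectorized
 * datapaths (subject to the runtime checks in virtio_dev_configure()).
 */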
2020 static int
2021 virtio_dev_devargs_parse(struct rte_devargs *devargs, uint32_t *speed, int *vectorized)
2022 {
2023         struct rte_kvargs *kvlist;
2024         int ret = 0;
2025
2026         if (devargs == NULL)
2027                 return 0;
2028
2029         kvlist = rte_kvargs_parse(devargs->args, NULL);
2030         if (kvlist == NULL) {
2031                 PMD_INIT_LOG(ERR, "error when parsing param");
2032                 return 0;
2033         }
2034
2035         if (speed && rte_kvargs_count(kvlist, VIRTIO_ARG_SPEED) == 1) {
2036                 ret = rte_kvargs_process(kvlist,
2037                                         VIRTIO_ARG_SPEED,
2038                                         link_speed_handler, speed);
2039                 if (ret < 0) {
2040                         PMD_INIT_LOG(ERR, "Failed to parse %s",
2041                                         VIRTIO_ARG_SPEED);
2042                         goto exit;
2043                 }
2044         }
2045
2046         if (vectorized &&
2047                 rte_kvargs_count(kvlist, VIRTIO_ARG_VECTORIZED) == 1) {
2048                 ret = rte_kvargs_process(kvlist,
2049                                 VIRTIO_ARG_VECTORIZED,
2050                                 vectorized_check_handler, vectorized);
2051                 if (ret < 0) {
2052                         PMD_INIT_LOG(ERR, "Failed to parse %s",
2053                                         VIRTIO_ARG_VECTORIZED);
2054                         goto exit;
2055                 }
2056         }
2057
2058 exit:
2059         rte_kvargs_free(kvlist);
2060         return ret;
2061 }
2062
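/*
 * Helpers reporting whether any Rx/Tx checksum or TSO feature was negotiated,
 * so the datapaths know whether the virtio-net header offload fields need to
 * be parsed or filled in.
 */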
2063 static uint8_t
2064 rx_offload_enabled(struct virtio_hw *hw)
2065 {
2066         return virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
2067                 virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2068                 virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
2069 }
2070
2071 static uint8_t
2072 tx_offload_enabled(struct virtio_hw *hw)
2073 {
2074         return virtio_with_feature(hw, VIRTIO_NET_F_CSUM) ||
2075                 virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
2076                 virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
2077 }
2078
2079 /*
2080  * Configure virtio device
2081  * It returns 0 on success.
2082  */
2083 static int
2084 virtio_dev_configure(struct rte_eth_dev *dev)
2085 {
2086         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2087         const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
2088         struct virtio_hw *hw = dev->data->dev_private;
2089         uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
2090                 hw->vtnet_hdr_size;
2091         uint64_t rx_offloads = rxmode->offloads;
2092         uint64_t tx_offloads = txmode->offloads;
2093         uint64_t req_features;
2094         int ret;
2095
2096         PMD_INIT_LOG(DEBUG, "configure");
2097         req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
2098
2099         if (rxmode->mq_mode != ETH_MQ_RX_NONE) {
2100                 PMD_DRV_LOG(ERR,
2101                         "Unsupported Rx multi queue mode %d",
2102                         rxmode->mq_mode);
2103                 return -EINVAL;
2104         }
2105
2106         if (txmode->mq_mode != ETH_MQ_TX_NONE) {
2107                 PMD_DRV_LOG(ERR,
2108                         "Unsupported Tx multi queue mode %d",
2109                         txmode->mq_mode);
2110                 return -EINVAL;
2111         }
2112
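        /*
         * If Rx queue interrupts were requested, re-init the device now so the
         * per-queue interrupt vectors get allocated and bound before the
         * queues are set up (see virtio_configure_intr()).
         */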
2113         if (dev->data->dev_conf.intr_conf.rxq) {
2114                 ret = virtio_init_device(dev, hw->req_guest_features);
2115                 if (ret < 0)
2116                         return ret;
2117         }
2118
2119         if ((rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) &&
2120             (rxmode->max_rx_pkt_len > hw->max_mtu + ether_hdr_len))
2121                 req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
2122
2123         if (rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME)
2124                 hw->max_rx_pkt_len = rxmode->max_rx_pkt_len;
2125         else
2126                 hw->max_rx_pkt_len = ether_hdr_len + dev->data->mtu;
2127
2128         if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2129                            DEV_RX_OFFLOAD_TCP_CKSUM))
2130                 req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
2131
2132         if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO)
2133                 req_features |=
2134                         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2135                         (1ULL << VIRTIO_NET_F_GUEST_TSO6);
2136
2137         if (tx_offloads & (DEV_TX_OFFLOAD_UDP_CKSUM |
2138                            DEV_TX_OFFLOAD_TCP_CKSUM))
2139                 req_features |= (1ULL << VIRTIO_NET_F_CSUM);
2140
2141         if (tx_offloads & DEV_TX_OFFLOAD_TCP_TSO)
2142                 req_features |=
2143                         (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2144                         (1ULL << VIRTIO_NET_F_HOST_TSO6);
2145
2146         /* if the requested features changed, re-init the device */
2147         if (req_features != hw->req_guest_features) {
2148                 ret = virtio_init_device(dev, req_features);
2149                 if (ret < 0)
2150                         return ret;
2151         }
2152
2153         if ((rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2154                             DEV_RX_OFFLOAD_TCP_CKSUM)) &&
2155                 !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2156                 PMD_DRV_LOG(ERR,
2157                         "rx checksum not available on this host");
2158                 return -ENOTSUP;
2159         }
2160
2161         if ((rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) &&
2162                 (!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2163                  !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2164                 PMD_DRV_LOG(ERR,
2165                         "Large Receive Offload not available on this host");
2166                 return -ENOTSUP;
2167         }
2168
2169         /* start control queue */
2170         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2171                 virtio_dev_cq_start(dev);
2172
2173         if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2174                 hw->vlan_strip = 1;
2175
2176         hw->rx_ol_scatter = (rx_offloads & DEV_RX_OFFLOAD_SCATTER);
2177
2178         if ((rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
2179                         !virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2180                 PMD_DRV_LOG(ERR,
2181                             "vlan filtering not available on this host");
2182                 return -ENOTSUP;
2183         }
2184
2185         hw->has_tx_offload = tx_offload_enabled(hw);
2186         hw->has_rx_offload = rx_offload_enabled(hw);
2187
2188         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2189                 /* Enable vector (0) for Link State Interrupt */
2190                 if (VIRTIO_OPS(hw)->set_config_irq(hw, 0) ==
2191                                 VIRTIO_MSI_NO_VECTOR) {
2192                         PMD_DRV_LOG(ERR, "failed to set config vector");
2193                         return -EBUSY;
2194                 }
2195
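        /*
         * Vectorized datapaths have extra requirements (AVX-512 or NEON,
         * VIRTIO_F_VERSION_1, VIRTIO_F_IN_ORDER, a sufficient SIMD bitwidth,
         * and no conflicting offloads); fall back to the scalar paths when
         * any of them is not met.
         */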
2196         if (virtio_with_packed_queue(hw)) {
2197 #if defined(RTE_ARCH_X86_64) && defined(CC_AVX512_SUPPORT)
2198                 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2199                     (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
2200                      !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2201                      !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2202                      rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512)) {
2203                         PMD_DRV_LOG(INFO,
2204                                 "disabled packed ring vectorized path for requirements not met");
2205                         hw->use_vec_rx = 0;
2206                         hw->use_vec_tx = 0;
2207                 }
2208 #elif defined(RTE_ARCH_ARM)
2209                 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2210                     (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) ||
2211                      !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2212                      !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2213                      rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)) {
2214                         PMD_DRV_LOG(INFO,
2215                                 "disabled packed ring vectorized path for requirements not met");
2216                         hw->use_vec_rx = 0;
2217                         hw->use_vec_tx = 0;
2218                 }
2219 #else
2220                 hw->use_vec_rx = 0;
2221                 hw->use_vec_tx = 0;
2222 #endif
2223
2224                 if (hw->use_vec_rx) {
2225                         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2226                                 PMD_DRV_LOG(INFO,
2227                                         "disabled packed ring vectorized rx for mrg_rxbuf enabled");
2228                                 hw->use_vec_rx = 0;
2229                         }
2230
2231                         if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) {
2232                                 PMD_DRV_LOG(INFO,
2233                                         "disabled packed ring vectorized rx for TCP_LRO enabled");
2234                                 hw->use_vec_rx = 0;
2235                         }
2236                 }
2237         } else {
2238                 if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2239                         hw->use_inorder_tx = 1;
2240                         hw->use_inorder_rx = 1;
2241                         hw->use_vec_rx = 0;
2242                 }
2243
2244                 if (hw->use_vec_rx) {
2245 #if defined RTE_ARCH_ARM
2246                         if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2247                                 PMD_DRV_LOG(INFO,
2248                                         "disabled split ring vectorized path for requirement not met");
2249                                 hw->use_vec_rx = 0;
2250                         }
2251 #endif
2252                         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2253                                 PMD_DRV_LOG(INFO,
2254                                         "disabled split ring vectorized rx for mrg_rxbuf enabled");
2255                                 hw->use_vec_rx = 0;
2256                         }
2257
2258                         if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2259                                            DEV_RX_OFFLOAD_TCP_CKSUM |
2260                                            DEV_RX_OFFLOAD_TCP_LRO |
2261                                            DEV_RX_OFFLOAD_VLAN_STRIP)) {
2262                                 PMD_DRV_LOG(INFO,
2263                                         "disabled split ring vectorized rx for offloading enabled");
2264                                 hw->use_vec_rx = 0;
2265                         }
2266
2267                         if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
2268                                 PMD_DRV_LOG(INFO,
2269                                         "disabled split ring vectorized rx, max SIMD bitwidth too low");
2270                                 hw->use_vec_rx = 0;
2271                         }
2272                 }
2273         }
2274
2275         return 0;
2276 }
2277
2278
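/*
 * Start the device: finish per-queue setup, (re)enable interrupts if LSC or
 * Rx-queue interrupts were requested, notify the backend for every queue, and
 * only then select the Rx/Tx burst functions and mark the port as started.
 */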
2279 static int
2280 virtio_dev_start(struct rte_eth_dev *dev)
2281 {
2282         uint16_t nb_queues, i;
2283         struct virtqueue *vq;
2284         struct virtio_hw *hw = dev->data->dev_private;
2285         int ret;
2286
2287         /* Finish the initialization of the queues */
2288         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2289                 ret = virtio_dev_rx_queue_setup_finish(dev, i);
2290                 if (ret < 0)
2291                         return ret;
2292         }
2293         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2294                 ret = virtio_dev_tx_queue_setup_finish(dev, i);
2295                 if (ret < 0)
2296                         return ret;
2297         }
2298
2299         /* check if lsc interrupt feature is enabled */
2300         if (dev->data->dev_conf.intr_conf.lsc) {
2301                 if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2302                         PMD_DRV_LOG(ERR, "link status not supported by host");
2303                         return -ENOTSUP;
2304                 }
2305         }
2306
2307         /* Enable uio/vfio intr/eventfd mapping: although this was already done
2308          * in device configure, it could have been unmapped when the device was
2309          * stopped.
2310          */
2311         if (dev->data->dev_conf.intr_conf.lsc ||
2312             dev->data->dev_conf.intr_conf.rxq) {
2313                 virtio_intr_disable(dev);
2314
2315                 /* Set up the interrupt callback */
2316                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2317                         rte_intr_callback_register(dev->intr_handle,
2318                                                    virtio_interrupt_handler,
2319                                                    dev);
2320
2321                 if (virtio_intr_enable(dev) < 0) {
2322                         PMD_DRV_LOG(ERR, "interrupt enable failed");
2323                         return -EIO;
2324                 }
2325         }
2326
2327         /* Notify the backend.
2328          * Otherwise the tap backend might already have stopped its queue due to
2329          * fullness, and the vhost backend would have no chance to be woken up.
2330          */
2331         nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2332         if (hw->max_queue_pairs > 1) {
2333                 if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2334                         return -EINVAL;
2335         }
2336
2337         PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
2338
2339         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2340                 vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2341                 /* Flush the old packets */
2342                 virtqueue_rxvq_flush(vq);
2343                 virtqueue_notify(vq);
2344         }
2345
2346         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2347                 vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2348                 virtqueue_notify(vq);
2349         }
2350
2351         PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
2352
2353         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2354                 vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2355                 VIRTQUEUE_DUMP(vq);
2356         }
2357
2358         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2359                 vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2360                 VIRTQUEUE_DUMP(vq);
2361         }
2362
2363         set_rxtx_funcs(dev);
2364         hw->started = 1;
2365
2366         /* Initialize Link state */
2367         virtio_dev_link_update(dev, 0);
2368
2369         return 0;
2370 }
2371
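/*
 * Walk every Rx/Tx virtqueue and free any mbufs still attached to
 * descriptors; used when the queues are torn down or re-initialized.
 */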
2372 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2373 {
2374         struct virtio_hw *hw = dev->data->dev_private;
2375         uint16_t nr_vq = virtio_get_nr_vq(hw);
2376         const char *type __rte_unused;
2377         unsigned int i, mbuf_num = 0;
2378         struct virtqueue *vq;
2379         struct rte_mbuf *buf;
2380         int queue_type;
2381
2382         if (hw->vqs == NULL)
2383                 return;
2384
2385         for (i = 0; i < nr_vq; i++) {
2386                 vq = hw->vqs[i];
2387                 if (!vq)
2388                         continue;
2389
2390                 queue_type = virtio_get_queue_type(hw, i);
2391                 if (queue_type == VTNET_RQ)
2392                         type = "rxq";
2393                 else if (queue_type == VTNET_TQ)
2394                         type = "txq";
2395                 else
2396                         continue;
2397
2398                 PMD_INIT_LOG(DEBUG,
2399                         "Before freeing %s[%d] used and unused buf",
2400                         type, i);
2401                 VIRTQUEUE_DUMP(vq);
2402
2403                 while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2404                         rte_pktmbuf_free(buf);
2405                         mbuf_num++;
2406                 }
2407
2408                 PMD_INIT_LOG(DEBUG,
2409                         "After freeing %s[%d] used and unused buf",
2410                         type, i);
2411                 VIRTQUEUE_DUMP(vq);
2412         }
2413
2414         PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2415 }
2416
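/*
 * Drain completed Tx descriptors on every queue using the cleanup routine
 * that matches the negotiated ring layout (packed vs. split, in-order or
 * not), so the attached mbufs are returned before the port stops.
 */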
2417 static void
2418 virtio_tx_completed_cleanup(struct rte_eth_dev *dev)
2419 {
2420         struct virtio_hw *hw = dev->data->dev_private;
2421         struct virtqueue *vq;
2422         int qidx;
2423         void (*xmit_cleanup)(struct virtqueue *vq, uint16_t nb_used);
2424
2425         if (virtio_with_packed_queue(hw)) {
2426                 if (hw->use_vec_tx)
2427                         xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2428                 else if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER))
2429                         xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2430                 else
2431                         xmit_cleanup = &virtio_xmit_cleanup_normal_packed;
2432         } else {
2433                 if (hw->use_inorder_tx)
2434                         xmit_cleanup = &virtio_xmit_cleanup_inorder;
2435                 else
2436                         xmit_cleanup = &virtio_xmit_cleanup;
2437         }
2438
2439         for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
2440                 vq = hw->vqs[2 * qidx + VTNET_SQ_TQ_QUEUE_IDX];
2441                 if (vq != NULL)
2442                         xmit_cleanup(vq, virtqueue_nused(vq));
2443         }
2444 }
2445
2446 /*
2447  * Stop device: disable interrupt and mark link down
2448  */
2449 int
2450 virtio_dev_stop(struct rte_eth_dev *dev)
2451 {
2452         struct virtio_hw *hw = dev->data->dev_private;
2453         struct rte_eth_link link;
2454         struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2455
2456         PMD_INIT_LOG(DEBUG, "stop");
2457         dev->data->dev_started = 0;
2458
2459         rte_spinlock_lock(&hw->state_lock);
2460         if (!hw->started)
2461                 goto out_unlock;
2462         hw->started = 0;
2463
2464         virtio_tx_completed_cleanup(dev);
2465
2466         if (intr_conf->lsc || intr_conf->rxq) {
2467                 virtio_intr_disable(dev);
2468
2469                 /* Reset interrupt callback  */
2470                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2471                         rte_intr_callback_unregister(dev->intr_handle,
2472                                                      virtio_interrupt_handler,
2473                                                      dev);
2474                 }
2475         }
2476
2477         memset(&link, 0, sizeof(link));
2478         rte_eth_linkstatus_set(dev, &link);
2479 out_unlock:
2480         rte_spinlock_unlock(&hw->state_lock);
2481
2482         return 0;
2483 }
2484
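/*
 * Report the link state from the device status field when
 * VIRTIO_NET_F_STATUS was negotiated; otherwise the link is assumed up.
 * Speed and duplex come from the "speed" devarg or, if available, from the
 * VIRTIO_NET_F_SPEED_DUPLEX config space fields read at init time.
 */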
2485 static int
2486 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2487 {
2488         struct rte_eth_link link;
2489         uint16_t status;
2490         struct virtio_hw *hw = dev->data->dev_private;
2491
2492         memset(&link, 0, sizeof(link));
2493         link.link_duplex = hw->duplex;
2494         link.link_speed  = hw->speed;
2495         link.link_autoneg = ETH_LINK_AUTONEG;
2496
2497         if (!hw->started) {
2498                 link.link_status = ETH_LINK_DOWN;
2499                 link.link_speed = ETH_SPEED_NUM_NONE;
2500         } else if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2501                 PMD_INIT_LOG(DEBUG, "Get link status from hw");
2502                 virtio_read_dev_config(hw,
2503                                 offsetof(struct virtio_net_config, status),
2504                                 &status, sizeof(status));
2505                 if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2506                         link.link_status = ETH_LINK_DOWN;
2507                         link.link_speed = ETH_SPEED_NUM_NONE;
2508                         PMD_INIT_LOG(DEBUG, "Port %d is down",
2509                                      dev->data->port_id);
2510                 } else {
2511                         link.link_status = ETH_LINK_UP;
2512                         PMD_INIT_LOG(DEBUG, "Port %d is up",
2513                                      dev->data->port_id);
2514                 }
2515         } else {
2516                 link.link_status = ETH_LINK_UP;
2517         }
2518
2519         return rte_eth_linkstatus_set(dev, &link);
2520 }
2521
2522 static int
2523 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2524 {
2525         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2526         struct virtio_hw *hw = dev->data->dev_private;
2527         uint64_t offloads = rxmode->offloads;
2528
2529         if (mask & ETH_VLAN_FILTER_MASK) {
2530                 if ((offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
2531                                 !virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2532
2533                         PMD_DRV_LOG(NOTICE,
2534                                 "vlan filtering not available on this host");
2535
2536                         return -ENOTSUP;
2537                 }
2538         }
2539
2540         if (mask & ETH_VLAN_STRIP_MASK)
2541                 hw->vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
2542
2543         return 0;
2544 }
2545
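/*
 * Advertise device capabilities. Offload capabilities are derived from the
 * host feature bits (not the currently negotiated set), so they reflect what
 * could be enabled on a future reconfigure.
 */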
2546 static int
2547 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2548 {
2549         uint64_t tso_mask, host_features;
2550         struct virtio_hw *hw = dev->data->dev_private;
2551         dev_info->speed_capa = virtio_dev_speed_capa_get(hw->speed);
2552
2553         dev_info->max_rx_queues =
2554                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
2555         dev_info->max_tx_queues =
2556                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
2557         dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
2558         dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
2559         dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
2560         dev_info->max_mtu = hw->max_mtu;
2561
2562         host_features = VIRTIO_OPS(hw)->get_features(hw);
2563         dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2564         dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME;
2565         if (host_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
2566                 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_SCATTER;
2567         if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
2568                 dev_info->rx_offload_capa |=
2569                         DEV_RX_OFFLOAD_TCP_CKSUM |
2570                         DEV_RX_OFFLOAD_UDP_CKSUM;
2571         }
2572         if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
2573                 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_VLAN_FILTER;
2574         tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2575                 (1ULL << VIRTIO_NET_F_GUEST_TSO6);
2576         if ((host_features & tso_mask) == tso_mask)
2577                 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;
2578
2579         dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
2580                                     DEV_TX_OFFLOAD_VLAN_INSERT;
2581         if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
2582                 dev_info->tx_offload_capa |=
2583                         DEV_TX_OFFLOAD_UDP_CKSUM |
2584                         DEV_TX_OFFLOAD_TCP_CKSUM;
2585         }
2586         tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2587                 (1ULL << VIRTIO_NET_F_HOST_TSO6);
2588         if ((host_features & tso_mask) == tso_mask)
2589                 dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
2590
2591         if (host_features & (1ULL << VIRTIO_F_RING_PACKED)) {
2592                 /*
2593                  * According to 2.7 Packed Virtqueues,
2594                  * 2.7.10.1 Structure Size and Alignment:
2595                  * The Queue Size value does not have to be a power of 2.
2596                  */
2597                 dev_info->rx_desc_lim.nb_max = UINT16_MAX;
2598                 dev_info->tx_desc_lim.nb_max = UINT16_MAX;
2599         } else {
2600                 /*
2601                  * According to 2.6 Split Virtqueues:
2602                  * Queue Size value is always a power of 2. The maximum Queue
2603                  * Size value is 32768.
2604                  */
2605                 dev_info->rx_desc_lim.nb_max = 32768;
2606                 dev_info->tx_desc_lim.nb_max = 32768;
2607         }
2608         /*
2609          * The actual minimum is not the same for virtqueues of different kinds,
2610          * but to avoid tangling the code with separate branches, rely on the
2611          * default thresholds since the descriptor number must be at least their size.
2612          */
2613         dev_info->rx_desc_lim.nb_min = RTE_MAX(DEFAULT_RX_FREE_THRESH,
2614                                                RTE_VIRTIO_VPMD_RX_REARM_THRESH);
2615         dev_info->tx_desc_lim.nb_min = DEFAULT_TX_FREE_THRESH;
2616         dev_info->rx_desc_lim.nb_align = 1;
2617         dev_info->tx_desc_lim.nb_align = 1;
2618
2619         return 0;
2620 }
2621
2622 /*
2623  * It enables testpmd to collect per-queue stats.
2624  */
2625 static int
2626 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
2627 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
2628 __rte_unused uint8_t is_rx)
2629 {
2630         return 0;
2631 }
2632
2633 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_init, init, NOTICE);
2634 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_driver, driver, NOTICE);