net/virtio: fix control VQ
[dpdk.git] / drivers / net / virtio / virtio_ethdev.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10
11 #include <rte_ethdev_driver.h>
12 #include <rte_ethdev_pci.h>
13 #include <rte_memcpy.h>
14 #include <rte_string_fns.h>
15 #include <rte_memzone.h>
16 #include <rte_malloc.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_pci.h>
19 #include <rte_bus_pci.h>
20 #include <rte_ether.h>
21 #include <rte_ip.h>
22 #include <rte_arp.h>
23 #include <rte_common.h>
24 #include <rte_errno.h>
25 #include <rte_cpuflags.h>
26
27 #include <rte_memory.h>
28 #include <rte_eal.h>
29 #include <rte_dev.h>
30 #include <rte_cycles.h>
31 #include <rte_kvargs.h>
32
33 #include "virtio_ethdev.h"
34 #include "virtio_pci.h"
35 #include "virtio_logs.h"
36 #include "virtqueue.h"
37 #include "virtio_rxtx.h"
38
39 static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
40 static int  virtio_dev_configure(struct rte_eth_dev *dev);
41 static int  virtio_dev_start(struct rte_eth_dev *dev);
42 static void virtio_dev_stop(struct rte_eth_dev *dev);
43 static void virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
44 static void virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
45 static void virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
46 static void virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
47 static void virtio_dev_info_get(struct rte_eth_dev *dev,
48                                 struct rte_eth_dev_info *dev_info);
49 static int virtio_dev_link_update(struct rte_eth_dev *dev,
50         int wait_to_complete);
51 static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
52
53 static void virtio_set_hwaddr(struct virtio_hw *hw);
54 static void virtio_get_hwaddr(struct virtio_hw *hw);
55
56 static int virtio_dev_stats_get(struct rte_eth_dev *dev,
57                                  struct rte_eth_stats *stats);
58 static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
59                                  struct rte_eth_xstat *xstats, unsigned n);
60 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
61                                        struct rte_eth_xstat_name *xstats_names,
62                                        unsigned limit);
63 static void virtio_dev_stats_reset(struct rte_eth_dev *dev);
64 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
65 static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
66                                 uint16_t vlan_id, int on);
67 static int virtio_mac_addr_add(struct rte_eth_dev *dev,
68                                 struct ether_addr *mac_addr,
69                                 uint32_t index, uint32_t vmdq);
70 static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
71 static int virtio_mac_addr_set(struct rte_eth_dev *dev,
72                                 struct ether_addr *mac_addr);
73
74 static int virtio_intr_enable(struct rte_eth_dev *dev);
75 static int virtio_intr_disable(struct rte_eth_dev *dev);
76
77 static int virtio_dev_queue_stats_mapping_set(
78         struct rte_eth_dev *eth_dev,
79         uint16_t queue_id,
80         uint8_t stat_idx,
81         uint8_t is_rx);
82
83 int virtio_logtype_init;
84 int virtio_logtype_driver;
85
86 static void virtio_notify_peers(struct rte_eth_dev *dev);
87 static void virtio_ack_link_announce(struct rte_eth_dev *dev);
88
89 /*
90  * The set of PCI devices this driver supports
91  */
92 static const struct rte_pci_id pci_id_virtio_map[] = {
93         { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_LEGACY_DEVICEID_NET) },
94         { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_MODERN_DEVICEID_NET) },
95         { .vendor_id = 0, /* sentinel */ },
96 };
97
98 struct rte_virtio_xstats_name_off {
99         char name[RTE_ETH_XSTATS_NAME_SIZE];
100         unsigned offset;
101 };
102
103 /* [rt]x_qX_ is prepended to the name string here */
104 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
105         {"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
106         {"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
107         {"errors",                 offsetof(struct virtnet_rx, stats.errors)},
108         {"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
109         {"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
110         {"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
111         {"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
112         {"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
113         {"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
114         {"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
115         {"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
116         {"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
117         {"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
118 };
119
120 /* [rt]x_qX_ is prepended to the name string here */
121 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
122         {"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
123         {"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
124         {"errors",                 offsetof(struct virtnet_tx, stats.errors)},
125         {"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
126         {"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
127         {"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
128         {"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
129         {"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
130         {"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
131         {"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
132         {"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
133         {"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
134         {"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
135 };
136
137 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
138                             sizeof(rte_virtio_rxq_stat_strings[0]))
139 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
140                             sizeof(rte_virtio_txq_stat_strings[0]))
141
142 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
143
144 static struct virtio_pmd_ctrl *
145 virtio_send_command_packed(struct virtnet_ctl *cvq,
146                            struct virtio_pmd_ctrl *ctrl,
147                            int *dlen, int pkt_num)
148 {
149         struct virtqueue *vq = cvq->vq;
150         int head;
151         struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
152         struct virtio_pmd_ctrl *result;
153         bool avail_wrap_counter;
154         int sum = 0;
155         int nb_descs = 0;
156         int k;
157
158         /*
159          * Format is enforced in qemu code:
160          * One TX packet for header;
161          * At least one TX packet per argument;
162          * One RX packet for ACK.
163          */
164         head = vq->vq_avail_idx;
165         avail_wrap_counter = vq->avail_wrap_counter;
166         desc[head].addr = cvq->virtio_net_hdr_mem;
167         desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
168         vq->vq_free_cnt--;
169         nb_descs++;
170         if (++vq->vq_avail_idx >= vq->vq_nentries) {
171                 vq->vq_avail_idx -= vq->vq_nentries;
172                 vq->avail_wrap_counter ^= 1;
173         }
174
175         for (k = 0; k < pkt_num; k++) {
176                 desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
177                         + sizeof(struct virtio_net_ctrl_hdr)
178                         + sizeof(ctrl->status) + sizeof(uint8_t) * sum;
179                 desc[vq->vq_avail_idx].len = dlen[k];
180                 desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
181                         VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
182                         VRING_DESC_F_USED(!vq->avail_wrap_counter);
183                 sum += dlen[k];
184                 vq->vq_free_cnt--;
185                 nb_descs++;
186                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
187                         vq->vq_avail_idx -= vq->vq_nentries;
188                         vq->avail_wrap_counter ^= 1;
189                 }
190         }
191
192         desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
193                 + sizeof(struct virtio_net_ctrl_hdr);
194         desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
195         desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
196                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
197                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
198         vq->vq_free_cnt--;
199         nb_descs++;
200         if (++vq->vq_avail_idx >= vq->vq_nentries) {
201                 vq->vq_avail_idx -= vq->vq_nentries;
202                 vq->avail_wrap_counter ^= 1;
203         }
204
205         virtio_wmb(vq->hw->weak_barriers);
206         desc[head].flags = VRING_DESC_F_NEXT |
207                 VRING_DESC_F_AVAIL(avail_wrap_counter) |
208                 VRING_DESC_F_USED(!avail_wrap_counter);
209
210         virtio_wmb(vq->hw->weak_barriers);
211         virtqueue_notify(vq);
212
213         /* wait for used descriptors in virtqueue */
214         while (!desc_is_used(&desc[head], vq))
215                 usleep(100);
216
217         virtio_rmb(vq->hw->weak_barriers);
218
219         /* now get used descriptors */
220         vq->vq_free_cnt += nb_descs;
221         vq->vq_used_cons_idx += nb_descs;
222         if (vq->vq_used_cons_idx >= vq->vq_nentries) {
223                 vq->vq_used_cons_idx -= vq->vq_nentries;
224                 vq->used_wrap_counter ^= 1;
225         }
226
227         result = cvq->virtio_net_hdr_mz->addr;
228         return result;
229 }
230
231 static int
232 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
233                 int *dlen, int pkt_num)
234 {
235         uint32_t head, i;
236         int k, sum = 0;
237         virtio_net_ctrl_ack status = ~0;
238         struct virtio_pmd_ctrl *result;
239         struct virtqueue *vq;
240
241         ctrl->status = status;
242
243         if (!cvq || !cvq->vq) {
244                 PMD_INIT_LOG(ERR, "Control queue is not supported.");
245                 return -1;
246         }
247
248         rte_spinlock_lock(&cvq->lock);
249         vq = cvq->vq;
250         head = vq->vq_desc_head_idx;
251
252         PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
253                 "vq->hw->cvq = %p vq = %p",
254                 vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
255
256         if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
257                 rte_spinlock_unlock(&cvq->lock);
258                 return -1;
259         }
260
261         memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
262                 sizeof(struct virtio_pmd_ctrl));
263
264         if (vtpci_packed_queue(vq->hw)) {
265                 result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num);
266                 goto out_unlock;
267         }
268
269         /*
270          * Format is enforced in qemu code:
271          * One TX packet for header;
272          * At least one TX packet per argument;
273          * One RX packet for ACK.
274          */
275         vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT;
276         vq->vq_ring.desc[head].addr = cvq->virtio_net_hdr_mem;
277         vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
278         vq->vq_free_cnt--;
279         i = vq->vq_ring.desc[head].next;
280
281         for (k = 0; k < pkt_num; k++) {
282                 vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT;
283                 vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem
284                         + sizeof(struct virtio_net_ctrl_hdr)
285                         + sizeof(ctrl->status) + sizeof(uint8_t)*sum;
286                 vq->vq_ring.desc[i].len = dlen[k];
287                 sum += dlen[k];
288                 vq->vq_free_cnt--;
289                 i = vq->vq_ring.desc[i].next;
290         }
291
292         vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
293         vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem
294                         + sizeof(struct virtio_net_ctrl_hdr);
295         vq->vq_ring.desc[i].len = sizeof(ctrl->status);
296         vq->vq_free_cnt--;
297
298         vq->vq_desc_head_idx = vq->vq_ring.desc[i].next;
299
300         vq_update_avail_ring(vq, head);
301         vq_update_avail_idx(vq);
302
303         PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
304
305         virtqueue_notify(vq);
306
307         rte_rmb();
308         while (VIRTQUEUE_NUSED(vq) == 0) {
309                 rte_rmb();
310                 usleep(100);
311         }
312
313         while (VIRTQUEUE_NUSED(vq)) {
314                 uint32_t idx, desc_idx, used_idx;
315                 struct vring_used_elem *uep;
316
317                 used_idx = (uint32_t)(vq->vq_used_cons_idx
318                                 & (vq->vq_nentries - 1));
319                 uep = &vq->vq_ring.used->ring[used_idx];
320                 idx = (uint32_t) uep->id;
321                 desc_idx = idx;
322
323                 while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) {
324                         desc_idx = vq->vq_ring.desc[desc_idx].next;
325                         vq->vq_free_cnt++;
326                 }
327
328                 vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx;
329                 vq->vq_desc_head_idx = idx;
330
331                 vq->vq_used_cons_idx++;
332                 vq->vq_free_cnt++;
333         }
334
335         PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
336                         vq->vq_free_cnt, vq->vq_desc_head_idx);
337
338         result = cvq->virtio_net_hdr_mz->addr;
339
340 out_unlock:
341         rte_spinlock_unlock(&cvq->lock);
342         return result->status;
343 }
344
345 static int
346 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
347 {
348         struct virtio_hw *hw = dev->data->dev_private;
349         struct virtio_pmd_ctrl ctrl;
350         int dlen[1];
351         int ret;
352
353         ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
354         ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
355         memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
356
357         dlen[0] = sizeof(uint16_t);
358
359         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
360         if (ret) {
361                 PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
362                           "failed, this is too late now...");
363                 return -EINVAL;
364         }
365
366         return 0;
367 }
368
369 static void
370 virtio_dev_queue_release(void *queue __rte_unused)
371 {
372         /* do nothing */
373 }
374
375 static uint16_t
376 virtio_get_nr_vq(struct virtio_hw *hw)
377 {
378         uint16_t nr_vq = hw->max_queue_pairs * 2;
379
380         if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
381                 nr_vq += 1;
382
383         return nr_vq;
384 }
385
386 static void
387 virtio_init_vring(struct virtqueue *vq)
388 {
389         int size = vq->vq_nentries;
390         struct vring *vr = &vq->vq_ring;
391         uint8_t *ring_mem = vq->vq_ring_virt_mem;
392
393         PMD_INIT_FUNC_TRACE();
394
395         memset(ring_mem, 0, vq->vq_ring_size);
396
397         vq->vq_used_cons_idx = 0;
398         vq->vq_desc_head_idx = 0;
399         vq->vq_avail_idx = 0;
400         vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
401         vq->vq_free_cnt = vq->vq_nentries;
402         memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
403         if (vtpci_packed_queue(vq->hw)) {
404                 vring_init_packed(&vq->ring_packed, ring_mem,
405                                   VIRTIO_PCI_VRING_ALIGN, size);
406                 vring_desc_init_packed(vq, size);
407         } else {
408                 vring_init_split(vr, ring_mem, VIRTIO_PCI_VRING_ALIGN, size);
409                 vring_desc_init_split(vr->desc, size);
410         }
411         /*
412          * Disable device(host) interrupting guest
413          */
414         virtqueue_disable_intr(vq);
415 }
416
417 static int
418 virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
419 {
420         char vq_name[VIRTQUEUE_MAX_NAME_SZ];
421         char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
422         const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
423         unsigned int vq_size, size;
424         struct virtio_hw *hw = dev->data->dev_private;
425         struct virtnet_rx *rxvq = NULL;
426         struct virtnet_tx *txvq = NULL;
427         struct virtnet_ctl *cvq = NULL;
428         struct virtqueue *vq;
429         size_t sz_hdr_mz = 0;
430         void *sw_ring = NULL;
431         int queue_type = virtio_get_queue_type(hw, vtpci_queue_idx);
432         int ret;
433         int numa_node = dev->device->numa_node;
434
435         PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
436                         vtpci_queue_idx, numa_node);
437
438         /*
439          * Read the virtqueue size from the Queue Size field
440          * Always power of 2 and if 0 virtqueue does not exist
441          */
442         vq_size = VTPCI_OPS(hw)->get_queue_num(hw, vtpci_queue_idx);
443         PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
444         if (vq_size == 0) {
445                 PMD_INIT_LOG(ERR, "virtqueue does not exist");
446                 return -EINVAL;
447         }
448
449         if (!rte_is_power_of_2(vq_size)) {
450                 PMD_INIT_LOG(ERR, "virtqueue size is not powerof 2");
451                 return -EINVAL;
452         }
453
454         snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
455                  dev->data->port_id, vtpci_queue_idx);
456
457         size = RTE_ALIGN_CEIL(sizeof(*vq) +
458                                 vq_size * sizeof(struct vq_desc_extra),
459                                 RTE_CACHE_LINE_SIZE);
460         if (queue_type == VTNET_TQ) {
461                 /*
462                  * For each xmit packet, allocate a virtio_net_hdr
463                  * and indirect ring elements
464                  */
465                 sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
466         } else if (queue_type == VTNET_CQ) {
467                 /* Allocate a page for control vq command, data and status */
468                 sz_hdr_mz = PAGE_SIZE;
469         }
470
471         vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
472                                 numa_node);
473         if (vq == NULL) {
474                 PMD_INIT_LOG(ERR, "can not allocate vq");
475                 return -ENOMEM;
476         }
477         hw->vqs[vtpci_queue_idx] = vq;
478
479         vq->hw = hw;
480         vq->vq_queue_index = vtpci_queue_idx;
481         vq->vq_nentries = vq_size;
482         vq->event_flags_shadow = 0;
483         if (vtpci_packed_queue(hw)) {
484                 vq->avail_wrap_counter = 1;
485                 vq->used_wrap_counter = 1;
486                 vq->avail_used_flags =
487                         VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
488                         VRING_DESC_F_USED(!vq->avail_wrap_counter);
489         }
490
491         /*
492          * Reserve a memzone for vring elements
493          */
494         size = vring_size(hw, vq_size, VIRTIO_PCI_VRING_ALIGN);
495         vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
496         PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
497                      size, vq->vq_ring_size);
498
499         mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
500                         numa_node, RTE_MEMZONE_IOVA_CONTIG,
501                         VIRTIO_PCI_VRING_ALIGN);
502         if (mz == NULL) {
503                 if (rte_errno == EEXIST)
504                         mz = rte_memzone_lookup(vq_name);
505                 if (mz == NULL) {
506                         ret = -ENOMEM;
507                         goto fail_q_alloc;
508                 }
509         }
510
511         memset(mz->addr, 0, mz->len);
512
513         vq->vq_ring_mem = mz->iova;
514         vq->vq_ring_virt_mem = mz->addr;
515         PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem:      0x%" PRIx64,
516                      (uint64_t)mz->iova);
517         PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64,
518                      (uint64_t)(uintptr_t)mz->addr);
519
520         virtio_init_vring(vq);
521
522         if (sz_hdr_mz) {
523                 snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
524                          dev->data->port_id, vtpci_queue_idx);
525                 hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
526                                 numa_node, RTE_MEMZONE_IOVA_CONTIG,
527                                 RTE_CACHE_LINE_SIZE);
528                 if (hdr_mz == NULL) {
529                         if (rte_errno == EEXIST)
530                                 hdr_mz = rte_memzone_lookup(vq_hdr_name);
531                         if (hdr_mz == NULL) {
532                                 ret = -ENOMEM;
533                                 goto fail_q_alloc;
534                         }
535                 }
536         }
537
538         if (queue_type == VTNET_RQ) {
539                 size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
540                                sizeof(vq->sw_ring[0]);
541
542                 sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
543                                 RTE_CACHE_LINE_SIZE, numa_node);
544                 if (!sw_ring) {
545                         PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
546                         ret = -ENOMEM;
547                         goto fail_q_alloc;
548                 }
549
550                 vq->sw_ring = sw_ring;
551                 rxvq = &vq->rxq;
552                 rxvq->vq = vq;
553                 rxvq->port_id = dev->data->port_id;
554                 rxvq->mz = mz;
555         } else if (queue_type == VTNET_TQ) {
556                 txvq = &vq->txq;
557                 txvq->vq = vq;
558                 txvq->port_id = dev->data->port_id;
559                 txvq->mz = mz;
560                 txvq->virtio_net_hdr_mz = hdr_mz;
561                 txvq->virtio_net_hdr_mem = hdr_mz->iova;
562         } else if (queue_type == VTNET_CQ) {
563                 cvq = &vq->cq;
564                 cvq->vq = vq;
565                 cvq->mz = mz;
566                 cvq->virtio_net_hdr_mz = hdr_mz;
567                 cvq->virtio_net_hdr_mem = hdr_mz->iova;
568                 memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
569
570                 hw->cvq = cvq;
571         }
572
573         /* For virtio_user case (that is when hw->dev is NULL), we use
574          * virtual address. And we need properly set _offset_, please see
575          * VIRTIO_MBUF_DATA_DMA_ADDR in virtqueue.h for more information.
576          */
577         if (!hw->virtio_user_dev)
578                 vq->offset = offsetof(struct rte_mbuf, buf_iova);
579         else {
580                 vq->vq_ring_mem = (uintptr_t)mz->addr;
581                 vq->offset = offsetof(struct rte_mbuf, buf_addr);
582                 if (queue_type == VTNET_TQ)
583                         txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
584                 else if (queue_type == VTNET_CQ)
585                         cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
586         }
587
588         if (queue_type == VTNET_TQ) {
589                 struct virtio_tx_region *txr;
590                 unsigned int i;
591
592                 txr = hdr_mz->addr;
593                 memset(txr, 0, vq_size * sizeof(*txr));
594                 for (i = 0; i < vq_size; i++) {
595                         struct vring_desc *start_dp = txr[i].tx_indir;
596                         struct vring_packed_desc *start_dp_packed =
597                                 txr[i].tx_indir_pq;
598
599                         /* first indirect descriptor is always the tx header */
600                         if (vtpci_packed_queue(hw)) {
601                                 start_dp_packed->addr = txvq->virtio_net_hdr_mem
602                                         + i * sizeof(*txr)
603                                         + offsetof(struct virtio_tx_region,
604                                                    tx_hdr);
605                                 start_dp_packed->len = hw->vtnet_hdr_size;
606                         } else {
607                                 vring_desc_init_split(start_dp,
608                                                       RTE_DIM(txr[i].tx_indir));
609                                 start_dp->addr = txvq->virtio_net_hdr_mem
610                                         + i * sizeof(*txr)
611                                         + offsetof(struct virtio_tx_region,
612                                                    tx_hdr);
613                                 start_dp->len = hw->vtnet_hdr_size;
614                                 start_dp->flags = VRING_DESC_F_NEXT;
615                         }
616                 }
617         }
618
619         if (VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
620                 PMD_INIT_LOG(ERR, "setup_queue failed");
621                 return -EINVAL;
622         }
623
624         return 0;
625
626 fail_q_alloc:
627         rte_free(sw_ring);
628         rte_memzone_free(hdr_mz);
629         rte_memzone_free(mz);
630         rte_free(vq);
631
632         return ret;
633 }
634
635 static void
636 virtio_free_queues(struct virtio_hw *hw)
637 {
638         uint16_t nr_vq = virtio_get_nr_vq(hw);
639         struct virtqueue *vq;
640         int queue_type;
641         uint16_t i;
642
643         if (hw->vqs == NULL)
644                 return;
645
646         for (i = 0; i < nr_vq; i++) {
647                 vq = hw->vqs[i];
648                 if (!vq)
649                         continue;
650
651                 queue_type = virtio_get_queue_type(hw, i);
652                 if (queue_type == VTNET_RQ) {
653                         rte_free(vq->sw_ring);
654                         rte_memzone_free(vq->rxq.mz);
655                 } else if (queue_type == VTNET_TQ) {
656                         rte_memzone_free(vq->txq.mz);
657                         rte_memzone_free(vq->txq.virtio_net_hdr_mz);
658                 } else {
659                         rte_memzone_free(vq->cq.mz);
660                         rte_memzone_free(vq->cq.virtio_net_hdr_mz);
661                 }
662
663                 rte_free(vq);
664                 hw->vqs[i] = NULL;
665         }
666
667         rte_free(hw->vqs);
668         hw->vqs = NULL;
669 }
670
671 static int
672 virtio_alloc_queues(struct rte_eth_dev *dev)
673 {
674         struct virtio_hw *hw = dev->data->dev_private;
675         uint16_t nr_vq = virtio_get_nr_vq(hw);
676         uint16_t i;
677         int ret;
678
679         hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
680         if (!hw->vqs) {
681                 PMD_INIT_LOG(ERR, "failed to allocate vqs");
682                 return -ENOMEM;
683         }
684
685         for (i = 0; i < nr_vq; i++) {
686                 ret = virtio_init_queue(dev, i);
687                 if (ret < 0) {
688                         virtio_free_queues(hw);
689                         return ret;
690                 }
691         }
692
693         return 0;
694 }
695
696 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);
697
698 static void
699 virtio_dev_close(struct rte_eth_dev *dev)
700 {
701         struct virtio_hw *hw = dev->data->dev_private;
702         struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
703
704         PMD_INIT_LOG(DEBUG, "virtio_dev_close");
705
706         if (!hw->opened)
707                 return;
708         hw->opened = false;
709
710         /* reset the NIC */
711         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
712                 VTPCI_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
713         if (intr_conf->rxq)
714                 virtio_queues_unbind_intr(dev);
715
716         if (intr_conf->lsc || intr_conf->rxq) {
717                 virtio_intr_disable(dev);
718                 rte_intr_efd_disable(dev->intr_handle);
719                 rte_free(dev->intr_handle->intr_vec);
720                 dev->intr_handle->intr_vec = NULL;
721         }
722
723         vtpci_reset(hw);
724         virtio_dev_free_mbufs(dev);
725         virtio_free_queues(hw);
726 }
727
728 static void
729 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
730 {
731         struct virtio_hw *hw = dev->data->dev_private;
732         struct virtio_pmd_ctrl ctrl;
733         int dlen[1];
734         int ret;
735
736         if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
737                 PMD_INIT_LOG(INFO, "host does not support rx control");
738                 return;
739         }
740
741         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
742         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
743         ctrl.data[0] = 1;
744         dlen[0] = 1;
745
746         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
747         if (ret)
748                 PMD_INIT_LOG(ERR, "Failed to enable promisc");
749 }
750
751 static void
752 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
753 {
754         struct virtio_hw *hw = dev->data->dev_private;
755         struct virtio_pmd_ctrl ctrl;
756         int dlen[1];
757         int ret;
758
759         if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
760                 PMD_INIT_LOG(INFO, "host does not support rx control");
761                 return;
762         }
763
764         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
765         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
766         ctrl.data[0] = 0;
767         dlen[0] = 1;
768
769         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
770         if (ret)
771                 PMD_INIT_LOG(ERR, "Failed to disable promisc");
772 }
773
774 static void
775 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
776 {
777         struct virtio_hw *hw = dev->data->dev_private;
778         struct virtio_pmd_ctrl ctrl;
779         int dlen[1];
780         int ret;
781
782         if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
783                 PMD_INIT_LOG(INFO, "host does not support rx control");
784                 return;
785         }
786
787         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
788         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
789         ctrl.data[0] = 1;
790         dlen[0] = 1;
791
792         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
793         if (ret)
794                 PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
795 }
796
797 static void
798 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
799 {
800         struct virtio_hw *hw = dev->data->dev_private;
801         struct virtio_pmd_ctrl ctrl;
802         int dlen[1];
803         int ret;
804
805         if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
806                 PMD_INIT_LOG(INFO, "host does not support rx control");
807                 return;
808         }
809
810         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
811         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
812         ctrl.data[0] = 0;
813         dlen[0] = 1;
814
815         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
816         if (ret)
817                 PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
818 }
819
820 #define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
821 static int
822 virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
823 {
824         struct virtio_hw *hw = dev->data->dev_private;
825         uint32_t ether_hdr_len = ETHER_HDR_LEN + VLAN_TAG_LEN +
826                                  hw->vtnet_hdr_size;
827         uint32_t frame_size = mtu + ether_hdr_len;
828         uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;
829
830         max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);
831
832         if (mtu < ETHER_MIN_MTU || frame_size > max_frame_size) {
833                 PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
834                         ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
835                 return -EINVAL;
836         }
837         return 0;
838 }
839
840 static int
841 virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
842 {
843         struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
844         struct virtqueue *vq = rxvq->vq;
845
846         virtqueue_enable_intr(vq);
847         return 0;
848 }
849
850 static int
851 virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
852 {
853         struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
854         struct virtqueue *vq = rxvq->vq;
855
856         virtqueue_disable_intr(vq);
857         return 0;
858 }
859
860 /*
861  * dev_ops for virtio, bare necessities for basic operation
862  */
863 static const struct eth_dev_ops virtio_eth_dev_ops = {
864         .dev_configure           = virtio_dev_configure,
865         .dev_start               = virtio_dev_start,
866         .dev_stop                = virtio_dev_stop,
867         .dev_close               = virtio_dev_close,
868         .promiscuous_enable      = virtio_dev_promiscuous_enable,
869         .promiscuous_disable     = virtio_dev_promiscuous_disable,
870         .allmulticast_enable     = virtio_dev_allmulticast_enable,
871         .allmulticast_disable    = virtio_dev_allmulticast_disable,
872         .mtu_set                 = virtio_mtu_set,
873         .dev_infos_get           = virtio_dev_info_get,
874         .stats_get               = virtio_dev_stats_get,
875         .xstats_get              = virtio_dev_xstats_get,
876         .xstats_get_names        = virtio_dev_xstats_get_names,
877         .stats_reset             = virtio_dev_stats_reset,
878         .xstats_reset            = virtio_dev_stats_reset,
879         .link_update             = virtio_dev_link_update,
880         .vlan_offload_set        = virtio_dev_vlan_offload_set,
881         .rx_queue_setup          = virtio_dev_rx_queue_setup,
882         .rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
883         .rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
884         .rx_queue_release        = virtio_dev_queue_release,
885         .rx_descriptor_done      = virtio_dev_rx_queue_done,
886         .tx_queue_setup          = virtio_dev_tx_queue_setup,
887         .tx_queue_release        = virtio_dev_queue_release,
888         /* collect stats per queue */
889         .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
890         .vlan_filter_set         = virtio_vlan_filter_set,
891         .mac_addr_add            = virtio_mac_addr_add,
892         .mac_addr_remove         = virtio_mac_addr_remove,
893         .mac_addr_set            = virtio_mac_addr_set,
894 };
895
896 static void
897 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
898 {
899         unsigned i;
900
901         for (i = 0; i < dev->data->nb_tx_queues; i++) {
902                 const struct virtnet_tx *txvq = dev->data->tx_queues[i];
903                 if (txvq == NULL)
904                         continue;
905
906                 stats->opackets += txvq->stats.packets;
907                 stats->obytes += txvq->stats.bytes;
908                 stats->oerrors += txvq->stats.errors;
909
910                 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
911                         stats->q_opackets[i] = txvq->stats.packets;
912                         stats->q_obytes[i] = txvq->stats.bytes;
913                 }
914         }
915
916         for (i = 0; i < dev->data->nb_rx_queues; i++) {
917                 const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
918                 if (rxvq == NULL)
919                         continue;
920
921                 stats->ipackets += rxvq->stats.packets;
922                 stats->ibytes += rxvq->stats.bytes;
923                 stats->ierrors += rxvq->stats.errors;
924
925                 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
926                         stats->q_ipackets[i] = rxvq->stats.packets;
927                         stats->q_ibytes[i] = rxvq->stats.bytes;
928                 }
929         }
930
931         stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
932 }
933
934 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
935                                        struct rte_eth_xstat_name *xstats_names,
936                                        __rte_unused unsigned limit)
937 {
938         unsigned i;
939         unsigned count = 0;
940         unsigned t;
941
942         unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
943                 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
944
945         if (xstats_names != NULL) {
946                 /* Note: limit checked in rte_eth_xstats_names() */
947
948                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
949                         struct virtnet_rx *rxvq = dev->data->rx_queues[i];
950                         if (rxvq == NULL)
951                                 continue;
952                         for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
953                                 snprintf(xstats_names[count].name,
954                                         sizeof(xstats_names[count].name),
955                                         "rx_q%u_%s", i,
956                                         rte_virtio_rxq_stat_strings[t].name);
957                                 count++;
958                         }
959                 }
960
961                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
962                         struct virtnet_tx *txvq = dev->data->tx_queues[i];
963                         if (txvq == NULL)
964                                 continue;
965                         for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
966                                 snprintf(xstats_names[count].name,
967                                         sizeof(xstats_names[count].name),
968                                         "tx_q%u_%s", i,
969                                         rte_virtio_txq_stat_strings[t].name);
970                                 count++;
971                         }
972                 }
973                 return count;
974         }
975         return nstats;
976 }
977
978 static int
979 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
980                       unsigned n)
981 {
982         unsigned i;
983         unsigned count = 0;
984
985         unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
986                 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
987
988         if (n < nstats)
989                 return nstats;
990
991         for (i = 0; i < dev->data->nb_rx_queues; i++) {
992                 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
993
994                 if (rxvq == NULL)
995                         continue;
996
997                 unsigned t;
998
999                 for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1000                         xstats[count].value = *(uint64_t *)(((char *)rxvq) +
1001                                 rte_virtio_rxq_stat_strings[t].offset);
1002                         xstats[count].id = count;
1003                         count++;
1004                 }
1005         }
1006
1007         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1008                 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1009
1010                 if (txvq == NULL)
1011                         continue;
1012
1013                 unsigned t;
1014
1015                 for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1016                         xstats[count].value = *(uint64_t *)(((char *)txvq) +
1017                                 rte_virtio_txq_stat_strings[t].offset);
1018                         xstats[count].id = count;
1019                         count++;
1020                 }
1021         }
1022
1023         return count;
1024 }
1025
1026 static int
1027 virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1028 {
1029         virtio_update_stats(dev, stats);
1030
1031         return 0;
1032 }
1033
1034 static void
1035 virtio_dev_stats_reset(struct rte_eth_dev *dev)
1036 {
1037         unsigned int i;
1038
1039         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1040                 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1041                 if (txvq == NULL)
1042                         continue;
1043
1044                 txvq->stats.packets = 0;
1045                 txvq->stats.bytes = 0;
1046                 txvq->stats.errors = 0;
1047                 txvq->stats.multicast = 0;
1048                 txvq->stats.broadcast = 0;
1049                 memset(txvq->stats.size_bins, 0,
1050                        sizeof(txvq->stats.size_bins[0]) * 8);
1051         }
1052
1053         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1054                 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1055                 if (rxvq == NULL)
1056                         continue;
1057
1058                 rxvq->stats.packets = 0;
1059                 rxvq->stats.bytes = 0;
1060                 rxvq->stats.errors = 0;
1061                 rxvq->stats.multicast = 0;
1062                 rxvq->stats.broadcast = 0;
1063                 memset(rxvq->stats.size_bins, 0,
1064                        sizeof(rxvq->stats.size_bins[0]) * 8);
1065         }
1066 }
1067
1068 static void
1069 virtio_set_hwaddr(struct virtio_hw *hw)
1070 {
1071         vtpci_write_dev_config(hw,
1072                         offsetof(struct virtio_net_config, mac),
1073                         &hw->mac_addr, ETHER_ADDR_LEN);
1074 }
1075
1076 static void
1077 virtio_get_hwaddr(struct virtio_hw *hw)
1078 {
1079         if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) {
1080                 vtpci_read_dev_config(hw,
1081                         offsetof(struct virtio_net_config, mac),
1082                         &hw->mac_addr, ETHER_ADDR_LEN);
1083         } else {
1084                 eth_random_addr(&hw->mac_addr[0]);
1085                 virtio_set_hwaddr(hw);
1086         }
1087 }
1088
1089 static int
1090 virtio_mac_table_set(struct virtio_hw *hw,
1091                      const struct virtio_net_ctrl_mac *uc,
1092                      const struct virtio_net_ctrl_mac *mc)
1093 {
1094         struct virtio_pmd_ctrl ctrl;
1095         int err, len[2];
1096
1097         if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1098                 PMD_DRV_LOG(INFO, "host does not support mac table");
1099                 return -1;
1100         }
1101
1102         ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1103         ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1104
1105         len[0] = uc->entries * ETHER_ADDR_LEN + sizeof(uc->entries);
1106         memcpy(ctrl.data, uc, len[0]);
1107
1108         len[1] = mc->entries * ETHER_ADDR_LEN + sizeof(mc->entries);
1109         memcpy(ctrl.data + len[0], mc, len[1]);
1110
1111         err = virtio_send_command(hw->cvq, &ctrl, len, 2);
1112         if (err != 0)
1113                 PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
1114         return err;
1115 }
1116
1117 static int
1118 virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
1119                     uint32_t index, uint32_t vmdq __rte_unused)
1120 {
1121         struct virtio_hw *hw = dev->data->dev_private;
1122         const struct ether_addr *addrs = dev->data->mac_addrs;
1123         unsigned int i;
1124         struct virtio_net_ctrl_mac *uc, *mc;
1125
1126         if (index >= VIRTIO_MAX_MAC_ADDRS) {
1127                 PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1128                 return -EINVAL;
1129         }
1130
1131         uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
1132         uc->entries = 0;
1133         mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
1134         mc->entries = 0;
1135
1136         for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1137                 const struct ether_addr *addr
1138                         = (i == index) ? mac_addr : addrs + i;
1139                 struct virtio_net_ctrl_mac *tbl
1140                         = is_multicast_ether_addr(addr) ? mc : uc;
1141
1142                 memcpy(&tbl->macs[tbl->entries++], addr, ETHER_ADDR_LEN);
1143         }
1144
1145         return virtio_mac_table_set(hw, uc, mc);
1146 }
1147
1148 static void
1149 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
1150 {
1151         struct virtio_hw *hw = dev->data->dev_private;
1152         struct ether_addr *addrs = dev->data->mac_addrs;
1153         struct virtio_net_ctrl_mac *uc, *mc;
1154         unsigned int i;
1155
1156         if (index >= VIRTIO_MAX_MAC_ADDRS) {
1157                 PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1158                 return;
1159         }
1160
1161         uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
1162         uc->entries = 0;
1163         mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
1164         mc->entries = 0;
1165
1166         for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1167                 struct virtio_net_ctrl_mac *tbl;
1168
1169                 if (i == index || is_zero_ether_addr(addrs + i))
1170                         continue;
1171
1172                 tbl = is_multicast_ether_addr(addrs + i) ? mc : uc;
1173                 memcpy(&tbl->macs[tbl->entries++], addrs + i, ETHER_ADDR_LEN);
1174         }
1175
1176         virtio_mac_table_set(hw, uc, mc);
1177 }
1178
1179 static int
1180 virtio_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
1181 {
1182         struct virtio_hw *hw = dev->data->dev_private;
1183
1184         memcpy(hw->mac_addr, mac_addr, ETHER_ADDR_LEN);
1185
1186         /* Use atomic update if available */
1187         if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1188                 struct virtio_pmd_ctrl ctrl;
1189                 int len = ETHER_ADDR_LEN;
1190
1191                 ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1192                 ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
1193
1194                 memcpy(ctrl.data, mac_addr, ETHER_ADDR_LEN);
1195                 return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1196         }
1197
1198         if (!vtpci_with_feature(hw, VIRTIO_NET_F_MAC))
1199                 return -ENOTSUP;
1200
1201         virtio_set_hwaddr(hw);
1202         return 0;
1203 }
1204
1205 static int
1206 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1207 {
1208         struct virtio_hw *hw = dev->data->dev_private;
1209         struct virtio_pmd_ctrl ctrl;
1210         int len;
1211
1212         if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1213                 return -ENOTSUP;
1214
1215         ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1216         ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1217         memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1218         len = sizeof(vlan_id);
1219
1220         return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1221 }
1222
1223 static int
1224 virtio_intr_enable(struct rte_eth_dev *dev)
1225 {
1226         struct virtio_hw *hw = dev->data->dev_private;
1227
1228         if (rte_intr_enable(dev->intr_handle) < 0)
1229                 return -1;
1230
1231         if (!hw->virtio_user_dev)
1232                 hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev));
1233
1234         return 0;
1235 }
1236
1237 static int
1238 virtio_intr_disable(struct rte_eth_dev *dev)
1239 {
1240         struct virtio_hw *hw = dev->data->dev_private;
1241
1242         if (rte_intr_disable(dev->intr_handle) < 0)
1243                 return -1;
1244
1245         if (!hw->virtio_user_dev)
1246                 hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev));
1247
1248         return 0;
1249 }
1250
1251 static int
1252 virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1253 {
1254         uint64_t host_features;
1255
1256         /* Prepare guest_features: feature that driver wants to support */
1257         PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1258                 req_features);
1259
1260         /* Read device(host) feature bits */
1261         host_features = VTPCI_OPS(hw)->get_features(hw);
1262         PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1263                 host_features);
1264
1265         /* If supported, ensure MTU value is valid before acknowledging it. */
1266         if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1267                 struct virtio_net_config config;
1268
1269                 vtpci_read_dev_config(hw,
1270                         offsetof(struct virtio_net_config, mtu),
1271                         &config.mtu, sizeof(config.mtu));
1272
1273                 if (config.mtu < ETHER_MIN_MTU)
1274                         req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1275         }
1276
1277         /*
1278          * Negotiate features: Subset of device feature bits are written back
1279          * guest feature bits.
1280          */
1281         hw->guest_features = req_features;
1282         hw->guest_features = vtpci_negotiate_features(hw, host_features);
1283         PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1284                 hw->guest_features);
1285
1286         if (hw->modern) {
1287                 if (!vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) {
1288                         PMD_INIT_LOG(ERR,
1289                                 "VIRTIO_F_VERSION_1 features is not enabled.");
1290                         return -1;
1291                 }
1292                 vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1293                 if (!(vtpci_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1294                         PMD_INIT_LOG(ERR,
1295                                 "failed to set FEATURES_OK status!");
1296                         return -1;
1297                 }
1298         }
1299
1300         hw->req_guest_features = req_features;
1301
1302         return 0;
1303 }
1304
1305 int
1306 virtio_dev_pause(struct rte_eth_dev *dev)
1307 {
1308         struct virtio_hw *hw = dev->data->dev_private;
1309
1310         rte_spinlock_lock(&hw->state_lock);
1311
1312         if (hw->started == 0) {
1313                 /* Device is just stopped. */
1314                 rte_spinlock_unlock(&hw->state_lock);
1315                 return -1;
1316         }
1317         hw->started = 0;
1318         /*
1319          * Prevent the worker threads from touching queues to avoid contention,
1320          * 1 ms should be enough for the ongoing Tx function to finish.
1321          */
1322         rte_delay_ms(1);
1323         return 0;
1324 }
1325
1326 /*
1327  * Recover hw state to let the worker threads continue.
1328  */
1329 void
1330 virtio_dev_resume(struct rte_eth_dev *dev)
1331 {
1332         struct virtio_hw *hw = dev->data->dev_private;
1333
1334         hw->started = 1;
1335         rte_spinlock_unlock(&hw->state_lock);
1336 }
1337
1338 /*
1339  * Should be called only after device is paused.
1340  */
1341 int
1342 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1343                 int nb_pkts)
1344 {
1345         struct virtio_hw *hw = dev->data->dev_private;
1346         struct virtnet_tx *txvq = dev->data->tx_queues[0];
1347         int ret;
1348
1349         hw->inject_pkts = tx_pkts;
1350         ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1351         hw->inject_pkts = NULL;
1352
1353         return ret;
1354 }
1355
1356 static void
1357 virtio_notify_peers(struct rte_eth_dev *dev)
1358 {
1359         struct virtio_hw *hw = dev->data->dev_private;
1360         struct virtnet_rx *rxvq;
1361         struct rte_mbuf *rarp_mbuf;
1362
1363         if (!dev->data->rx_queues)
1364                 return;
1365
1366         rxvq = dev->data->rx_queues[0];
1367         if (!rxvq)
1368                 return;
1369
1370         rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
1371                         (struct ether_addr *)hw->mac_addr);
1372         if (rarp_mbuf == NULL) {
1373                 PMD_DRV_LOG(ERR, "failed to make RARP packet.");
1374                 return;
1375         }
1376
1377         /* If virtio port just stopped, no need to send RARP */
1378         if (virtio_dev_pause(dev) < 0) {
1379                 rte_pktmbuf_free(rarp_mbuf);
1380                 return;
1381         }
1382
1383         virtio_inject_pkts(dev, &rarp_mbuf, 1);
1384         virtio_dev_resume(dev);
1385 }
1386
1387 static void
1388 virtio_ack_link_announce(struct rte_eth_dev *dev)
1389 {
1390         struct virtio_hw *hw = dev->data->dev_private;
1391         struct virtio_pmd_ctrl ctrl;
1392
1393         ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
1394         ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
1395
1396         virtio_send_command(hw->cvq, &ctrl, NULL, 0);
1397 }
1398
1399 /*
1400  * Process virtio config changed interrupt. Call the callback
1401  * if link state changed, generate gratuitous RARP packet if
1402  * the status indicates an ANNOUNCE.
1403  */
1404 void
1405 virtio_interrupt_handler(void *param)
1406 {
1407         struct rte_eth_dev *dev = param;
1408         struct virtio_hw *hw = dev->data->dev_private;
1409         uint8_t isr;
1410         uint16_t status;
1411
1412         /* Read interrupt status which clears interrupt */
1413         isr = vtpci_isr(hw);
1414         PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
1415
1416         if (virtio_intr_enable(dev) < 0)
1417                 PMD_DRV_LOG(ERR, "interrupt enable failed");
1418
1419         if (isr & VIRTIO_PCI_ISR_CONFIG) {
1420                 if (virtio_dev_link_update(dev, 0) == 0)
1421                         _rte_eth_dev_callback_process(dev,
1422                                                       RTE_ETH_EVENT_INTR_LSC,
1423                                                       NULL);
1424
1425                 if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1426                         vtpci_read_dev_config(hw,
1427                                 offsetof(struct virtio_net_config, status),
1428                                 &status, sizeof(status));
1429                         if (status & VIRTIO_NET_S_ANNOUNCE) {
1430                                 virtio_notify_peers(dev);
1431                                 if (hw->cvq)
1432                                         virtio_ack_link_announce(dev);
1433                         }
1434                 }
1435         }
1436 }
1437
1438 /* set rx and tx handlers according to what is supported */
1439 static void
1440 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1441 {
1442         struct virtio_hw *hw = eth_dev->data->dev_private;
1443
1444         if (vtpci_packed_queue(hw)) {
1445                 PMD_INIT_LOG(INFO,
1446                         "virtio: using packed ring standard Tx path on port %u",
1447                         eth_dev->data->port_id);
1448                 eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
1449         } else {
1450                 if (hw->use_inorder_tx) {
1451                         PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
1452                                 eth_dev->data->port_id);
1453                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
1454                 } else {
1455                         PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1456                                 eth_dev->data->port_id);
1457                         eth_dev->tx_pkt_burst = virtio_xmit_pkts;
1458                 }
1459         }
1460
1461         if (vtpci_packed_queue(hw)) {
1462                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1463                         PMD_INIT_LOG(INFO,
1464                                 "virtio: using packed ring mergeable buffer Rx path on port %u",
1465                                 eth_dev->data->port_id);
1466                         eth_dev->rx_pkt_burst =
1467                                 &virtio_recv_mergeable_pkts_packed;
1468                 } else {
1469                         PMD_INIT_LOG(INFO,
1470                                 "virtio: using packed ring standard Rx path on port %u",
1471                                 eth_dev->data->port_id);
1472                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
1473                 }
1474         } else {
1475                 if (hw->use_simple_rx) {
1476                         PMD_INIT_LOG(INFO, "virtio: using simple Rx path on port %u",
1477                                 eth_dev->data->port_id);
1478                         eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1479                 } else if (hw->use_inorder_rx) {
1480                         PMD_INIT_LOG(INFO,
1481                                 "virtio: using inorder Rx path on port %u",
1482                                 eth_dev->data->port_id);
1483                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_inorder;
1484                 } else if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1485                         PMD_INIT_LOG(INFO,
1486                                 "virtio: using mergeable buffer Rx path on port %u",
1487                                 eth_dev->data->port_id);
1488                         eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1489                 } else {
1490                         PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1491                                 eth_dev->data->port_id);
1492                         eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1493                 }
1494         }
1495
1496 }
1497
1498 /* Only support 1:1 queue/interrupt mapping so far.
1499  * TODO: support n:1 queue/interrupt mapping when there are limited number of
1500  * interrupt vectors (<N+1).
1501  */
1502 static int
1503 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1504 {
1505         uint32_t i;
1506         struct virtio_hw *hw = dev->data->dev_private;
1507
1508         PMD_INIT_LOG(INFO, "queue/interrupt binding");
1509         for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1510                 dev->intr_handle->intr_vec[i] = i + 1;
1511                 if (VTPCI_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1512                                                  VIRTIO_MSI_NO_VECTOR) {
1513                         PMD_DRV_LOG(ERR, "failed to set queue vector");
1514                         return -EBUSY;
1515                 }
1516         }
1517
1518         return 0;
1519 }
1520
1521 static void
1522 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1523 {
1524         uint32_t i;
1525         struct virtio_hw *hw = dev->data->dev_private;
1526
1527         PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
1528         for (i = 0; i < dev->data->nb_rx_queues; ++i)
1529                 VTPCI_OPS(hw)->set_queue_irq(hw,
1530                                              hw->vqs[i * VTNET_CQ],
1531                                              VIRTIO_MSI_NO_VECTOR);
1532 }
1533
1534 static int
1535 virtio_configure_intr(struct rte_eth_dev *dev)
1536 {
1537         struct virtio_hw *hw = dev->data->dev_private;
1538
1539         if (!rte_intr_cap_multiple(dev->intr_handle)) {
1540                 PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
1541                 return -ENOTSUP;
1542         }
1543
1544         if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1545                 PMD_INIT_LOG(ERR, "Fail to create eventfd");
1546                 return -1;
1547         }
1548
1549         if (!dev->intr_handle->intr_vec) {
1550                 dev->intr_handle->intr_vec =
1551                         rte_zmalloc("intr_vec",
1552                                     hw->max_queue_pairs * sizeof(int), 0);
1553                 if (!dev->intr_handle->intr_vec) {
1554                         PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1555                                      hw->max_queue_pairs);
1556                         return -ENOMEM;
1557                 }
1558         }
1559
1560         /* Re-register callback to update max_intr */
1561         rte_intr_callback_unregister(dev->intr_handle,
1562                                      virtio_interrupt_handler,
1563                                      dev);
1564         rte_intr_callback_register(dev->intr_handle,
1565                                    virtio_interrupt_handler,
1566                                    dev);
1567
1568         /* DO NOT try to remove this! This function will enable msix, or QEMU
1569          * will encounter SIGSEGV when DRIVER_OK is sent.
1570          * And for legacy devices, this should be done before queue/vec binding
1571          * to change the config size from 20 to 24, or VIRTIO_MSI_QUEUE_VECTOR
1572          * (22) will be ignored.
1573          */
1574         if (virtio_intr_enable(dev) < 0) {
1575                 PMD_DRV_LOG(ERR, "interrupt enable failed");
1576                 return -1;
1577         }
1578
1579         if (virtio_queues_bind_intr(dev) < 0) {
1580                 PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1581                 return -1;
1582         }
1583
1584         return 0;
1585 }
1586
1587 /* reset device and renegotiate features if needed */
1588 static int
1589 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
1590 {
1591         struct virtio_hw *hw = eth_dev->data->dev_private;
1592         struct virtio_net_config *config;
1593         struct virtio_net_config local_config;
1594         struct rte_pci_device *pci_dev = NULL;
1595         int ret;
1596
1597         /* Reset the device although not necessary at startup */
1598         vtpci_reset(hw);
1599
1600         if (hw->vqs) {
1601                 virtio_dev_free_mbufs(eth_dev);
1602                 virtio_free_queues(hw);
1603         }
1604
1605         /* Tell the host we've noticed this device. */
1606         vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
1607
1608         /* Tell the host we've known how to drive the device. */
1609         vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
1610         if (virtio_negotiate_features(hw, req_features) < 0)
1611                 return -1;
1612
1613         hw->weak_barriers = !vtpci_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);
1614
1615         if (!hw->virtio_user_dev) {
1616                 pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
1617                 rte_eth_copy_pci_info(eth_dev, pci_dev);
1618         }
1619
1620         /* If host does not support both status and MSI-X then disable LSC */
1621         if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS) &&
1622             hw->use_msix != VIRTIO_MSIX_NONE)
1623                 eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
1624         else
1625                 eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
1626
1627         /* Setting up rx_header size for the device */
1628         if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
1629             vtpci_with_feature(hw, VIRTIO_F_VERSION_1) ||
1630             vtpci_with_feature(hw, VIRTIO_F_RING_PACKED))
1631                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1632         else
1633                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
1634
1635         /* Copy the permanent MAC address to: virtio_hw */
1636         virtio_get_hwaddr(hw);
1637         ether_addr_copy((struct ether_addr *) hw->mac_addr,
1638                         &eth_dev->data->mac_addrs[0]);
1639         PMD_INIT_LOG(DEBUG,
1640                      "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
1641                      hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
1642                      hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
1643
1644         if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
1645                 config = &local_config;
1646
1647                 vtpci_read_dev_config(hw,
1648                         offsetof(struct virtio_net_config, mac),
1649                         &config->mac, sizeof(config->mac));
1650
1651                 if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1652                         vtpci_read_dev_config(hw,
1653                                 offsetof(struct virtio_net_config, status),
1654                                 &config->status, sizeof(config->status));
1655                 } else {
1656                         PMD_INIT_LOG(DEBUG,
1657                                      "VIRTIO_NET_F_STATUS is not supported");
1658                         config->status = 0;
1659                 }
1660
1661                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
1662                         vtpci_read_dev_config(hw,
1663                                 offsetof(struct virtio_net_config, max_virtqueue_pairs),
1664                                 &config->max_virtqueue_pairs,
1665                                 sizeof(config->max_virtqueue_pairs));
1666                 } else {
1667                         PMD_INIT_LOG(DEBUG,
1668                                      "VIRTIO_NET_F_MQ is not supported");
1669                         config->max_virtqueue_pairs = 1;
1670                 }
1671
1672                 hw->max_queue_pairs = config->max_virtqueue_pairs;
1673
1674                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MTU)) {
1675                         vtpci_read_dev_config(hw,
1676                                 offsetof(struct virtio_net_config, mtu),
1677                                 &config->mtu,
1678                                 sizeof(config->mtu));
1679
1680                         /*
1681                          * MTU value has already been checked at negotiation
1682                          * time, but check again in case it has changed since
1683                          * then, which should not happen.
1684                          */
1685                         if (config->mtu < ETHER_MIN_MTU) {
1686                                 PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
1687                                                 config->mtu);
1688                                 return -1;
1689                         }
1690
1691                         hw->max_mtu = config->mtu;
1692                         /* Set initial MTU to maximum one supported by vhost */
1693                         eth_dev->data->mtu = config->mtu;
1694
1695                 } else {
1696                         hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN -
1697                                 VLAN_TAG_LEN - hw->vtnet_hdr_size;
1698                 }
1699
1700                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
1701                                 config->max_virtqueue_pairs);
1702                 PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
1703                 PMD_INIT_LOG(DEBUG,
1704                                 "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
1705                                 config->mac[0], config->mac[1],
1706                                 config->mac[2], config->mac[3],
1707                                 config->mac[4], config->mac[5]);
1708         } else {
1709                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
1710                 hw->max_queue_pairs = 1;
1711                 hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN -
1712                         VLAN_TAG_LEN - hw->vtnet_hdr_size;
1713         }
1714
1715         ret = virtio_alloc_queues(eth_dev);
1716         if (ret < 0)
1717                 return ret;
1718
1719         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1720                 if (virtio_configure_intr(eth_dev) < 0) {
1721                         PMD_INIT_LOG(ERR, "failed to configure interrupt");
1722                         return -1;
1723                 }
1724         }
1725
1726         vtpci_reinit_complete(hw);
1727
1728         if (pci_dev)
1729                 PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
1730                         eth_dev->data->port_id, pci_dev->id.vendor_id,
1731                         pci_dev->id.device_id);
1732
1733         return 0;
1734 }
1735
1736 /*
1737  * Remap the PCI device again (IO port map for legacy device and
1738  * memory map for modern device), so that the secondary process
1739  * could have the PCI initiated correctly.
1740  */
1741 static int
1742 virtio_remap_pci(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
1743 {
1744         if (hw->modern) {
1745                 /*
1746                  * We don't have to re-parse the PCI config space, since
1747                  * rte_pci_map_device() makes sure the mapped address
1748                  * in secondary process would equal to the one mapped in
1749                  * the primary process: error will be returned if that
1750                  * requirement is not met.
1751                  *
1752                  * That said, we could simply reuse all cap pointers
1753                  * (such as dev_cfg, common_cfg, etc.) parsed from the
1754                  * primary process, which is stored in shared memory.
1755                  */
1756                 if (rte_pci_map_device(pci_dev)) {
1757                         PMD_INIT_LOG(DEBUG, "failed to map pci device!");
1758                         return -1;
1759                 }
1760         } else {
1761                 if (rte_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0)
1762                         return -1;
1763         }
1764
1765         return 0;
1766 }
1767
1768 static void
1769 virtio_set_vtpci_ops(struct virtio_hw *hw)
1770 {
1771 #ifdef RTE_VIRTIO_USER
1772         if (hw->virtio_user_dev)
1773                 VTPCI_OPS(hw) = &virtio_user_ops;
1774         else
1775 #endif
1776         if (hw->modern)
1777                 VTPCI_OPS(hw) = &modern_ops;
1778         else
1779                 VTPCI_OPS(hw) = &legacy_ops;
1780 }
1781
1782 /*
1783  * This function is based on probe() function in virtio_pci.c
1784  * It returns 0 on success.
1785  */
1786 int
1787 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
1788 {
1789         struct virtio_hw *hw = eth_dev->data->dev_private;
1790         int ret;
1791
1792         RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf));
1793
1794         eth_dev->dev_ops = &virtio_eth_dev_ops;
1795
1796         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1797                 if (!hw->virtio_user_dev) {
1798                         ret = virtio_remap_pci(RTE_ETH_DEV_TO_PCI(eth_dev), hw);
1799                         if (ret)
1800                                 return ret;
1801                 }
1802
1803                 virtio_set_vtpci_ops(hw);
1804                 set_rxtx_funcs(eth_dev);
1805
1806                 return 0;
1807         }
1808
1809         /* Allocate memory for storing MAC addresses */
1810         eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0);
1811         if (eth_dev->data->mac_addrs == NULL) {
1812                 PMD_INIT_LOG(ERR,
1813                         "Failed to allocate %d bytes needed to store MAC addresses",
1814                         VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN);
1815                 return -ENOMEM;
1816         }
1817
1818         hw->port_id = eth_dev->data->port_id;
1819         /* For virtio_user case the hw->virtio_user_dev is populated by
1820          * virtio_user_eth_dev_alloc() before eth_virtio_dev_init() is called.
1821          */
1822         if (!hw->virtio_user_dev) {
1823                 ret = vtpci_init(RTE_ETH_DEV_TO_PCI(eth_dev), hw);
1824                 if (ret)
1825                         goto out;
1826         }
1827
1828         /* reset device and negotiate default features */
1829         ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
1830         if (ret < 0)
1831                 goto out;
1832
1833         return 0;
1834
1835 out:
1836         rte_free(eth_dev->data->mac_addrs);
1837         return ret;
1838 }
1839
1840 static int
1841 eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
1842 {
1843         PMD_INIT_FUNC_TRACE();
1844
1845         if (rte_eal_process_type() == RTE_PROC_SECONDARY)
1846                 return 0;
1847
1848         virtio_dev_stop(eth_dev);
1849         virtio_dev_close(eth_dev);
1850
1851         eth_dev->dev_ops = NULL;
1852         eth_dev->tx_pkt_burst = NULL;
1853         eth_dev->rx_pkt_burst = NULL;
1854
1855         if (eth_dev->device)
1856                 rte_pci_unmap_device(RTE_ETH_DEV_TO_PCI(eth_dev));
1857
1858         PMD_INIT_LOG(DEBUG, "dev_uninit completed");
1859
1860         return 0;
1861 }
1862
1863 static int vdpa_check_handler(__rte_unused const char *key,
1864                 const char *value, __rte_unused void *opaque)
1865 {
1866         if (strcmp(value, "1"))
1867                 return -1;
1868
1869         return 0;
1870 }
1871
1872 static int
1873 vdpa_mode_selected(struct rte_devargs *devargs)
1874 {
1875         struct rte_kvargs *kvlist;
1876         const char *key = "vdpa";
1877         int ret = 0;
1878
1879         if (devargs == NULL)
1880                 return 0;
1881
1882         kvlist = rte_kvargs_parse(devargs->args, NULL);
1883         if (kvlist == NULL)
1884                 return 0;
1885
1886         if (!rte_kvargs_count(kvlist, key))
1887                 goto exit;
1888
1889         /* vdpa mode selected when there's a key-value pair: vdpa=1 */
1890         if (rte_kvargs_process(kvlist, key,
1891                                 vdpa_check_handler, NULL) < 0) {
1892                 goto exit;
1893         }
1894         ret = 1;
1895
1896 exit:
1897         rte_kvargs_free(kvlist);
1898         return ret;
1899 }
1900
1901 static int eth_virtio_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1902         struct rte_pci_device *pci_dev)
1903 {
1904         if (rte_eal_iopl_init() != 0) {
1905                 PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD");
1906                 return 1;
1907         }
1908
1909         /* virtio pmd skips probe if device needs to work in vdpa mode */
1910         if (vdpa_mode_selected(pci_dev->device.devargs))
1911                 return 1;
1912
1913         return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct virtio_hw),
1914                 eth_virtio_dev_init);
1915 }
1916
1917 static int eth_virtio_pci_remove(struct rte_pci_device *pci_dev)
1918 {
1919         return rte_eth_dev_pci_generic_remove(pci_dev, eth_virtio_dev_uninit);
1920 }
1921
1922 static struct rte_pci_driver rte_virtio_pmd = {
1923         .driver = {
1924                 .name = "net_virtio",
1925         },
1926         .id_table = pci_id_virtio_map,
1927         .drv_flags = 0,
1928         .probe = eth_virtio_pci_probe,
1929         .remove = eth_virtio_pci_remove,
1930 };
1931
1932 RTE_INIT(rte_virtio_pmd_init)
1933 {
1934         rte_eal_iopl_init();
1935         rte_pci_register(&rte_virtio_pmd);
1936 }
1937
1938 static bool
1939 rx_offload_enabled(struct virtio_hw *hw)
1940 {
1941         return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
1942                 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
1943                 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
1944 }
1945
1946 static bool
1947 tx_offload_enabled(struct virtio_hw *hw)
1948 {
1949         return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
1950                 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
1951                 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
1952 }
1953
1954 /*
1955  * Configure virtio device
1956  * It returns 0 on success.
1957  */
1958 static int
1959 virtio_dev_configure(struct rte_eth_dev *dev)
1960 {
1961         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
1962         const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
1963         struct virtio_hw *hw = dev->data->dev_private;
1964         uint64_t rx_offloads = rxmode->offloads;
1965         uint64_t tx_offloads = txmode->offloads;
1966         uint64_t req_features;
1967         int ret;
1968
1969         PMD_INIT_LOG(DEBUG, "configure");
1970         req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
1971
1972         if (dev->data->dev_conf.intr_conf.rxq) {
1973                 ret = virtio_init_device(dev, hw->req_guest_features);
1974                 if (ret < 0)
1975                         return ret;
1976         }
1977
1978         if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
1979                            DEV_RX_OFFLOAD_TCP_CKSUM))
1980                 req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
1981
1982         if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO)
1983                 req_features |=
1984                         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
1985                         (1ULL << VIRTIO_NET_F_GUEST_TSO6);
1986
1987         if (tx_offloads & (DEV_TX_OFFLOAD_UDP_CKSUM |
1988                            DEV_TX_OFFLOAD_TCP_CKSUM))
1989                 req_features |= (1ULL << VIRTIO_NET_F_CSUM);
1990
1991         if (tx_offloads & DEV_TX_OFFLOAD_TCP_TSO)
1992                 req_features |=
1993                         (1ULL << VIRTIO_NET_F_HOST_TSO4) |
1994                         (1ULL << VIRTIO_NET_F_HOST_TSO6);
1995
1996         /* if request features changed, reinit the device */
1997         if (req_features != hw->req_guest_features) {
1998                 ret = virtio_init_device(dev, req_features);
1999                 if (ret < 0)
2000                         return ret;
2001         }
2002
2003         if ((rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2004                             DEV_RX_OFFLOAD_TCP_CKSUM)) &&
2005                 !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2006                 PMD_DRV_LOG(ERR,
2007                         "rx checksum not available on this host");
2008                 return -ENOTSUP;
2009         }
2010
2011         if ((rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) &&
2012                 (!vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2013                  !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2014                 PMD_DRV_LOG(ERR,
2015                         "Large Receive Offload not available on this host");
2016                 return -ENOTSUP;
2017         }
2018
2019         /* start control queue */
2020         if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2021                 virtio_dev_cq_start(dev);
2022
2023         if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2024                 hw->vlan_strip = 1;
2025
2026         if ((rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
2027             && !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2028                 PMD_DRV_LOG(ERR,
2029                             "vlan filtering not available on this host");
2030                 return -ENOTSUP;
2031         }
2032
2033         hw->has_tx_offload = tx_offload_enabled(hw);
2034         hw->has_rx_offload = rx_offload_enabled(hw);
2035
2036         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2037                 /* Enable vector (0) for Link State Intrerrupt */
2038                 if (VTPCI_OPS(hw)->set_config_irq(hw, 0) ==
2039                                 VIRTIO_MSI_NO_VECTOR) {
2040                         PMD_DRV_LOG(ERR, "failed to set config vector");
2041                         return -EBUSY;
2042                 }
2043
2044         rte_spinlock_init(&hw->state_lock);
2045
2046         hw->use_simple_rx = 1;
2047
2048         if (vtpci_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2049                 hw->use_inorder_tx = 1;
2050                 hw->use_inorder_rx = 1;
2051                 hw->use_simple_rx = 0;
2052         }
2053
2054         if (vtpci_packed_queue(hw)) {
2055                 hw->use_simple_rx = 0;
2056                 hw->use_inorder_rx = 0;
2057                 hw->use_inorder_tx = 0;
2058         }
2059
2060 #if defined RTE_ARCH_ARM64 || defined RTE_ARCH_ARM
2061         if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2062                 hw->use_simple_rx = 0;
2063         }
2064 #endif
2065         if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2066                  hw->use_simple_rx = 0;
2067         }
2068
2069         if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2070                            DEV_RX_OFFLOAD_TCP_CKSUM |
2071                            DEV_RX_OFFLOAD_TCP_LRO |
2072                            DEV_RX_OFFLOAD_VLAN_STRIP))
2073                 hw->use_simple_rx = 0;
2074
2075         hw->opened = true;
2076
2077         return 0;
2078 }
2079
2080
2081 static int
2082 virtio_dev_start(struct rte_eth_dev *dev)
2083 {
2084         uint16_t nb_queues, i;
2085         struct virtnet_rx *rxvq;
2086         struct virtnet_tx *txvq __rte_unused;
2087         struct virtio_hw *hw = dev->data->dev_private;
2088         int ret;
2089
2090         /* Finish the initialization of the queues */
2091         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2092                 ret = virtio_dev_rx_queue_setup_finish(dev, i);
2093                 if (ret < 0)
2094                         return ret;
2095         }
2096         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2097                 ret = virtio_dev_tx_queue_setup_finish(dev, i);
2098                 if (ret < 0)
2099                         return ret;
2100         }
2101
2102         /* check if lsc interrupt feature is enabled */
2103         if (dev->data->dev_conf.intr_conf.lsc) {
2104                 if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2105                         PMD_DRV_LOG(ERR, "link status not supported by host");
2106                         return -ENOTSUP;
2107                 }
2108         }
2109
2110         /* Enable uio/vfio intr/eventfd mapping: althrough we already did that
2111          * in device configure, but it could be unmapped  when device is
2112          * stopped.
2113          */
2114         if (dev->data->dev_conf.intr_conf.lsc ||
2115             dev->data->dev_conf.intr_conf.rxq) {
2116                 virtio_intr_disable(dev);
2117
2118                 /* Setup interrupt callback  */
2119                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2120                         rte_intr_callback_register(dev->intr_handle,
2121                                                    virtio_interrupt_handler,
2122                                                    dev);
2123
2124                 if (virtio_intr_enable(dev) < 0) {
2125                         PMD_DRV_LOG(ERR, "interrupt enable failed");
2126                         return -EIO;
2127                 }
2128         }
2129
2130         /*Notify the backend
2131          *Otherwise the tap backend might already stop its queue due to fullness.
2132          *vhost backend will have no chance to be waked up
2133          */
2134         nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2135         if (hw->max_queue_pairs > 1) {
2136                 if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2137                         return -EINVAL;
2138         }
2139
2140         PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
2141
2142         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2143                 rxvq = dev->data->rx_queues[i];
2144                 /* Flush the old packets */
2145                 virtqueue_rxvq_flush(rxvq->vq);
2146                 virtqueue_notify(rxvq->vq);
2147         }
2148
2149         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2150                 txvq = dev->data->tx_queues[i];
2151                 virtqueue_notify(txvq->vq);
2152         }
2153
2154         PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
2155
2156         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2157                 rxvq = dev->data->rx_queues[i];
2158                 VIRTQUEUE_DUMP(rxvq->vq);
2159         }
2160
2161         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2162                 txvq = dev->data->tx_queues[i];
2163                 VIRTQUEUE_DUMP(txvq->vq);
2164         }
2165
2166         set_rxtx_funcs(dev);
2167         hw->started = true;
2168
2169         /* Initialize Link state */
2170         virtio_dev_link_update(dev, 0);
2171
2172         return 0;
2173 }
2174
2175 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2176 {
2177         struct virtio_hw *hw = dev->data->dev_private;
2178         uint16_t nr_vq = virtio_get_nr_vq(hw);
2179         const char *type __rte_unused;
2180         unsigned int i, mbuf_num = 0;
2181         struct virtqueue *vq;
2182         struct rte_mbuf *buf;
2183         int queue_type;
2184
2185         if (hw->vqs == NULL)
2186                 return;
2187
2188         for (i = 0; i < nr_vq; i++) {
2189                 vq = hw->vqs[i];
2190                 if (!vq)
2191                         continue;
2192
2193                 queue_type = virtio_get_queue_type(hw, i);
2194                 if (queue_type == VTNET_RQ)
2195                         type = "rxq";
2196                 else if (queue_type == VTNET_TQ)
2197                         type = "txq";
2198                 else
2199                         continue;
2200
2201                 PMD_INIT_LOG(DEBUG,
2202                         "Before freeing %s[%d] used and unused buf",
2203                         type, i);
2204                 VIRTQUEUE_DUMP(vq);
2205
2206                 while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2207                         rte_pktmbuf_free(buf);
2208                         mbuf_num++;
2209                 }
2210
2211                 PMD_INIT_LOG(DEBUG,
2212                         "After freeing %s[%d] used and unused buf",
2213                         type, i);
2214                 VIRTQUEUE_DUMP(vq);
2215         }
2216
2217         PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2218 }
2219
2220 /*
2221  * Stop device: disable interrupt and mark link down
2222  */
2223 static void
2224 virtio_dev_stop(struct rte_eth_dev *dev)
2225 {
2226         struct virtio_hw *hw = dev->data->dev_private;
2227         struct rte_eth_link link;
2228         struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2229
2230         PMD_INIT_LOG(DEBUG, "stop");
2231
2232         rte_spinlock_lock(&hw->state_lock);
2233         if (!hw->started)
2234                 goto out_unlock;
2235         hw->started = false;
2236
2237         if (intr_conf->lsc || intr_conf->rxq) {
2238                 virtio_intr_disable(dev);
2239
2240                 /* Reset interrupt callback  */
2241                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2242                         rte_intr_callback_unregister(dev->intr_handle,
2243                                                      virtio_interrupt_handler,
2244                                                      dev);
2245                 }
2246         }
2247
2248         memset(&link, 0, sizeof(link));
2249         rte_eth_linkstatus_set(dev, &link);
2250 out_unlock:
2251         rte_spinlock_unlock(&hw->state_lock);
2252 }
2253
2254 static int
2255 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2256 {
2257         struct rte_eth_link link;
2258         uint16_t status;
2259         struct virtio_hw *hw = dev->data->dev_private;
2260
2261         memset(&link, 0, sizeof(link));
2262         link.link_duplex = ETH_LINK_FULL_DUPLEX;
2263         link.link_speed  = ETH_SPEED_NUM_10G;
2264         link.link_autoneg = ETH_LINK_FIXED;
2265
2266         if (!hw->started) {
2267                 link.link_status = ETH_LINK_DOWN;
2268         } else if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2269                 PMD_INIT_LOG(DEBUG, "Get link status from hw");
2270                 vtpci_read_dev_config(hw,
2271                                 offsetof(struct virtio_net_config, status),
2272                                 &status, sizeof(status));
2273                 if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2274                         link.link_status = ETH_LINK_DOWN;
2275                         PMD_INIT_LOG(DEBUG, "Port %d is down",
2276                                      dev->data->port_id);
2277                 } else {
2278                         link.link_status = ETH_LINK_UP;
2279                         PMD_INIT_LOG(DEBUG, "Port %d is up",
2280                                      dev->data->port_id);
2281                 }
2282         } else {
2283                 link.link_status = ETH_LINK_UP;
2284         }
2285
2286         return rte_eth_linkstatus_set(dev, &link);
2287 }
2288
2289 static int
2290 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2291 {
2292         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2293         struct virtio_hw *hw = dev->data->dev_private;
2294         uint64_t offloads = rxmode->offloads;
2295
2296         if (mask & ETH_VLAN_FILTER_MASK) {
2297                 if ((offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
2298                                 !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2299
2300                         PMD_DRV_LOG(NOTICE,
2301                                 "vlan filtering not available on this host");
2302
2303                         return -ENOTSUP;
2304                 }
2305         }
2306
2307         if (mask & ETH_VLAN_STRIP_MASK)
2308                 hw->vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
2309
2310         return 0;
2311 }
2312
2313 static void
2314 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2315 {
2316         uint64_t tso_mask, host_features;
2317         struct virtio_hw *hw = dev->data->dev_private;
2318
2319         dev_info->speed_capa = ETH_LINK_SPEED_10G; /* fake value */
2320
2321         dev_info->max_rx_queues =
2322                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
2323         dev_info->max_tx_queues =
2324                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
2325         dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
2326         dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
2327         dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
2328
2329         host_features = VTPCI_OPS(hw)->get_features(hw);
2330         dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2331         if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
2332                 dev_info->rx_offload_capa |=
2333                         DEV_RX_OFFLOAD_TCP_CKSUM |
2334                         DEV_RX_OFFLOAD_UDP_CKSUM;
2335         }
2336         if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
2337                 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_VLAN_FILTER;
2338         tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2339                 (1ULL << VIRTIO_NET_F_GUEST_TSO6);
2340         if ((host_features & tso_mask) == tso_mask)
2341                 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;
2342
2343         dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
2344                                     DEV_TX_OFFLOAD_VLAN_INSERT;
2345         if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
2346                 dev_info->tx_offload_capa |=
2347                         DEV_TX_OFFLOAD_UDP_CKSUM |
2348                         DEV_TX_OFFLOAD_TCP_CKSUM;
2349         }
2350         tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2351                 (1ULL << VIRTIO_NET_F_HOST_TSO6);
2352         if ((host_features & tso_mask) == tso_mask)
2353                 dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
2354 }
2355
2356 /*
2357  * It enables testpmd to collect per queue stats.
2358  */
2359 static int
2360 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
2361 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
2362 __rte_unused uint8_t is_rx)
2363 {
2364         return 0;
2365 }
2366
2367 RTE_PMD_EXPORT_NAME(net_virtio, __COUNTER__);
2368 RTE_PMD_REGISTER_PCI_TABLE(net_virtio, pci_id_virtio_map);
2369 RTE_PMD_REGISTER_KMOD_DEP(net_virtio, "* igb_uio | uio_pci_generic | vfio-pci");
2370
2371 RTE_INIT(virtio_init_log)
2372 {
2373         virtio_logtype_init = rte_log_register("pmd.net.virtio.init");
2374         if (virtio_logtype_init >= 0)
2375                 rte_log_set_level(virtio_logtype_init, RTE_LOG_NOTICE);
2376         virtio_logtype_driver = rte_log_register("pmd.net.virtio.driver");
2377         if (virtio_logtype_driver >= 0)
2378                 rte_log_set_level(virtio_logtype_driver, RTE_LOG_NOTICE);
2379 }