drivers/net/virtio/virtio_ethdev.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10
11 #include <ethdev_driver.h>
12 #include <rte_memcpy.h>
13 #include <rte_string_fns.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_branch_prediction.h>
17 #include <rte_ether.h>
18 #include <rte_ip.h>
19 #include <rte_arp.h>
20 #include <rte_common.h>
21 #include <rte_errno.h>
22 #include <rte_cpuflags.h>
23 #include <rte_vect.h>
24 #include <rte_memory.h>
25 #include <rte_eal_paging.h>
26 #include <rte_eal.h>
27 #include <rte_dev.h>
28 #include <rte_cycles.h>
29 #include <rte_kvargs.h>
30
31 #include "virtio_ethdev.h"
32 #include "virtio.h"
33 #include "virtio_logs.h"
34 #include "virtqueue.h"
35 #include "virtio_rxtx.h"
36 #include "virtio_rxtx_simple.h"
37 #include "virtio_user/virtio_user_dev.h"
38
39 static int  virtio_dev_configure(struct rte_eth_dev *dev);
40 static int  virtio_dev_start(struct rte_eth_dev *dev);
41 static int virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
42 static int virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
43 static int virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
44 static int virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
45 static uint32_t virtio_dev_speed_capa_get(uint32_t speed);
46 static int virtio_dev_devargs_parse(struct rte_devargs *devargs,
47         uint32_t *speed,
48         int *vectorized);
49 static int virtio_dev_info_get(struct rte_eth_dev *dev,
50                                 struct rte_eth_dev_info *dev_info);
51 static int virtio_dev_link_update(struct rte_eth_dev *dev,
52         int wait_to_complete);
53 static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
54 static int virtio_dev_rss_hash_update(struct rte_eth_dev *dev,
55                 struct rte_eth_rss_conf *rss_conf);
56 static int virtio_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
57                 struct rte_eth_rss_conf *rss_conf);
58 static int virtio_dev_rss_reta_update(struct rte_eth_dev *dev,
59                          struct rte_eth_rss_reta_entry64 *reta_conf,
60                          uint16_t reta_size);
61 static int virtio_dev_rss_reta_query(struct rte_eth_dev *dev,
62                          struct rte_eth_rss_reta_entry64 *reta_conf,
63                          uint16_t reta_size);
64
65 static void virtio_set_hwaddr(struct virtio_hw *hw);
66 static void virtio_get_hwaddr(struct virtio_hw *hw);
67
68 static int virtio_dev_stats_get(struct rte_eth_dev *dev,
69                                  struct rte_eth_stats *stats);
70 static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
71                                  struct rte_eth_xstat *xstats, unsigned n);
72 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
73                                        struct rte_eth_xstat_name *xstats_names,
74                                        unsigned limit);
75 static int virtio_dev_stats_reset(struct rte_eth_dev *dev);
76 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
77 static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
78                                 uint16_t vlan_id, int on);
79 static int virtio_mac_addr_add(struct rte_eth_dev *dev,
80                                 struct rte_ether_addr *mac_addr,
81                                 uint32_t index, uint32_t vmdq);
82 static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
83 static int virtio_mac_addr_set(struct rte_eth_dev *dev,
84                                 struct rte_ether_addr *mac_addr);
85
86 static int virtio_intr_disable(struct rte_eth_dev *dev);
87 static int virtio_get_monitor_addr(void *rx_queue,
88                                 struct rte_power_monitor_cond *pmc);
89
90 static int virtio_dev_queue_stats_mapping_set(
91         struct rte_eth_dev *eth_dev,
92         uint16_t queue_id,
93         uint8_t stat_idx,
94         uint8_t is_rx);
95
96 static void virtio_notify_peers(struct rte_eth_dev *dev);
97 static void virtio_ack_link_announce(struct rte_eth_dev *dev);
98
99 struct rte_virtio_xstats_name_off {
100         char name[RTE_ETH_XSTATS_NAME_SIZE];
101         unsigned offset;
102 };
103
104 /* [rt]x_qX_ is prepended to the name string here */
105 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
106         {"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
107         {"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
108         {"errors",                 offsetof(struct virtnet_rx, stats.errors)},
109         {"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
110         {"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
111         {"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
112         {"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
113         {"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
114         {"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
115         {"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
116         {"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
117         {"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
118         {"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
119 };
120
121 /* [rt]x_qX_ is prepended to the name string here */
122 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
123         {"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
124         {"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
125         {"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
126         {"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
127         {"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
128         {"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
129         {"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
130         {"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
131         {"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
132         {"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
133         {"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
134         {"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
135 };
136
137 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
138                             sizeof(rte_virtio_rxq_stat_strings[0]))
139 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
140                             sizeof(rte_virtio_txq_stat_strings[0]))
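/*
 * Each entry in the tables above maps a human-readable counter name to the
 * byte offset of the corresponding field inside struct virtnet_rx or
 * struct virtnet_tx.  The xstats handlers later in this file walk these
 * tables and read every counter generically, e.g.
 * *(uint64_t *)((char *)rxvq + offset), so adding a per-queue statistic
 * only needs a new field in the stats structure and one table entry here.
 */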
141
142 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
143
144 static struct virtio_pmd_ctrl *
145 virtio_send_command_packed(struct virtnet_ctl *cvq,
146                            struct virtio_pmd_ctrl *ctrl,
147                            int *dlen, int pkt_num)
148 {
149         struct virtqueue *vq = virtnet_cq_to_vq(cvq);
150         int head;
151         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
152         struct virtio_pmd_ctrl *result;
153         uint16_t flags;
154         int sum = 0;
155         int nb_descs = 0;
156         int k;
157
158         /*
159          * Format is enforced in qemu code:
160          * One TX packet for header;
161          * At least one TX packet per argument;
162          * One RX packet for ACK.
163          */
164         head = vq->vq_avail_idx;
165         flags = vq->vq_packed.cached_flags;
166         desc[head].addr = cvq->virtio_net_hdr_mem;
167         desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
168         vq->vq_free_cnt--;
169         nb_descs++;
170         if (++vq->vq_avail_idx >= vq->vq_nentries) {
171                 vq->vq_avail_idx -= vq->vq_nentries;
172                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
173         }
174
175         for (k = 0; k < pkt_num; k++) {
176                 desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
177                         + sizeof(struct virtio_net_ctrl_hdr)
178                         + sizeof(ctrl->status) + sizeof(uint8_t) * sum;
179                 desc[vq->vq_avail_idx].len = dlen[k];
180                 desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
181                         vq->vq_packed.cached_flags;
182                 sum += dlen[k];
183                 vq->vq_free_cnt--;
184                 nb_descs++;
185                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
186                         vq->vq_avail_idx -= vq->vq_nentries;
187                         vq->vq_packed.cached_flags ^=
188                                 VRING_PACKED_DESC_F_AVAIL_USED;
189                 }
190         }
191
192         desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
193                 + sizeof(struct virtio_net_ctrl_hdr);
194         desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
195         desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
196                 vq->vq_packed.cached_flags;
197         vq->vq_free_cnt--;
198         nb_descs++;
199         if (++vq->vq_avail_idx >= vq->vq_nentries) {
200                 vq->vq_avail_idx -= vq->vq_nentries;
201                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
202         }
203
204         virtqueue_store_flags_packed(&desc[head], VRING_DESC_F_NEXT | flags,
205                         vq->hw->weak_barriers);
206
207         virtio_wmb(vq->hw->weak_barriers);
208         virtqueue_notify(vq);
209
210         /* wait for used desc in virtqueue
211          * desc_is_used has a load-acquire or rte_io_rmb inside
212          */
213         while (!desc_is_used(&desc[head], vq))
214                 usleep(100);
215
216         /* now get used descriptors */
217         vq->vq_free_cnt += nb_descs;
218         vq->vq_used_cons_idx += nb_descs;
219         if (vq->vq_used_cons_idx >= vq->vq_nentries) {
220                 vq->vq_used_cons_idx -= vq->vq_nentries;
221                 vq->vq_packed.used_wrap_counter ^= 1;
222         }
223
224         PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
225                         "vq->vq_avail_idx=%d\n"
226                         "vq->vq_used_cons_idx=%d\n"
227                         "vq->vq_packed.cached_flags=0x%x\n"
228                         "vq->vq_packed.used_wrap_counter=%d",
229                         vq->vq_free_cnt,
230                         vq->vq_avail_idx,
231                         vq->vq_used_cons_idx,
232                         vq->vq_packed.cached_flags,
233                         vq->vq_packed.used_wrap_counter);
234
235         result = cvq->virtio_net_hdr_mz->addr;
236         return result;
237 }
238
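/*
 * Packed-ring control command layout used above: the command occupies
 * pkt_num + 2 descriptors starting at 'head' - one read-only descriptor
 * for the virtio_net_ctrl_hdr, one read-only descriptor per data segment
 * described by dlen[], and a final write-only descriptor where the device
 * stores the ack status.  The head descriptor's flags are written last,
 * with the appropriate barrier, so the device only ever sees a complete
 * chain, and the driver then polls desc_is_used() (sleeping 100us per
 * iteration) before reclaiming the descriptors.
 */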
239 static struct virtio_pmd_ctrl *
240 virtio_send_command_split(struct virtnet_ctl *cvq,
241                           struct virtio_pmd_ctrl *ctrl,
242                           int *dlen, int pkt_num)
243 {
244         struct virtio_pmd_ctrl *result;
245         struct virtqueue *vq = virtnet_cq_to_vq(cvq);
246         uint32_t head, i;
247         int k, sum = 0;
248
249         head = vq->vq_desc_head_idx;
250
251         /*
252          * Format is enforced in qemu code:
253          * One TX packet for header;
254          * At least one TX packet per argument;
255          * One RX packet for ACK.
256          */
257         vq->vq_split.ring.desc[head].flags = VRING_DESC_F_NEXT;
258         vq->vq_split.ring.desc[head].addr = cvq->virtio_net_hdr_mem;
259         vq->vq_split.ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
260         vq->vq_free_cnt--;
261         i = vq->vq_split.ring.desc[head].next;
262
263         for (k = 0; k < pkt_num; k++) {
264                 vq->vq_split.ring.desc[i].flags = VRING_DESC_F_NEXT;
265                 vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
266                         + sizeof(struct virtio_net_ctrl_hdr)
267                         + sizeof(ctrl->status) + sizeof(uint8_t)*sum;
268                 vq->vq_split.ring.desc[i].len = dlen[k];
269                 sum += dlen[k];
270                 vq->vq_free_cnt--;
271                 i = vq->vq_split.ring.desc[i].next;
272         }
273
274         vq->vq_split.ring.desc[i].flags = VRING_DESC_F_WRITE;
275         vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
276                         + sizeof(struct virtio_net_ctrl_hdr);
277         vq->vq_split.ring.desc[i].len = sizeof(ctrl->status);
278         vq->vq_free_cnt--;
279
280         vq->vq_desc_head_idx = vq->vq_split.ring.desc[i].next;
281
282         vq_update_avail_ring(vq, head);
283         vq_update_avail_idx(vq);
284
285         PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
286
287         virtqueue_notify(vq);
288
289         while (virtqueue_nused(vq) == 0)
290                 usleep(100);
291
292         while (virtqueue_nused(vq)) {
293                 uint32_t idx, desc_idx, used_idx;
294                 struct vring_used_elem *uep;
295
296                 used_idx = (uint32_t)(vq->vq_used_cons_idx
297                                 & (vq->vq_nentries - 1));
298                 uep = &vq->vq_split.ring.used->ring[used_idx];
299                 idx = (uint32_t) uep->id;
300                 desc_idx = idx;
301
302                 while (vq->vq_split.ring.desc[desc_idx].flags &
303                                 VRING_DESC_F_NEXT) {
304                         desc_idx = vq->vq_split.ring.desc[desc_idx].next;
305                         vq->vq_free_cnt++;
306                 }
307
308                 vq->vq_split.ring.desc[desc_idx].next = vq->vq_desc_head_idx;
309                 vq->vq_desc_head_idx = idx;
310
311                 vq->vq_used_cons_idx++;
312                 vq->vq_free_cnt++;
313         }
314
315         PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
316                         vq->vq_free_cnt, vq->vq_desc_head_idx);
317
318         result = cvq->virtio_net_hdr_mz->addr;
319         return result;
320 }
321
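/*
 * Split-ring variant of the same command layout: a descriptor chain is
 * taken from the free list starting at vq_desc_head_idx (header, one
 * descriptor per data segment, then a write-only status descriptor), the
 * chain head is published through the avail ring, and completed chains are
 * reclaimed from the used ring by following the VRING_DESC_F_NEXT links.
 */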
322 static int
323 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
324                     int *dlen, int pkt_num)
325 {
326         virtio_net_ctrl_ack status = ~0;
327         struct virtio_pmd_ctrl *result;
328         struct virtqueue *vq;
329
330         ctrl->status = status;
331
332         if (!cvq) {
333                 PMD_INIT_LOG(ERR, "Control queue is not supported.");
334                 return -1;
335         }
336
337         rte_spinlock_lock(&cvq->lock);
338         vq = virtnet_cq_to_vq(cvq);
339
340         PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
341                 "vq->hw->cvq = %p vq = %p",
342                 vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
343
344         if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
345                 rte_spinlock_unlock(&cvq->lock);
346                 return -1;
347         }
348
349         memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
350                 sizeof(struct virtio_pmd_ctrl));
351
352         if (virtio_with_packed_queue(vq->hw))
353                 result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num);
354         else
355                 result = virtio_send_command_split(cvq, ctrl, dlen, pkt_num);
356
357         rte_spinlock_unlock(&cvq->lock);
358         return result->status;
359 }
360
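/*
 * Illustrative use of virtio_send_command(): callers fill in the control
 * header, copy the payload into ctrl.data and pass each data segment
 * length through dlen[].  A minimal sketch, mirroring the RX-mode handlers
 * later in this file (e.g. virtio_dev_promiscuous_enable()):
 *
 *	struct virtio_pmd_ctrl ctrl;
 *	int dlen[1];
 *
 *	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
 *	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
 *	ctrl.data[0] = 1;
 *	dlen[0] = 1;
 *	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
 *
 * The return value is the status byte written back by the device (0 on
 * success) or -1 if the control queue is missing or lacks free descriptors.
 */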
361 static int
362 virtio_set_multiple_queues_rss(struct rte_eth_dev *dev, uint16_t nb_queues)
363 {
364         struct virtio_hw *hw = dev->data->dev_private;
365         struct virtio_pmd_ctrl ctrl;
366         struct virtio_net_ctrl_rss rss;
367         int dlen, ret;
368
369         rss.hash_types = hw->rss_hash_types & VIRTIO_NET_HASH_TYPE_MASK;
370         RTE_BUILD_BUG_ON(!RTE_IS_POWER_OF_2(VIRTIO_NET_RSS_RETA_SIZE));
371         rss.indirection_table_mask = VIRTIO_NET_RSS_RETA_SIZE - 1;
372         rss.unclassified_queue = 0;
373         memcpy(rss.indirection_table, hw->rss_reta, VIRTIO_NET_RSS_RETA_SIZE * sizeof(uint16_t));
374         rss.max_tx_vq = nb_queues;
375         rss.hash_key_length = VIRTIO_NET_RSS_KEY_SIZE;
376         memcpy(rss.hash_key_data, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
377
378         ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
379         ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_RSS_CONFIG;
380         memcpy(ctrl.data, &rss, sizeof(rss));
381
382         dlen = sizeof(rss);
383
384         ret = virtio_send_command(hw->cvq, &ctrl, &dlen, 1);
385         if (ret) {
386                 PMD_INIT_LOG(ERR, "RSS multiqueue configured but send command failed");
387                 return -EINVAL;
388         }
389
390         return 0;
391 }
392
393 static int
394 virtio_set_multiple_queues_auto(struct rte_eth_dev *dev, uint16_t nb_queues)
395 {
396         struct virtio_hw *hw = dev->data->dev_private;
397         struct virtio_pmd_ctrl ctrl;
398         int dlen;
399         int ret;
400
401         ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
402         ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
403         memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
404
405         dlen = sizeof(uint16_t);
406
407         ret = virtio_send_command(hw->cvq, &ctrl, &dlen, 1);
408         if (ret) {
409                 PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
410                           "failed, device may be left in an inconsistent state");
411                 return -EINVAL;
412         }
413
414         return 0;
415 }
416
417 static int
418 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
419 {
420         struct virtio_hw *hw = dev->data->dev_private;
421
422         if (virtio_with_feature(hw, VIRTIO_NET_F_RSS))
423                 return virtio_set_multiple_queues_rss(dev, nb_queues);
424         else
425                 return virtio_set_multiple_queues_auto(dev, nb_queues);
426 }
427
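/*
 * Queue pair activation: when VIRTIO_NET_F_RSS was negotiated the
 * VIRTIO_NET_CTRL_MQ command carries a full struct virtio_net_ctrl_rss
 * (hash types, a VIRTIO_NET_RSS_RETA_SIZE-entry indirection table,
 * max_tx_vq and the hash key); otherwise only the requested number of
 * queue pairs is sent with VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET.
 */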
428 static uint16_t
429 virtio_get_nr_vq(struct virtio_hw *hw)
430 {
431         uint16_t nr_vq = hw->max_queue_pairs * 2;
432
433         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
434                 nr_vq += 1;
435
436         return nr_vq;
437 }
438
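/*
 * Total virtqueue count: two per queue pair (Rx and Tx) plus one control
 * queue when VIRTIO_NET_F_CTRL_VQ is negotiated.  Queue pair N uses vq
 * index 2 * N + VTNET_SQ_RQ_QUEUE_IDX for Rx (see
 * virtio_check_scatter_on_all_rx_queues()), with Tx at the next index.
 */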
439 static void
440 virtio_init_vring(struct virtqueue *vq)
441 {
442         int size = vq->vq_nentries;
443         uint8_t *ring_mem = vq->vq_ring_virt_mem;
444
445         PMD_INIT_FUNC_TRACE();
446
447         memset(ring_mem, 0, vq->vq_ring_size);
448
449         vq->vq_used_cons_idx = 0;
450         vq->vq_desc_head_idx = 0;
451         vq->vq_avail_idx = 0;
452         vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
453         vq->vq_free_cnt = vq->vq_nentries;
454         memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
455         if (virtio_with_packed_queue(vq->hw)) {
456                 vring_init_packed(&vq->vq_packed.ring, ring_mem,
457                                   VIRTIO_VRING_ALIGN, size);
458                 vring_desc_init_packed(vq, size);
459         } else {
460                 struct vring *vr = &vq->vq_split.ring;
461
462                 vring_init_split(vr, ring_mem, VIRTIO_VRING_ALIGN, size);
463                 vring_desc_init_split(vr->desc, size);
464         }
465         /*
466          * Disable interrupts from the device (host) to the guest.
467          */
468         virtqueue_disable_intr(vq);
469 }
470
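/*
 * virtio_init_vring() resets a queue in place: the indices and free
 * counter go back to their initial values, the per-descriptor extra area
 * is cleared and the ring layout (packed or split) is rebuilt at
 * VIRTIO_VRING_ALIGN alignment, with device-to-driver interrupts disabled
 * by default.
 */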
471 static int
472 virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
473 {
474         char vq_name[VIRTQUEUE_MAX_NAME_SZ];
475         char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
476         const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
477         unsigned int vq_size, size;
478         struct virtio_hw *hw = dev->data->dev_private;
479         struct virtnet_rx *rxvq = NULL;
480         struct virtnet_tx *txvq = NULL;
481         struct virtnet_ctl *cvq = NULL;
482         struct virtqueue *vq;
483         size_t sz_hdr_mz = 0;
484         void *sw_ring = NULL;
485         int queue_type = virtio_get_queue_type(hw, queue_idx);
486         int ret;
487         int numa_node = dev->device->numa_node;
488         struct rte_mbuf *fake_mbuf = NULL;
489
490         PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
491                         queue_idx, numa_node);
492
493         /*
494          * Read the virtqueue size from the Queue Size field.
495          * It is always a power of 2, and a value of 0 means the virtqueue does not exist.
496          */
497         vq_size = VIRTIO_OPS(hw)->get_queue_num(hw, queue_idx);
498         PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
499         if (vq_size == 0) {
500                 PMD_INIT_LOG(ERR, "virtqueue does not exist");
501                 return -EINVAL;
502         }
503
504         if (!virtio_with_packed_queue(hw) && !rte_is_power_of_2(vq_size)) {
505                 PMD_INIT_LOG(ERR, "split virtqueue size is not power of 2");
506                 return -EINVAL;
507         }
508
509         snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
510                  dev->data->port_id, queue_idx);
511
512         size = RTE_ALIGN_CEIL(sizeof(*vq) +
513                                 vq_size * sizeof(struct vq_desc_extra),
514                                 RTE_CACHE_LINE_SIZE);
515         if (queue_type == VTNET_TQ) {
516                 /*
517                  * For each xmit packet, allocate a virtio_net_hdr
518                  * and indirect ring elements
519                  */
520                 sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
521         } else if (queue_type == VTNET_CQ) {
522                 /* Allocate a page for control vq command, data and status */
523                 sz_hdr_mz = rte_mem_page_size();
524         }
525
526         vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
527                                 numa_node);
528         if (vq == NULL) {
529                 PMD_INIT_LOG(ERR, "can not allocate vq");
530                 return -ENOMEM;
531         }
532         hw->vqs[queue_idx] = vq;
533
534         vq->hw = hw;
535         vq->vq_queue_index = queue_idx;
536         vq->vq_nentries = vq_size;
537         if (virtio_with_packed_queue(hw)) {
538                 vq->vq_packed.used_wrap_counter = 1;
539                 vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL;
540                 vq->vq_packed.event_flags_shadow = 0;
541                 if (queue_type == VTNET_RQ)
542                         vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE;
543         }
544
545         /*
546          * Reserve a memzone for vring elements
547          */
548         size = vring_size(hw, vq_size, VIRTIO_VRING_ALIGN);
549         vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_VRING_ALIGN);
550         PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
551                      size, vq->vq_ring_size);
552
553         mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
554                         numa_node, RTE_MEMZONE_IOVA_CONTIG,
555                         VIRTIO_VRING_ALIGN);
556         if (mz == NULL) {
557                 if (rte_errno == EEXIST)
558                         mz = rte_memzone_lookup(vq_name);
559                 if (mz == NULL) {
560                         ret = -ENOMEM;
561                         goto free_vq;
562                 }
563         }
564
565         memset(mz->addr, 0, mz->len);
566
567         if (hw->use_va)
568                 vq->vq_ring_mem = (uintptr_t)mz->addr;
569         else
570                 vq->vq_ring_mem = mz->iova;
571
572         vq->vq_ring_virt_mem = mz->addr;
573         PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64, vq->vq_ring_mem);
574         PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: %p", vq->vq_ring_virt_mem);
575
576         virtio_init_vring(vq);
577
578         if (sz_hdr_mz) {
579                 snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
580                          dev->data->port_id, queue_idx);
581                 hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
582                                 numa_node, RTE_MEMZONE_IOVA_CONTIG,
583                                 RTE_CACHE_LINE_SIZE);
584                 if (hdr_mz == NULL) {
585                         if (rte_errno == EEXIST)
586                                 hdr_mz = rte_memzone_lookup(vq_hdr_name);
587                         if (hdr_mz == NULL) {
588                                 ret = -ENOMEM;
589                                 goto free_mz;
590                         }
591                 }
592         }
593
594         if (queue_type == VTNET_RQ) {
595                 size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
596                                sizeof(vq->sw_ring[0]);
597
598                 sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
599                                 RTE_CACHE_LINE_SIZE, numa_node);
600                 if (!sw_ring) {
601                         PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
602                         ret = -ENOMEM;
603                         goto free_hdr_mz;
604                 }
605
606                 fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
607                                 RTE_CACHE_LINE_SIZE, numa_node);
608                 if (!fake_mbuf) {
609                         PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
610                         ret = -ENOMEM;
611                         goto free_sw_ring;
612                 }
613
614                 vq->sw_ring = sw_ring;
615                 rxvq = &vq->rxq;
616                 rxvq->port_id = dev->data->port_id;
617                 rxvq->mz = mz;
618                 rxvq->fake_mbuf = fake_mbuf;
619         } else if (queue_type == VTNET_TQ) {
620                 txvq = &vq->txq;
621                 txvq->port_id = dev->data->port_id;
622                 txvq->mz = mz;
623                 txvq->virtio_net_hdr_mz = hdr_mz;
624                 if (hw->use_va)
625                         txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
626                 else
627                         txvq->virtio_net_hdr_mem = hdr_mz->iova;
628         } else if (queue_type == VTNET_CQ) {
629                 cvq = &vq->cq;
630                 cvq->mz = mz;
631                 cvq->virtio_net_hdr_mz = hdr_mz;
632                 if (hw->use_va)
633                         cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
634                 else
635                         cvq->virtio_net_hdr_mem = hdr_mz->iova;
636                 memset(cvq->virtio_net_hdr_mz->addr, 0, rte_mem_page_size());
637
638                 hw->cvq = cvq;
639         }
640
641         if (hw->use_va)
642                 vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_addr);
643         else
644                 vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_iova);
645
646         if (queue_type == VTNET_TQ) {
647                 struct virtio_tx_region *txr;
648                 unsigned int i;
649
650                 txr = hdr_mz->addr;
651                 memset(txr, 0, vq_size * sizeof(*txr));
652                 for (i = 0; i < vq_size; i++) {
653                         /* first indirect descriptor is always the tx header */
654                         if (!virtio_with_packed_queue(hw)) {
655                                 struct vring_desc *start_dp = txr[i].tx_indir;
656                                 vring_desc_init_split(start_dp,
657                                                       RTE_DIM(txr[i].tx_indir));
658                                 start_dp->addr = txvq->virtio_net_hdr_mem
659                                         + i * sizeof(*txr)
660                                         + offsetof(struct virtio_tx_region,
661                                                    tx_hdr);
662                                 start_dp->len = hw->vtnet_hdr_size;
663                                 start_dp->flags = VRING_DESC_F_NEXT;
664                         } else {
665                                 struct vring_packed_desc *start_dp =
666                                         txr[i].tx_packed_indir;
667                                 vring_desc_init_indirect_packed(start_dp,
668                                       RTE_DIM(txr[i].tx_packed_indir));
669                                 start_dp->addr = txvq->virtio_net_hdr_mem
670                                         + i * sizeof(*txr)
671                                         + offsetof(struct virtio_tx_region,
672                                                    tx_hdr);
673                                 start_dp->len = hw->vtnet_hdr_size;
674                         }
675                 }
676         }
677
678         if (VIRTIO_OPS(hw)->setup_queue(hw, vq) < 0) {
679                 PMD_INIT_LOG(ERR, "setup_queue failed");
680                 ret = -EINVAL;
681                 goto clean_vq;
682         }
683
684         return 0;
685
686 clean_vq:
687         hw->cvq = NULL;
688         rte_free(fake_mbuf);
689 free_sw_ring:
690         rte_free(sw_ring);
691 free_hdr_mz:
692         rte_memzone_free(hdr_mz);
693 free_mz:
694         rte_memzone_free(mz);
695 free_vq:
696         rte_free(vq);
697         hw->vqs[queue_idx] = NULL;
698
699         return ret;
700 }
701
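/*
 * Queue setup flow used above: read the queue size advertised by the
 * device, allocate the virtqueue structure (including one vq_desc_extra
 * per entry), reserve an IOVA-contiguous memzone for the vring and,
 * depending on the queue type, a header memzone (one virtio_tx_region per
 * Tx descriptor, or a single page for the control queue command/status),
 * plus the SW ring and fake mbuf for Rx queues.  Any failure unwinds
 * through the labels at the end so a partially built queue is fully freed.
 */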
702 static void
703 virtio_free_queues(struct virtio_hw *hw)
704 {
705         uint16_t nr_vq = virtio_get_nr_vq(hw);
706         struct virtqueue *vq;
707         int queue_type;
708         uint16_t i;
709
710         if (hw->vqs == NULL)
711                 return;
712
713         for (i = 0; i < nr_vq; i++) {
714                 vq = hw->vqs[i];
715                 if (!vq)
716                         continue;
717
718                 queue_type = virtio_get_queue_type(hw, i);
719                 if (queue_type == VTNET_RQ) {
720                         rte_free(vq->rxq.fake_mbuf);
721                         rte_free(vq->sw_ring);
722                         rte_memzone_free(vq->rxq.mz);
723                 } else if (queue_type == VTNET_TQ) {
724                         rte_memzone_free(vq->txq.mz);
725                         rte_memzone_free(vq->txq.virtio_net_hdr_mz);
726                 } else {
727                         rte_memzone_free(vq->cq.mz);
728                         rte_memzone_free(vq->cq.virtio_net_hdr_mz);
729                 }
730
731                 rte_free(vq);
732                 hw->vqs[i] = NULL;
733         }
734
735         rte_free(hw->vqs);
736         hw->vqs = NULL;
737 }
738
739 static int
740 virtio_alloc_queues(struct rte_eth_dev *dev)
741 {
742         struct virtio_hw *hw = dev->data->dev_private;
743         uint16_t nr_vq = virtio_get_nr_vq(hw);
744         uint16_t i;
745         int ret;
746
747         hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
748         if (!hw->vqs) {
749                 PMD_INIT_LOG(ERR, "failed to allocate vqs");
750                 return -ENOMEM;
751         }
752
753         for (i = 0; i < nr_vq; i++) {
754                 ret = virtio_init_queue(dev, i);
755                 if (ret < 0) {
756                         virtio_free_queues(hw);
757                         return ret;
758                 }
759         }
760
761         return 0;
762 }
763
764 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);
765
766 static void
767 virtio_free_rss(struct virtio_hw *hw)
768 {
769         rte_free(hw->rss_key);
770         hw->rss_key = NULL;
771
772         rte_free(hw->rss_reta);
773         hw->rss_reta = NULL;
774 }
775
776 int
777 virtio_dev_close(struct rte_eth_dev *dev)
778 {
779         struct virtio_hw *hw = dev->data->dev_private;
780         struct rte_eth_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
781
782         PMD_INIT_LOG(DEBUG, "virtio_dev_close");
783         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
784                 return 0;
785
786         if (!hw->opened)
787                 return 0;
788         hw->opened = 0;
789
790         /* reset the NIC */
791         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
792                 VIRTIO_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
793         if (intr_conf->rxq)
794                 virtio_queues_unbind_intr(dev);
795
796         if (intr_conf->lsc || intr_conf->rxq) {
797                 virtio_intr_disable(dev);
798                 rte_intr_efd_disable(dev->intr_handle);
799                 rte_intr_vec_list_free(dev->intr_handle);
800         }
801
802         virtio_reset(hw);
803         virtio_dev_free_mbufs(dev);
804         virtio_free_queues(hw);
805         virtio_free_rss(hw);
806
807         return VIRTIO_OPS(hw)->dev_close(hw);
808 }
809
810 static int
811 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
812 {
813         struct virtio_hw *hw = dev->data->dev_private;
814         struct virtio_pmd_ctrl ctrl;
815         int dlen[1];
816         int ret;
817
818         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
819                 PMD_INIT_LOG(INFO, "host does not support rx control");
820                 return -ENOTSUP;
821         }
822
823         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
824         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
825         ctrl.data[0] = 1;
826         dlen[0] = 1;
827
828         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
829         if (ret) {
830                 PMD_INIT_LOG(ERR, "Failed to enable promisc");
831                 return -EAGAIN;
832         }
833
834         return 0;
835 }
836
837 static int
838 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
839 {
840         struct virtio_hw *hw = dev->data->dev_private;
841         struct virtio_pmd_ctrl ctrl;
842         int dlen[1];
843         int ret;
844
845         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
846                 PMD_INIT_LOG(INFO, "host does not support rx control");
847                 return -ENOTSUP;
848         }
849
850         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
851         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
852         ctrl.data[0] = 0;
853         dlen[0] = 1;
854
855         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
856         if (ret) {
857                 PMD_INIT_LOG(ERR, "Failed to disable promisc");
858                 return -EAGAIN;
859         }
860
861         return 0;
862 }
863
864 static int
865 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
866 {
867         struct virtio_hw *hw = dev->data->dev_private;
868         struct virtio_pmd_ctrl ctrl;
869         int dlen[1];
870         int ret;
871
872         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
873                 PMD_INIT_LOG(INFO, "host does not support rx control");
874                 return -ENOTSUP;
875         }
876
877         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
878         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
879         ctrl.data[0] = 1;
880         dlen[0] = 1;
881
882         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
883         if (ret) {
884                 PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
885                 return -EAGAIN;
886         }
887
888         return 0;
889 }
890
891 static int
892 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
893 {
894         struct virtio_hw *hw = dev->data->dev_private;
895         struct virtio_pmd_ctrl ctrl;
896         int dlen[1];
897         int ret;
898
899         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
900                 PMD_INIT_LOG(INFO, "host does not support rx control");
901                 return -ENOTSUP;
902         }
903
904         ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
905         ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
906         ctrl.data[0] = 0;
907         dlen[0] = 1;
908
909         ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
910         if (ret) {
911                 PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
912                 return -EAGAIN;
913         }
914
915         return 0;
916 }
917
918 uint16_t
919 virtio_rx_mem_pool_buf_size(struct rte_mempool *mp)
920 {
921         return rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
922 }
923
924 bool
925 virtio_rx_check_scatter(uint16_t max_rx_pkt_len, uint16_t rx_buf_size,
926                         bool rx_scatter_enabled, const char **error)
927 {
928         if (!rx_scatter_enabled && max_rx_pkt_len > rx_buf_size) {
929                 *error = "Rx scatter is disabled and RxQ mbuf pool object size is too small";
930                 return false;
931         }
932
933         return true;
934 }
935
936 static bool
937 virtio_check_scatter_on_all_rx_queues(struct rte_eth_dev *dev,
938                                       uint16_t frame_size)
939 {
940         struct virtio_hw *hw = dev->data->dev_private;
941         struct virtnet_rx *rxvq;
942         struct virtqueue *vq;
943         unsigned int qidx;
944         uint16_t buf_size;
945         const char *error;
946
947         if (hw->vqs == NULL)
948                 return true;
949
950         for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
951                 vq = hw->vqs[2 * qidx + VTNET_SQ_RQ_QUEUE_IDX];
952                 if (vq == NULL)
953                         continue;
954
955                 rxvq = &vq->rxq;
956                 if (rxvq->mpool == NULL)
957                         continue;
958                 buf_size = virtio_rx_mem_pool_buf_size(rxvq->mpool);
959
960                 if (!virtio_rx_check_scatter(frame_size, buf_size,
961                                              hw->rx_ol_scatter, &error)) {
962                         PMD_INIT_LOG(ERR, "MTU check for RxQ %u failed: %s",
963                                      qidx, error);
964                         return false;
965                 }
966         }
967
968         return true;
969 }
970
971 #define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
972 static int
973 virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
974 {
975         struct virtio_hw *hw = dev->data->dev_private;
976         uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
977                                  hw->vtnet_hdr_size;
978         uint32_t frame_size = mtu + ether_hdr_len;
979         uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;
980
981         max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);
982
983         if (mtu < RTE_ETHER_MIN_MTU || frame_size > max_frame_size) {
984                 PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
985                         RTE_ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
986                 return -EINVAL;
987         }
988
989         if (!virtio_check_scatter_on_all_rx_queues(dev, frame_size)) {
990                 PMD_INIT_LOG(ERR, "MTU vs Rx scatter and Rx buffers check failed");
991                 return -EINVAL;
992         }
993
994         hw->max_rx_pkt_len = frame_size;
995
996         return 0;
997 }
998
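/*
 * Frame size accounting used by the MTU check above:
 *
 *	frame_size = mtu + RTE_ETHER_HDR_LEN (14) + VLAN_TAG_LEN (4)
 *		     + hw->vtnet_hdr_size
 *
 * For example, assuming the 12-byte virtio-net header used with mergeable
 * Rx buffers, a 1500-byte MTU yields a 1530-byte frame, which must stay
 * within both hw->max_mtu plus the same overhead and VIRTIO_MAX_RX_PKTLEN.
 */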
999 static int
1000 virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
1001 {
1002         struct virtio_hw *hw = dev->data->dev_private;
1003         struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
1004         struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
1005
1006         virtqueue_enable_intr(vq);
1007         virtio_mb(hw->weak_barriers);
1008         return 0;
1009 }
1010
1011 static int
1012 virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
1013 {
1014         struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
1015         struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
1016
1017         virtqueue_disable_intr(vq);
1018         return 0;
1019 }
1020
1021 /*
1022  * dev_ops for virtio, bare necessities for basic operation
1023  */
1024 static const struct eth_dev_ops virtio_eth_dev_ops = {
1025         .dev_configure           = virtio_dev_configure,
1026         .dev_start               = virtio_dev_start,
1027         .dev_stop                = virtio_dev_stop,
1028         .dev_close               = virtio_dev_close,
1029         .promiscuous_enable      = virtio_dev_promiscuous_enable,
1030         .promiscuous_disable     = virtio_dev_promiscuous_disable,
1031         .allmulticast_enable     = virtio_dev_allmulticast_enable,
1032         .allmulticast_disable    = virtio_dev_allmulticast_disable,
1033         .mtu_set                 = virtio_mtu_set,
1034         .dev_infos_get           = virtio_dev_info_get,
1035         .stats_get               = virtio_dev_stats_get,
1036         .xstats_get              = virtio_dev_xstats_get,
1037         .xstats_get_names        = virtio_dev_xstats_get_names,
1038         .stats_reset             = virtio_dev_stats_reset,
1039         .xstats_reset            = virtio_dev_stats_reset,
1040         .link_update             = virtio_dev_link_update,
1041         .vlan_offload_set        = virtio_dev_vlan_offload_set,
1042         .rx_queue_setup          = virtio_dev_rx_queue_setup,
1043         .rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
1044         .rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
1045         .tx_queue_setup          = virtio_dev_tx_queue_setup,
1046         .rss_hash_update         = virtio_dev_rss_hash_update,
1047         .rss_hash_conf_get       = virtio_dev_rss_hash_conf_get,
1048         .reta_update             = virtio_dev_rss_reta_update,
1049         .reta_query              = virtio_dev_rss_reta_query,
1050         /* collect stats per queue */
1051         .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
1052         .vlan_filter_set         = virtio_vlan_filter_set,
1053         .mac_addr_add            = virtio_mac_addr_add,
1054         .mac_addr_remove         = virtio_mac_addr_remove,
1055         .mac_addr_set            = virtio_mac_addr_set,
1056         .get_monitor_addr        = virtio_get_monitor_addr,
1057 };
1058
1059 /*
1060  * dev_ops for virtio-user in secondary processes, as we just have
1061  * some limited supports currently.
1062  */
1063 const struct eth_dev_ops virtio_user_secondary_eth_dev_ops = {
1064         .dev_infos_get           = virtio_dev_info_get,
1065         .stats_get               = virtio_dev_stats_get,
1066         .xstats_get              = virtio_dev_xstats_get,
1067         .xstats_get_names        = virtio_dev_xstats_get_names,
1068         .stats_reset             = virtio_dev_stats_reset,
1069         .xstats_reset            = virtio_dev_stats_reset,
1070         /* collect stats per queue */
1071         .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
1072 };
1073
1074 static void
1075 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1076 {
1077         unsigned i;
1078
1079         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1080                 const struct virtnet_tx *txvq = dev->data->tx_queues[i];
1081                 if (txvq == NULL)
1082                         continue;
1083
1084                 stats->opackets += txvq->stats.packets;
1085                 stats->obytes += txvq->stats.bytes;
1086
1087                 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1088                         stats->q_opackets[i] = txvq->stats.packets;
1089                         stats->q_obytes[i] = txvq->stats.bytes;
1090                 }
1091         }
1092
1093         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1094                 const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1095                 if (rxvq == NULL)
1096                         continue;
1097
1098                 stats->ipackets += rxvq->stats.packets;
1099                 stats->ibytes += rxvq->stats.bytes;
1100                 stats->ierrors += rxvq->stats.errors;
1101
1102                 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1103                         stats->q_ipackets[i] = rxvq->stats.packets;
1104                         stats->q_ibytes[i] = rxvq->stats.bytes;
1105                 }
1106         }
1107
1108         stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
1109 }
1110
1111 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
1112                                        struct rte_eth_xstat_name *xstats_names,
1113                                        __rte_unused unsigned limit)
1114 {
1115         unsigned i;
1116         unsigned count = 0;
1117         unsigned t;
1118
1119         unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1120                 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1121
1122         if (xstats_names != NULL) {
1123                 /* Note: limit checked in rte_eth_xstats_get_names() */
1124
1125                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1126                         struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1127                         if (rxvq == NULL)
1128                                 continue;
1129                         for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1130                                 snprintf(xstats_names[count].name,
1131                                         sizeof(xstats_names[count].name),
1132                                         "rx_q%u_%s", i,
1133                                         rte_virtio_rxq_stat_strings[t].name);
1134                                 count++;
1135                         }
1136                 }
1137
1138                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1139                         struct virtnet_tx *txvq = dev->data->tx_queues[i];
1140                         if (txvq == NULL)
1141                                 continue;
1142                         for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1143                                 snprintf(xstats_names[count].name,
1144                                         sizeof(xstats_names[count].name),
1145                                         "tx_q%u_%s", i,
1146                                         rte_virtio_txq_stat_strings[t].name);
1147                                 count++;
1148                         }
1149                 }
1150                 return count;
1151         }
1152         return nstats;
1153 }
1154
1155 static int
1156 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1157                       unsigned n)
1158 {
1159         unsigned i;
1160         unsigned count = 0;
1161
1162         unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1163                 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1164
1165         if (n < nstats)
1166                 return nstats;
1167
1168         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1169                 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1170
1171                 if (rxvq == NULL)
1172                         continue;
1173
1174                 unsigned t;
1175
1176                 for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1177                         xstats[count].value = *(uint64_t *)(((char *)rxvq) +
1178                                 rte_virtio_rxq_stat_strings[t].offset);
1179                         xstats[count].id = count;
1180                         count++;
1181                 }
1182         }
1183
1184         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1185                 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1186
1187                 if (txvq == NULL)
1188                         continue;
1189
1190                 unsigned t;
1191
1192                 for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1193                         xstats[count].value = *(uint64_t *)(((char *)txvq) +
1194                                 rte_virtio_txq_stat_strings[t].offset);
1195                         xstats[count].id = count;
1196                         count++;
1197                 }
1198         }
1199
1200         return count;
1201 }
1202
1203 static int
1204 virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1205 {
1206         virtio_update_stats(dev, stats);
1207
1208         return 0;
1209 }
1210
1211 static int
1212 virtio_dev_stats_reset(struct rte_eth_dev *dev)
1213 {
1214         unsigned int i;
1215
1216         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1217                 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1218                 if (txvq == NULL)
1219                         continue;
1220
1221                 txvq->stats.packets = 0;
1222                 txvq->stats.bytes = 0;
1223                 txvq->stats.multicast = 0;
1224                 txvq->stats.broadcast = 0;
1225                 memset(txvq->stats.size_bins, 0,
1226                        sizeof(txvq->stats.size_bins[0]) * 8);
1227         }
1228
1229         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1230                 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1231                 if (rxvq == NULL)
1232                         continue;
1233
1234                 rxvq->stats.packets = 0;
1235                 rxvq->stats.bytes = 0;
1236                 rxvq->stats.errors = 0;
1237                 rxvq->stats.multicast = 0;
1238                 rxvq->stats.broadcast = 0;
1239                 memset(rxvq->stats.size_bins, 0,
1240                        sizeof(rxvq->stats.size_bins[0]) * 8);
1241         }
1242
1243         return 0;
1244 }
1245
1246 static void
1247 virtio_set_hwaddr(struct virtio_hw *hw)
1248 {
1249         virtio_write_dev_config(hw,
1250                         offsetof(struct virtio_net_config, mac),
1251                         &hw->mac_addr, RTE_ETHER_ADDR_LEN);
1252 }
1253
1254 static void
1255 virtio_get_hwaddr(struct virtio_hw *hw)
1256 {
1257         if (virtio_with_feature(hw, VIRTIO_NET_F_MAC)) {
1258                 virtio_read_dev_config(hw,
1259                         offsetof(struct virtio_net_config, mac),
1260                         &hw->mac_addr, RTE_ETHER_ADDR_LEN);
1261         } else {
1262                 rte_eth_random_addr(&hw->mac_addr[0]);
1263                 virtio_set_hwaddr(hw);
1264         }
1265 }
1266
1267 static int
1268 virtio_mac_table_set(struct virtio_hw *hw,
1269                      const struct virtio_net_ctrl_mac *uc,
1270                      const struct virtio_net_ctrl_mac *mc)
1271 {
1272         struct virtio_pmd_ctrl ctrl;
1273         int err, len[2];
1274
1275         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1276                 PMD_DRV_LOG(INFO, "host does not support mac table");
1277                 return -1;
1278         }
1279
1280         ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1281         ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1282
1283         len[0] = uc->entries * RTE_ETHER_ADDR_LEN + sizeof(uc->entries);
1284         memcpy(ctrl.data, uc, len[0]);
1285
1286         len[1] = mc->entries * RTE_ETHER_ADDR_LEN + sizeof(mc->entries);
1287         memcpy(ctrl.data + len[0], mc, len[1]);
1288
1289         err = virtio_send_command(hw->cvq, &ctrl, len, 2);
1290         if (err != 0)
1291                 PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
1292         return err;
1293 }
1294
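/*
 * The MAC filter table is programmed with a single control command built
 * from two variable-length data segments: the unicast table followed by
 * the multicast table, each prefixed by its entry count.
 * virtio_mac_addr_add() and virtio_mac_addr_remove() below rebuild both
 * tables from dev->data->mac_addrs and resend them on every change.
 */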
1295 static int
1296 virtio_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1297                     uint32_t index, uint32_t vmdq __rte_unused)
1298 {
1299         struct virtio_hw *hw = dev->data->dev_private;
1300         const struct rte_ether_addr *addrs = dev->data->mac_addrs;
1301         unsigned int i;
1302         struct virtio_net_ctrl_mac *uc, *mc;
1303
1304         if (index >= VIRTIO_MAX_MAC_ADDRS) {
1305                 PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1306                 return -EINVAL;
1307         }
1308
1309         uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1310                 sizeof(uc->entries));
1311         uc->entries = 0;
1312         mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1313                 sizeof(mc->entries));
1314         mc->entries = 0;
1315
1316         for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1317                 const struct rte_ether_addr *addr
1318                         = (i == index) ? mac_addr : addrs + i;
1319                 struct virtio_net_ctrl_mac *tbl
1320                         = rte_is_multicast_ether_addr(addr) ? mc : uc;
1321
1322                 memcpy(&tbl->macs[tbl->entries++], addr, RTE_ETHER_ADDR_LEN);
1323         }
1324
1325         return virtio_mac_table_set(hw, uc, mc);
1326 }
1327
1328 static void
1329 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
1330 {
1331         struct virtio_hw *hw = dev->data->dev_private;
1332         struct rte_ether_addr *addrs = dev->data->mac_addrs;
1333         struct virtio_net_ctrl_mac *uc, *mc;
1334         unsigned int i;
1335
1336         if (index >= VIRTIO_MAX_MAC_ADDRS) {
1337                 PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1338                 return;
1339         }
1340
1341         uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1342                 sizeof(uc->entries));
1343         uc->entries = 0;
1344         mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1345                 sizeof(mc->entries));
1346         mc->entries = 0;
1347
1348         for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1349                 struct virtio_net_ctrl_mac *tbl;
1350
1351                 if (i == index || rte_is_zero_ether_addr(addrs + i))
1352                         continue;
1353
1354                 tbl = rte_is_multicast_ether_addr(addrs + i) ? mc : uc;
1355                 memcpy(&tbl->macs[tbl->entries++], addrs + i,
1356                         RTE_ETHER_ADDR_LEN);
1357         }
1358
1359         virtio_mac_table_set(hw, uc, mc);
1360 }
1361
1362 static int
1363 virtio_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1364 {
1365         struct virtio_hw *hw = dev->data->dev_private;
1366
1367         memcpy(hw->mac_addr, mac_addr, RTE_ETHER_ADDR_LEN);
1368
1369         /* Use atomic update if available */
1370         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1371                 struct virtio_pmd_ctrl ctrl;
1372                 int len = RTE_ETHER_ADDR_LEN;
1373
1374                 ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1375                 ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
1376
1377                 memcpy(ctrl.data, mac_addr, RTE_ETHER_ADDR_LEN);
1378                 return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1379         }
1380
1381         if (!virtio_with_feature(hw, VIRTIO_NET_F_MAC))
1382                 return -ENOTSUP;
1383
1384         virtio_set_hwaddr(hw);
1385         return 0;
1386 }
1387
1388 #define CLB_VAL_IDX 0
1389 #define CLB_MSK_IDX 1
1390 #define CLB_MATCH_IDX 2
1391 static int
1392 virtio_monitor_callback(const uint64_t value,
1393                 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
1394 {
1395         const uint64_t m = opaque[CLB_MSK_IDX];
1396         const uint64_t v = opaque[CLB_VAL_IDX];
1397         const uint64_t c = opaque[CLB_MATCH_IDX];
1398
1399         if (c)
1400                 return (value & m) == v ? -1 : 0;
1401         else
1402                 return (value & m) == v ? 0 : -1;
1403 }
1404
1405 static int
1406 virtio_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1407 {
1408         struct virtnet_rx *rxvq = rx_queue;
1409         struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
1410         struct virtio_hw *hw;
1411
1412         if (vq == NULL)
1413                 return -EINVAL;
1414
1415         hw = vq->hw;
1416         if (virtio_with_packed_queue(hw)) {
1417                 struct vring_packed_desc *desc;
1418                 desc = vq->vq_packed.ring.desc;
1419                 pmc->addr = &desc[vq->vq_used_cons_idx].flags;
1420                 if (vq->vq_packed.used_wrap_counter)
1421                         pmc->opaque[CLB_VAL_IDX] =
1422                                                 VRING_PACKED_DESC_F_AVAIL_USED;
1423                 else
1424                         pmc->opaque[CLB_VAL_IDX] = 0;
1425                 pmc->opaque[CLB_MSK_IDX] = VRING_PACKED_DESC_F_AVAIL_USED;
1426                 pmc->opaque[CLB_MATCH_IDX] = 1;
1427                 pmc->size = sizeof(desc[vq->vq_used_cons_idx].flags);
1428         } else {
1429                 pmc->addr = &vq->vq_split.ring.used->idx;
1430                 pmc->opaque[CLB_VAL_IDX] = vq->vq_used_cons_idx
1431                                         & (vq->vq_nentries - 1);
1432                 pmc->opaque[CLB_MSK_IDX] = vq->vq_nentries - 1;
1433                 pmc->opaque[CLB_MATCH_IDX] = 0;
1434                 pmc->size = sizeof(vq->vq_split.ring.used->idx);
1435         }
1436         pmc->fn = virtio_monitor_callback;
1437
1438         return 0;
1439 }
1440
1441 static int
1442 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1443 {
1444         struct virtio_hw *hw = dev->data->dev_private;
1445         struct virtio_pmd_ctrl ctrl;
1446         int len;
1447
1448         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1449                 return -ENOTSUP;
1450
1451         ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1452         ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1453         memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1454         len = sizeof(vlan_id);
1455
1456         return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1457 }
1458
1459 static int
1460 virtio_intr_unmask(struct rte_eth_dev *dev)
1461 {
1462         struct virtio_hw *hw = dev->data->dev_private;
1463
1464         if (rte_intr_ack(dev->intr_handle) < 0)
1465                 return -1;
1466
1467         if (VIRTIO_OPS(hw)->intr_detect)
1468                 VIRTIO_OPS(hw)->intr_detect(hw);
1469
1470         return 0;
1471 }
1472
1473 static int
1474 virtio_intr_enable(struct rte_eth_dev *dev)
1475 {
1476         struct virtio_hw *hw = dev->data->dev_private;
1477
1478         if (rte_intr_enable(dev->intr_handle) < 0)
1479                 return -1;
1480
1481         if (VIRTIO_OPS(hw)->intr_detect)
1482                 VIRTIO_OPS(hw)->intr_detect(hw);
1483
1484         return 0;
1485 }
1486
1487 static int
1488 virtio_intr_disable(struct rte_eth_dev *dev)
1489 {
1490         struct virtio_hw *hw = dev->data->dev_private;
1491
1492         if (rte_intr_disable(dev->intr_handle) < 0)
1493                 return -1;
1494
1495         if (VIRTIO_OPS(hw)->intr_detect)
1496                 VIRTIO_OPS(hw)->intr_detect(hw);
1497
1498         return 0;
1499 }
1500
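/*
 * Feature negotiation helper: read the device (host) feature bits, mask
 * them with the features this driver requests, and write the result back
 * as the guest features. For VIRTIO_F_VERSION_1 devices it also sets and
 * verifies the FEATURES_OK status bit, as required by the virtio spec.
 */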
1501 static int
1502 virtio_ethdev_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1503 {
1504         uint64_t host_features;
1505
1506         /* Prepare guest_features: features that the driver wants to support */
1507         PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1508                 req_features);
1509
1510         /* Read device(host) feature bits */
1511         host_features = VIRTIO_OPS(hw)->get_features(hw);
1512         PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1513                 host_features);
1514
1515         /* If supported, ensure MTU value is valid before acknowledging it. */
1516         if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1517                 struct virtio_net_config config;
1518
1519                 virtio_read_dev_config(hw,
1520                         offsetof(struct virtio_net_config, mtu),
1521                         &config.mtu, sizeof(config.mtu));
1522
1523                 if (config.mtu < RTE_ETHER_MIN_MTU)
1524                         req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1525         }
1526
1527         /*
1528          * Negotiate features: a subset of the device feature bits is written
1529          * back as the guest feature bits.
1530          */
1531         hw->guest_features = req_features;
1532         hw->guest_features = virtio_negotiate_features(hw, host_features);
1533         PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1534                 hw->guest_features);
1535
1536         if (VIRTIO_OPS(hw)->features_ok(hw) < 0)
1537                 return -1;
1538
1539         if (virtio_with_feature(hw, VIRTIO_F_VERSION_1)) {
1540                 virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1541
1542                 if (!(virtio_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1543                         PMD_INIT_LOG(ERR, "Failed to set FEATURES_OK status!");
1544                         return -1;
1545                 }
1546         }
1547
1548         hw->req_guest_features = req_features;
1549
1550         return 0;
1551 }
1552
1553 int
1554 virtio_dev_pause(struct rte_eth_dev *dev)
1555 {
1556         struct virtio_hw *hw = dev->data->dev_private;
1557
1558         rte_spinlock_lock(&hw->state_lock);
1559
1560         if (hw->started == 0) {
1561                 /* Device is already stopped. */
1562                 rte_spinlock_unlock(&hw->state_lock);
1563                 return -1;
1564         }
1565         hw->started = 0;
1566         /*
1567          * Prevent the worker threads from touching queues to avoid contention;
1568          * 1 ms should be enough for any ongoing Tx function to finish.
1569          */
1570         rte_delay_ms(1);
1571         return 0;
1572 }
1573
1574 /*
1575  * Recover hw state to let the worker threads continue.
1576  */
1577 void
1578 virtio_dev_resume(struct rte_eth_dev *dev)
1579 {
1580         struct virtio_hw *hw = dev->data->dev_private;
1581
1582         hw->started = 1;
1583         rte_spinlock_unlock(&hw->state_lock);
1584 }
1585
1586 /*
1587  * Should be called only after the device has been paused.
1588  */
1589 int
1590 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1591                 int nb_pkts)
1592 {
1593         struct virtio_hw *hw = dev->data->dev_private;
1594         struct virtnet_tx *txvq = dev->data->tx_queues[0];
1595         int ret;
1596
1597         hw->inject_pkts = tx_pkts;
1598         ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1599         hw->inject_pkts = NULL;
1600
1601         return ret;
1602 }
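
/*
 * Illustrative call sequence (virtio_notify_peers() below is the in-tree
 * user of this pause/inject/resume protocol):
 *
 *	if (virtio_dev_pause(dev) == 0) {
 *		virtio_inject_pkts(dev, &pkt, 1);
 *		virtio_dev_resume(dev);
 *	}
 */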
1603
1604 static void
1605 virtio_notify_peers(struct rte_eth_dev *dev)
1606 {
1607         struct virtio_hw *hw = dev->data->dev_private;
1608         struct virtnet_rx *rxvq;
1609         struct rte_mbuf *rarp_mbuf;
1610
1611         if (!dev->data->rx_queues)
1612                 return;
1613
1614         rxvq = dev->data->rx_queues[0];
1615         if (!rxvq)
1616                 return;
1617
1618         rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
1619                         (struct rte_ether_addr *)hw->mac_addr);
1620         if (rarp_mbuf == NULL) {
1621                 PMD_DRV_LOG(ERR, "failed to make RARP packet.");
1622                 return;
1623         }
1624
1625         /* If the virtio port was just stopped, no need to send RARP */
1626         if (virtio_dev_pause(dev) < 0) {
1627                 rte_pktmbuf_free(rarp_mbuf);
1628                 return;
1629         }
1630
1631         virtio_inject_pkts(dev, &rarp_mbuf, 1);
1632         virtio_dev_resume(dev);
1633 }
1634
1635 static void
1636 virtio_ack_link_announce(struct rte_eth_dev *dev)
1637 {
1638         struct virtio_hw *hw = dev->data->dev_private;
1639         struct virtio_pmd_ctrl ctrl;
1640
1641         ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
1642         ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
1643
1644         virtio_send_command(hw->cvq, &ctrl, NULL, 0);
1645 }
1646
1647 /*
1648  * Process the virtio config-changed interrupt. Call the LSC callback
1649  * if the link state changed, and generate a gratuitous RARP packet if
1650  * the status indicates an ANNOUNCE.
1651  */
1652 void
1653 virtio_interrupt_handler(void *param)
1654 {
1655         struct rte_eth_dev *dev = param;
1656         struct virtio_hw *hw = dev->data->dev_private;
1657         uint8_t isr;
1658         uint16_t status;
1659
1660         /* Read the interrupt status, which also clears the interrupt */
1661         isr = virtio_get_isr(hw);
1662         PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
1663
1664         if (virtio_intr_unmask(dev) < 0)
1665                 PMD_DRV_LOG(ERR, "interrupt enable failed");
1666
1667         if (isr & VIRTIO_ISR_CONFIG) {
1668                 if (virtio_dev_link_update(dev, 0) == 0)
1669                         rte_eth_dev_callback_process(dev,
1670                                                      RTE_ETH_EVENT_INTR_LSC,
1671                                                      NULL);
1672
1673                 if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1674                         virtio_read_dev_config(hw,
1675                                 offsetof(struct virtio_net_config, status),
1676                                 &status, sizeof(status));
1677                         if (status & VIRTIO_NET_S_ANNOUNCE) {
1678                                 virtio_notify_peers(dev);
1679                                 if (hw->cvq)
1680                                         virtio_ack_link_announce(dev);
1681                         }
1682                 }
1683         }
1684 }
1685
1686 /* Set the Rx and Tx handlers according to what the device supports */
1687 static void
1688 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1689 {
1690         struct virtio_hw *hw = eth_dev->data->dev_private;
1691
1692         eth_dev->tx_pkt_prepare = virtio_xmit_pkts_prepare;
1693         if (virtio_with_packed_queue(hw)) {
1694                 PMD_INIT_LOG(INFO,
1695                         "virtio: using packed ring %s Tx path on port %u",
1696                         hw->use_vec_tx ? "vectorized" : "standard",
1697                         eth_dev->data->port_id);
1698                 if (hw->use_vec_tx)
1699                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
1700                 else
1701                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
1702         } else {
1703                 if (hw->use_inorder_tx) {
1704                         PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
1705                                 eth_dev->data->port_id);
1706                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
1707                 } else {
1708                         PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1709                                 eth_dev->data->port_id);
1710                         eth_dev->tx_pkt_burst = virtio_xmit_pkts;
1711                 }
1712         }
1713
1714         if (virtio_with_packed_queue(hw)) {
1715                 if (hw->use_vec_rx) {
1716                         PMD_INIT_LOG(INFO,
1717                                 "virtio: using packed ring vectorized Rx path on port %u",
1718                                 eth_dev->data->port_id);
1719                         eth_dev->rx_pkt_burst =
1720                                 &virtio_recv_pkts_packed_vec;
1721                 } else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1722                         PMD_INIT_LOG(INFO,
1723                                 "virtio: using packed ring mergeable buffer Rx path on port %u",
1724                                 eth_dev->data->port_id);
1725                         eth_dev->rx_pkt_burst =
1726                                 &virtio_recv_mergeable_pkts_packed;
1727                 } else {
1728                         PMD_INIT_LOG(INFO,
1729                                 "virtio: using packed ring standard Rx path on port %u",
1730                                 eth_dev->data->port_id);
1731                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
1732                 }
1733         } else {
1734                 if (hw->use_vec_rx) {
1735                         PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
1736                                 eth_dev->data->port_id);
1737                         eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1738                 } else if (hw->use_inorder_rx) {
1739                         PMD_INIT_LOG(INFO,
1740                                 "virtio: using inorder Rx path on port %u",
1741                                 eth_dev->data->port_id);
1742                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_inorder;
1743                 } else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1744                         PMD_INIT_LOG(INFO,
1745                                 "virtio: using mergeable buffer Rx path on port %u",
1746                                 eth_dev->data->port_id);
1747                         eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1748                 } else {
1749                         PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1750                                 eth_dev->data->port_id);
1751                         eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1752                 }
1753         }
1754
1755 }
1756
1757 /* Only 1:1 queue/interrupt mapping is supported so far (vector 0 is reserved
1758  * for the config-change/LSC interrupt, so Rx queue i binds to vector i + 1).
1759  * TODO: support n:1 mapping when the number of interrupt vectors is limited (< N + 1).
1760  */
1761 static int
1762 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1763 {
1764         uint32_t i;
1765         struct virtio_hw *hw = dev->data->dev_private;
1766
1767         PMD_INIT_LOG(INFO, "queue/interrupt binding");
1768         for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1769                 if (rte_intr_vec_list_index_set(dev->intr_handle, i,
1770                                                        i + 1))
1771                         return -rte_errno;
1772                 if (VIRTIO_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1773                                                  VIRTIO_MSI_NO_VECTOR) {
1774                         PMD_DRV_LOG(ERR, "failed to set queue vector");
1775                         return -EBUSY;
1776                 }
1777         }
1778
1779         return 0;
1780 }
1781
1782 static void
1783 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1784 {
1785         uint32_t i;
1786         struct virtio_hw *hw = dev->data->dev_private;
1787
1788         PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
1789         for (i = 0; i < dev->data->nb_rx_queues; ++i)
1790                 VIRTIO_OPS(hw)->set_queue_irq(hw,
1791                                              hw->vqs[i * VTNET_CQ],
1792                                              VIRTIO_MSI_NO_VECTOR);
1793 }
1794
1795 static int
1796 virtio_configure_intr(struct rte_eth_dev *dev)
1797 {
1798         struct virtio_hw *hw = dev->data->dev_private;
1799
1800         if (!rte_intr_cap_multiple(dev->intr_handle)) {
1801                 PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
1802                 return -ENOTSUP;
1803         }
1804
1805         if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1806                 PMD_INIT_LOG(ERR, "Fail to create eventfd");
1807                 return -1;
1808         }
1809
1810         if (rte_intr_vec_list_alloc(dev->intr_handle, "intr_vec",
1811                                     hw->max_queue_pairs)) {
1812                 PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1813                              hw->max_queue_pairs);
1814                 return -ENOMEM;
1815         }
1816
1817         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1818                 /* Re-register callback to update max_intr */
1819                 rte_intr_callback_unregister(dev->intr_handle,
1820                                              virtio_interrupt_handler,
1821                                              dev);
1822                 rte_intr_callback_register(dev->intr_handle,
1823                                            virtio_interrupt_handler,
1824                                            dev);
1825         }
1826
1827         /* DO NOT remove this! This call enables MSI-X; without it, QEMU
1828          * may hit a SIGSEGV when DRIVER_OK is sent.
1829          * For legacy devices it must also be done before queue/vector binding,
1830          * to change the config size from 20 to 24; otherwise writes to
1831          * VIRTIO_MSI_QUEUE_VECTOR (22) will be ignored.
1832          */
1833         if (virtio_intr_enable(dev) < 0) {
1834                 PMD_DRV_LOG(ERR, "interrupt enable failed");
1835                 return -1;
1836         }
1837
1838         if (virtio_queues_bind_intr(dev) < 0) {
1839                 PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1840                 return -1;
1841         }
1842
1843         return 0;
1844 }
1845
1846 static uint64_t
1847 ethdev_to_virtio_rss_offloads(uint64_t ethdev_hash_types)
1848 {
1849         uint64_t virtio_hash_types = 0;
1850
1851         if (ethdev_hash_types & (RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
1852                                 RTE_ETH_RSS_NONFRAG_IPV4_OTHER))
1853                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IPV4;
1854
1855         if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1856                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCPV4;
1857
1858         if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1859                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDPV4;
1860
1861         if (ethdev_hash_types & (RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
1862                                 RTE_ETH_RSS_NONFRAG_IPV6_OTHER))
1863                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IPV6;
1864
1865         if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1866                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCPV6;
1867
1868         if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1869                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDPV6;
1870
1871         if (ethdev_hash_types & RTE_ETH_RSS_IPV6_EX)
1872                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IP_EX;
1873
1874         if (ethdev_hash_types & RTE_ETH_RSS_IPV6_TCP_EX)
1875                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCP_EX;
1876
1877         if (ethdev_hash_types & RTE_ETH_RSS_IPV6_UDP_EX)
1878                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDP_EX;
1879
1880         return virtio_hash_types;
1881 }
1882
1883 static uint64_t
1884 virtio_to_ethdev_rss_offloads(uint64_t virtio_hash_types)
1885 {
1886         uint64_t rss_offloads = 0;
1887
1888         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IPV4)
1889                 rss_offloads |= RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
1890                         RTE_ETH_RSS_NONFRAG_IPV4_OTHER;
1891
1892         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCPV4)
1893                 rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV4_TCP;
1894
1895         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDPV4)
1896                 rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV4_UDP;
1897
1898         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IPV6)
1899                 rss_offloads |= RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
1900                         RTE_ETH_RSS_NONFRAG_IPV6_OTHER;
1901
1902         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCPV6)
1903                 rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV6_TCP;
1904
1905         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDPV6)
1906                 rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV6_UDP;
1907
1908         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IP_EX)
1909                 rss_offloads |= RTE_ETH_RSS_IPV6_EX;
1910
1911         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCP_EX)
1912                 rss_offloads |= RTE_ETH_RSS_IPV6_TCP_EX;
1913
1914         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDP_EX)
1915                 rss_offloads |= RTE_ETH_RSS_IPV6_UDP_EX;
1916
1917         return rss_offloads;
1918 }
1919
1920 static int
1921 virtio_dev_get_rss_config(struct virtio_hw *hw, uint32_t *rss_hash_types)
1922 {
1923         struct virtio_net_config local_config;
1924         struct virtio_net_config *config = &local_config;
1925
1926         virtio_read_dev_config(hw,
1927                         offsetof(struct virtio_net_config, rss_max_key_size),
1928                         &config->rss_max_key_size,
1929                         sizeof(config->rss_max_key_size));
1930         if (config->rss_max_key_size < VIRTIO_NET_RSS_KEY_SIZE) {
1931                 PMD_INIT_LOG(ERR, "Invalid device RSS max key size (%u)",
1932                                 config->rss_max_key_size);
1933                 return -EINVAL;
1934         }
1935
1936         virtio_read_dev_config(hw,
1937                         offsetof(struct virtio_net_config,
1938                                 rss_max_indirection_table_length),
1939                         &config->rss_max_indirection_table_length,
1940                         sizeof(config->rss_max_indirection_table_length));
1941         if (config->rss_max_indirection_table_length < VIRTIO_NET_RSS_RETA_SIZE) {
1942                 PMD_INIT_LOG(ERR, "Invalid device RSS max reta size (%u)",
1943                                 config->rss_max_indirection_table_length);
1944                 return -EINVAL;
1945         }
1946
1947         virtio_read_dev_config(hw,
1948                         offsetof(struct virtio_net_config, supported_hash_types),
1949                         &config->supported_hash_types,
1950                         sizeof(config->supported_hash_types));
1951         if ((config->supported_hash_types & VIRTIO_NET_HASH_TYPE_MASK) == 0) {
1952                 PMD_INIT_LOG(ERR, "Invalid device RSS hash types (0x%x)",
1953                                 config->supported_hash_types);
1954                 return -EINVAL;
1955         }
1956
1957         *rss_hash_types = config->supported_hash_types & VIRTIO_NET_HASH_TYPE_MASK;
1958
1959         PMD_INIT_LOG(DEBUG, "Device RSS config:");
1960         PMD_INIT_LOG(DEBUG, "\t-Max key size: %u", config->rss_max_key_size);
1961         PMD_INIT_LOG(DEBUG, "\t-Max reta size: %u", config->rss_max_indirection_table_length);
1962         PMD_INIT_LOG(DEBUG, "\t-Supported hash types: 0x%x", *rss_hash_types);
1963
1964         return 0;
1965 }
1966
1967 static int
1968 virtio_dev_rss_hash_update(struct rte_eth_dev *dev,
1969                 struct rte_eth_rss_conf *rss_conf)
1970 {
1971         struct virtio_hw *hw = dev->data->dev_private;
1972         char old_rss_key[VIRTIO_NET_RSS_KEY_SIZE];
1973         uint32_t old_hash_types;
1974         uint16_t nb_queues;
1975         int ret;
1976
1977         if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
1978                 return -ENOTSUP;
1979
1980         if (rss_conf->rss_hf & ~virtio_to_ethdev_rss_offloads(VIRTIO_NET_HASH_TYPE_MASK))
1981                 return -EINVAL;
1982
1983         old_hash_types = hw->rss_hash_types;
1984         hw->rss_hash_types = ethdev_to_virtio_rss_offloads(rss_conf->rss_hf);
1985
1986         if (rss_conf->rss_key && rss_conf->rss_key_len) {
1987                 if (rss_conf->rss_key_len != VIRTIO_NET_RSS_KEY_SIZE) {
1988                         PMD_INIT_LOG(ERR, "Driver only supports %u RSS key length",
1989                                         VIRTIO_NET_RSS_KEY_SIZE);
1990                         ret = -EINVAL;
1991                         goto restore_types;
1992                 }
1993                 memcpy(old_rss_key, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
1994                 memcpy(hw->rss_key, rss_conf->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
1995         }
1996
1997         nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
1998         ret = virtio_set_multiple_queues_rss(dev, nb_queues);
1999         if (ret < 0) {
2000                 PMD_INIT_LOG(ERR, "Failed to apply new RSS config to the device");
2001                 goto restore_key;
2002         }
2003
2004         return 0;
2005 restore_key:
2006         memcpy(hw->rss_key, old_rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2007 restore_types:
2008         hw->rss_hash_types = old_hash_types;
2009
2010         return ret;
2011 }
2012
2013 static int
2014 virtio_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2015                 struct rte_eth_rss_conf *rss_conf)
2016 {
2017         struct virtio_hw *hw = dev->data->dev_private;
2018
2019         if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2020                 return -ENOTSUP;
2021
2022         if (rss_conf->rss_key && rss_conf->rss_key_len >= VIRTIO_NET_RSS_KEY_SIZE)
2023                 memcpy(rss_conf->rss_key, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2024         rss_conf->rss_key_len = VIRTIO_NET_RSS_KEY_SIZE;
2025         rss_conf->rss_hf = virtio_to_ethdev_rss_offloads(hw->rss_hash_types);
2026
2027         return 0;
2028 }
2029
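/*
 * The RETA helpers below operate on the full VIRTIO_NET_RSS_RETA_SIZE
 * indirection table: entry i selects the Rx queue for packets whose RSS
 * hash falls into slot i (the hash is masked down to the table size, the
 * table length being a power of two).
 */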
2030 static int virtio_dev_rss_reta_update(struct rte_eth_dev *dev,
2031                          struct rte_eth_rss_reta_entry64 *reta_conf,
2032                          uint16_t reta_size)
2033 {
2034         struct virtio_hw *hw = dev->data->dev_private;
2035         uint16_t nb_queues;
2036         uint16_t old_reta[VIRTIO_NET_RSS_RETA_SIZE];
2037         int idx, pos, i, ret;
2038
2039         if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2040                 return -ENOTSUP;
2041
2042         if (reta_size != VIRTIO_NET_RSS_RETA_SIZE)
2043                 return -EINVAL;
2044
2045         memcpy(old_reta, hw->rss_reta, sizeof(old_reta));
2046
2047         for (i = 0; i < reta_size; i++) {
2048                 idx = i / RTE_ETH_RETA_GROUP_SIZE;
2049                 pos = i % RTE_ETH_RETA_GROUP_SIZE;
2050
2051                 if (((reta_conf[idx].mask >> pos) & 0x1) == 0)
2052                         continue;
2053
2054                 hw->rss_reta[i] = reta_conf[idx].reta[pos];
2055         }
2056
2057         nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2058         ret = virtio_set_multiple_queues_rss(dev, nb_queues);
2059         if (ret < 0) {
2060                 PMD_INIT_LOG(ERR, "Failed to apply new RETA to the device");
2061                 memcpy(hw->rss_reta, old_reta, sizeof(old_reta));
2062         }
2063
2064         hw->rss_rx_queues = dev->data->nb_rx_queues;
2065
2066         return ret;
2067 }
2068
2069 static int virtio_dev_rss_reta_query(struct rte_eth_dev *dev,
2070                          struct rte_eth_rss_reta_entry64 *reta_conf,
2071                          uint16_t reta_size)
2072 {
2073         struct virtio_hw *hw = dev->data->dev_private;
2074         int idx, i;
2075
2076         if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2077                 return -ENOTSUP;
2078
2079         if (reta_size != VIRTIO_NET_RSS_RETA_SIZE)
2080                 return -EINVAL;
2081
2082         for (i = 0; i < reta_size; i++) {
2083                 idx = i / RTE_ETH_RETA_GROUP_SIZE;
2084                 reta_conf[idx].reta[i % RTE_ETH_RETA_GROUP_SIZE] = hw->rss_reta[i];
2085         }
2086
2087         return 0;
2088 }
2089
2090 /*
2091  * The default RSS hash key is the default key used by Intel ixgbe
2092  * devices. The application can replace it with any 40-byte key
2093  * value.
2094  */
2095 static uint8_t rss_intel_key[VIRTIO_NET_RSS_KEY_SIZE] = {
2096         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2097         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2098         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2099         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2100         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2101 };
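
/*
 * Illustrative application-side override of this key (port_id and app_key
 * are assumptions, not part of this driver):
 *
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = app_key,             // 40-byte buffer
 *		.rss_key_len = VIRTIO_NET_RSS_KEY_SIZE,
 *		.rss_hf = RTE_ETH_RSS_IP | RTE_ETH_RSS_TCP,
 *	};
 *	rte_eth_dev_rss_hash_update(port_id, &conf);
 */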
2102
2103 static int
2104 virtio_dev_rss_init(struct rte_eth_dev *eth_dev)
2105 {
2106         struct virtio_hw *hw = eth_dev->data->dev_private;
2107         uint16_t nb_rx_queues = eth_dev->data->nb_rx_queues;
2108         struct rte_eth_rss_conf *rss_conf;
2109         int ret, i;
2110
2111         if (!nb_rx_queues) {
2112                 PMD_INIT_LOG(ERR, "Cannot init RSS if no Rx queues");
2113                 return -EINVAL;
2114         }
2115
2116         rss_conf = &eth_dev->data->dev_conf.rx_adv_conf.rss_conf;
2117
2118         ret = virtio_dev_get_rss_config(hw, &hw->rss_hash_types);
2119         if (ret)
2120                 return ret;
2121
2122         if (rss_conf->rss_hf) {
2123                 /* Ensure the requested hash types are supported by the device */
2124                 if (rss_conf->rss_hf & ~virtio_to_ethdev_rss_offloads(hw->rss_hash_types))
2125                         return -EINVAL;
2126
2127                 hw->rss_hash_types = ethdev_to_virtio_rss_offloads(rss_conf->rss_hf);
2128         }
2129
2130         if (!hw->rss_key) {
2131                 /* Set up the default RSS key if not already provided by the user */
2132                 hw->rss_key = rte_malloc_socket("rss_key",
2133                                 VIRTIO_NET_RSS_KEY_SIZE, 0,
2134                                 eth_dev->device->numa_node);
2135                 if (!hw->rss_key) {
2136                         PMD_INIT_LOG(ERR, "Failed to allocate RSS key");
2137                         return -1;
2138                 }
2139         }
2140
2141         if (rss_conf->rss_key && rss_conf->rss_key_len) {
2142                 if (rss_conf->rss_key_len != VIRTIO_NET_RSS_KEY_SIZE) {
2143                         PMD_INIT_LOG(ERR, "Driver only supports %u RSS key length",
2144                                         VIRTIO_NET_RSS_KEY_SIZE);
2145                         return -EINVAL;
2146                 }
2147                 memcpy(hw->rss_key, rss_conf->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2148         } else {
2149                 memcpy(hw->rss_key, rss_intel_key, VIRTIO_NET_RSS_KEY_SIZE);
2150         }
2151
2152         if (!hw->rss_reta) {
2153                 /* Set up the default RSS reta if not already provided by the user */
2154                 hw->rss_reta = rte_zmalloc_socket("rss_reta",
2155                                 VIRTIO_NET_RSS_RETA_SIZE * sizeof(uint16_t), 0,
2156                                 eth_dev->device->numa_node);
2157                 if (!hw->rss_reta) {
2158                         PMD_INIT_LOG(ERR, "Failed to allocate RSS reta");
2159                         return -1;
2160                 }
2161
2162                 hw->rss_rx_queues = 0;
2163         }
2164
2165         /* Re-initialize the RSS reta if the number of RX queues has changed */
2166         if (hw->rss_rx_queues != nb_rx_queues) {
2167                 for (i = 0; i < VIRTIO_NET_RSS_RETA_SIZE; i++)
2168                         hw->rss_reta[i] = i % nb_rx_queues;
2169                 hw->rss_rx_queues = nb_rx_queues;
2170         }
2171
2172         return 0;
2173 }
2174
2175 #define DUPLEX_UNKNOWN   0xff
2176 /* reset device and renegotiate features if needed */
2177 static int
2178 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
2179 {
2180         struct virtio_hw *hw = eth_dev->data->dev_private;
2181         struct virtio_net_config *config;
2182         struct virtio_net_config local_config;
2183         int ret;
2184
2185         /* Reset the device, although this is not strictly necessary at startup */
2186         virtio_reset(hw);
2187
2188         if (hw->vqs) {
2189                 virtio_dev_free_mbufs(eth_dev);
2190                 virtio_free_queues(hw);
2191         }
2192
2193         /* Tell the host we've noticed this device. */
2194         virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
2195
2196         /* Tell the host we know how to drive the device. */
2197         virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
2198         if (virtio_ethdev_negotiate_features(hw, req_features) < 0)
2199                 return -1;
2200
2201         hw->weak_barriers = !virtio_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);
2202
2203         /* Disable LSC unless the host supports both the status feature and MSI-X */
2204         if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS) && hw->intr_lsc)
2205                 eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
2206         else
2207                 eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
2208
2209         eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
2210
2211         /* Set up the Rx header size for the device */
2212         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
2213             virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2214             virtio_with_packed_queue(hw))
2215                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2216         else
2217                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
2218
2219         /* Copy the permanent MAC address into virtio_hw */
2220         virtio_get_hwaddr(hw);
2221         rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
2222                         &eth_dev->data->mac_addrs[0]);
2223         PMD_INIT_LOG(DEBUG,
2224                      "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
2225                      hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
2226                      hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
2227
2228         if (hw->speed == RTE_ETH_SPEED_NUM_UNKNOWN) {
2229                 if (virtio_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX)) {
2230                         config = &local_config;
2231                         virtio_read_dev_config(hw,
2232                                 offsetof(struct virtio_net_config, speed),
2233                                 &config->speed, sizeof(config->speed));
2234                         virtio_read_dev_config(hw,
2235                                 offsetof(struct virtio_net_config, duplex),
2236                                 &config->duplex, sizeof(config->duplex));
2237                         hw->speed = config->speed;
2238                         hw->duplex = config->duplex;
2239                 }
2240         }
2241         if (hw->duplex == DUPLEX_UNKNOWN)
2242                 hw->duplex = RTE_ETH_LINK_FULL_DUPLEX;
2243         PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
2244                 hw->speed, hw->duplex);
2245         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
2246                 config = &local_config;
2247
2248                 virtio_read_dev_config(hw,
2249                         offsetof(struct virtio_net_config, mac),
2250                         &config->mac, sizeof(config->mac));
2251
2252                 if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2253                         virtio_read_dev_config(hw,
2254                                 offsetof(struct virtio_net_config, status),
2255                                 &config->status, sizeof(config->status));
2256                 } else {
2257                         PMD_INIT_LOG(DEBUG,
2258                                      "VIRTIO_NET_F_STATUS is not supported");
2259                         config->status = 0;
2260                 }
2261
2262                 if (virtio_with_feature(hw, VIRTIO_NET_F_MQ) ||
2263                                 virtio_with_feature(hw, VIRTIO_NET_F_RSS)) {
2264                         virtio_read_dev_config(hw,
2265                                 offsetof(struct virtio_net_config, max_virtqueue_pairs),
2266                                 &config->max_virtqueue_pairs,
2267                                 sizeof(config->max_virtqueue_pairs));
2268                 } else {
2269                         PMD_INIT_LOG(DEBUG,
2270                                      "Neither VIRTIO_NET_F_MQ nor VIRTIO_NET_F_RSS are supported");
2271                         config->max_virtqueue_pairs = 1;
2272                 }
2273
2274                 hw->max_queue_pairs = config->max_virtqueue_pairs;
2275
2276                 if (virtio_with_feature(hw, VIRTIO_NET_F_MTU)) {
2277                         virtio_read_dev_config(hw,
2278                                 offsetof(struct virtio_net_config, mtu),
2279                                 &config->mtu,
2280                                 sizeof(config->mtu));
2281
2282                         /*
2283                          * MTU value has already been checked at negotiation
2284                          * time, but check again in case it has changed since
2285                          * then, which should not happen.
2286                          */
2287                         if (config->mtu < RTE_ETHER_MIN_MTU) {
2288                                 PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
2289                                                 config->mtu);
2290                                 return -1;
2291                         }
2292
2293                         hw->max_mtu = config->mtu;
2294                         /* Set the initial MTU to the maximum supported by vhost */
2295                         eth_dev->data->mtu = config->mtu;
2296
2297                 } else {
2298                         hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
2299                                 VLAN_TAG_LEN - hw->vtnet_hdr_size;
2300                 }
2301
2302                 hw->rss_hash_types = 0;
2303                 if (virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2304                         if (virtio_dev_rss_init(eth_dev))
2305                                 return -1;
2306
2307                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
2308                                 config->max_virtqueue_pairs);
2309                 PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
2310                 PMD_INIT_LOG(DEBUG,
2311                                 "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
2312                                 config->mac[0], config->mac[1],
2313                                 config->mac[2], config->mac[3],
2314                                 config->mac[4], config->mac[5]);
2315         } else {
2316                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
2317                 hw->max_queue_pairs = 1;
2318                 hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
2319                         VLAN_TAG_LEN - hw->vtnet_hdr_size;
2320         }
2321
2322         ret = virtio_alloc_queues(eth_dev);
2323         if (ret < 0)
2324                 return ret;
2325
2326         if (eth_dev->data->dev_conf.intr_conf.rxq) {
2327                 if (virtio_configure_intr(eth_dev) < 0) {
2328                         PMD_INIT_LOG(ERR, "failed to configure interrupt");
2329                         virtio_free_queues(hw);
2330                         return -1;
2331                 }
2332         }
2333
2334         virtio_reinit_complete(hw);
2335
2336         return 0;
2337 }
2338
2339 /*
2340  * This function is based on the probe() function in virtio_pci.c.
2341  * It returns 0 on success.
2342  */
2343 int
2344 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
2345 {
2346         struct virtio_hw *hw = eth_dev->data->dev_private;
2347         uint32_t speed = RTE_ETH_SPEED_NUM_UNKNOWN;
2348         int vectorized = 0;
2349         int ret;
2350
2351         if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) {
2352                 PMD_INIT_LOG(ERR,
2353                         "Not sufficient headroom required = %d, avail = %d",
2354                         (int)sizeof(struct virtio_net_hdr_mrg_rxbuf),
2355                         RTE_PKTMBUF_HEADROOM);
2356
2357                 return -1;
2358         }
2359
2360         eth_dev->dev_ops = &virtio_eth_dev_ops;
2361
2362         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
2363                 set_rxtx_funcs(eth_dev);
2364                 return 0;
2365         }
2366
2367         ret = virtio_dev_devargs_parse(eth_dev->device->devargs, &speed, &vectorized);
2368         if (ret < 0)
2369                 return ret;
2370         hw->speed = speed;
2371         hw->duplex = DUPLEX_UNKNOWN;
2372
2373         /* Allocate memory for storing MAC addresses */
2374         eth_dev->data->mac_addrs = rte_zmalloc("virtio",
2375                                 VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN, 0);
2376         if (eth_dev->data->mac_addrs == NULL) {
2377                 PMD_INIT_LOG(ERR,
2378                         "Failed to allocate %d bytes needed to store MAC addresses",
2379                         VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN);
2380                 return -ENOMEM;
2381         }
2382
2383         rte_spinlock_init(&hw->state_lock);
2384
2385         /* reset device and negotiate default features */
2386         ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
2387         if (ret < 0)
2388                 goto err_virtio_init;
2389
2390         if (vectorized) {
2391                 if (!virtio_with_packed_queue(hw)) {
2392                         hw->use_vec_rx = 1;
2393                 } else {
2394 #if defined(CC_AVX512_SUPPORT) || defined(RTE_ARCH_ARM)
2395                         hw->use_vec_rx = 1;
2396                         hw->use_vec_tx = 1;
2397 #else
2398                         PMD_DRV_LOG(INFO,
2399                                 "building environment do not support packed ring vectorized");
2400 #endif
2401                 }
2402         }
2403
2404         hw->opened = 1;
2405
2406         return 0;
2407
2408 err_virtio_init:
2409         rte_free(eth_dev->data->mac_addrs);
2410         eth_dev->data->mac_addrs = NULL;
2411         return ret;
2412 }
2413
2414 static uint32_t
2415 virtio_dev_speed_capa_get(uint32_t speed)
2416 {
2417         switch (speed) {
2418         case RTE_ETH_SPEED_NUM_10G:
2419                 return RTE_ETH_LINK_SPEED_10G;
2420         case RTE_ETH_SPEED_NUM_20G:
2421                 return RTE_ETH_LINK_SPEED_20G;
2422         case RTE_ETH_SPEED_NUM_25G:
2423                 return RTE_ETH_LINK_SPEED_25G;
2424         case RTE_ETH_SPEED_NUM_40G:
2425                 return RTE_ETH_LINK_SPEED_40G;
2426         case RTE_ETH_SPEED_NUM_50G:
2427                 return RTE_ETH_LINK_SPEED_50G;
2428         case RTE_ETH_SPEED_NUM_56G:
2429                 return RTE_ETH_LINK_SPEED_56G;
2430         case RTE_ETH_SPEED_NUM_100G:
2431                 return RTE_ETH_LINK_SPEED_100G;
2432         case RTE_ETH_SPEED_NUM_200G:
2433                 return RTE_ETH_LINK_SPEED_200G;
2434         default:
2435                 return 0;
2436         }
2437 }
2438
2439 static int vectorized_check_handler(__rte_unused const char *key,
2440                 const char *value, void *ret_val)
2441 {
2442         if (strcmp(value, "1") == 0)
2443                 *(int *)ret_val = 1;
2444         else
2445                 *(int *)ret_val = 0;
2446
2447         return 0;
2448 }
2449
2450 #define VIRTIO_ARG_SPEED      "speed"
2451 #define VIRTIO_ARG_VECTORIZED "vectorized"
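
/*
 * Illustrative usage of these devargs (the PCI address is an assumption):
 *
 *	dpdk-testpmd -a 0000:00:04.0,speed=10000,vectorized=1 -- -i
 *
 * "speed" forces the reported link speed when the device does not expose
 * one, and "vectorized" requests the vectorized Rx/Tx paths selected in
 * virtio_dev_configure().
 */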
2452
2453 static int
2454 link_speed_handler(const char *key __rte_unused,
2455                 const char *value, void *ret_val)
2456 {
2457         uint32_t val;
2458         if (!value || !ret_val)
2459                 return -EINVAL;
2460         val = strtoul(value, NULL, 0);
2461         /* validate input */
2462         if (virtio_dev_speed_capa_get(val) == 0)
2463                 return -EINVAL;
2464         *(uint32_t *)ret_val = val;
2465
2466         return 0;
2467 }
2468
2469
2470 static int
2471 virtio_dev_devargs_parse(struct rte_devargs *devargs, uint32_t *speed, int *vectorized)
2472 {
2473         struct rte_kvargs *kvlist;
2474         int ret = 0;
2475
2476         if (devargs == NULL)
2477                 return 0;
2478
2479         kvlist = rte_kvargs_parse(devargs->args, NULL);
2480         if (kvlist == NULL) {
2481                 PMD_INIT_LOG(ERR, "error when parsing param");
2482                 return 0;
2483         }
2484
2485         if (speed && rte_kvargs_count(kvlist, VIRTIO_ARG_SPEED) == 1) {
2486                 ret = rte_kvargs_process(kvlist,
2487                                         VIRTIO_ARG_SPEED,
2488                                         link_speed_handler, speed);
2489                 if (ret < 0) {
2490                         PMD_INIT_LOG(ERR, "Failed to parse %s",
2491                                         VIRTIO_ARG_SPEED);
2492                         goto exit;
2493                 }
2494         }
2495
2496         if (vectorized &&
2497                 rte_kvargs_count(kvlist, VIRTIO_ARG_VECTORIZED) == 1) {
2498                 ret = rte_kvargs_process(kvlist,
2499                                 VIRTIO_ARG_VECTORIZED,
2500                                 vectorized_check_handler, vectorized);
2501                 if (ret < 0) {
2502                         PMD_INIT_LOG(ERR, "Failed to parse %s",
2503                                         VIRTIO_ARG_VECTORIZED);
2504                         goto exit;
2505                 }
2506         }
2507
2508 exit:
2509         rte_kvargs_free(kvlist);
2510         return ret;
2511 }
2512
2513 static uint8_t
2514 rx_offload_enabled(struct virtio_hw *hw)
2515 {
2516         return virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
2517                 virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2518                 virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
2519 }
2520
2521 static uint8_t
2522 tx_offload_enabled(struct virtio_hw *hw)
2523 {
2524         return virtio_with_feature(hw, VIRTIO_NET_F_CSUM) ||
2525                 virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
2526                 virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
2527 }
2528
2529 /*
2530  * Configure the virtio device.
2531  * It returns 0 on success.
2532  */
2533 static int
2534 virtio_dev_configure(struct rte_eth_dev *dev)
2535 {
2536         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2537         const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
2538         struct virtio_hw *hw = dev->data->dev_private;
2539         uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
2540                 hw->vtnet_hdr_size;
2541         uint64_t rx_offloads = rxmode->offloads;
2542         uint64_t tx_offloads = txmode->offloads;
2543         uint64_t req_features;
2544         int ret;
2545
2546         PMD_INIT_LOG(DEBUG, "configure");
2547         req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
2548
2549         if (rxmode->mq_mode != RTE_ETH_MQ_RX_NONE && rxmode->mq_mode != RTE_ETH_MQ_RX_RSS) {
2550                 PMD_DRV_LOG(ERR,
2551                         "Unsupported Rx multi queue mode %d",
2552                         rxmode->mq_mode);
2553                 return -EINVAL;
2554         }
2555
2556         if (txmode->mq_mode != RTE_ETH_MQ_TX_NONE) {
2557                 PMD_DRV_LOG(ERR,
2558                         "Unsupported Tx multi queue mode %d",
2559                         txmode->mq_mode);
2560                 return -EINVAL;
2561         }
2562
2563         if (dev->data->dev_conf.intr_conf.rxq) {
2564                 ret = virtio_init_device(dev, hw->req_guest_features);
2565                 if (ret < 0)
2566                         return ret;
2567         }
2568
2569         if (rxmode->mq_mode == RTE_ETH_MQ_RX_RSS)
2570                 req_features |= (1ULL << VIRTIO_NET_F_RSS);
2571
2572         if (rxmode->mtu > hw->max_mtu)
2573                 req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
2574
2575         hw->max_rx_pkt_len = ether_hdr_len + rxmode->mtu;
2576
2577         if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2578                            RTE_ETH_RX_OFFLOAD_TCP_CKSUM))
2579                 req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
2580
2581         if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)
2582                 req_features |=
2583                         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2584                         (1ULL << VIRTIO_NET_F_GUEST_TSO6);
2585
2586         if (tx_offloads & (RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
2587                            RTE_ETH_TX_OFFLOAD_TCP_CKSUM))
2588                 req_features |= (1ULL << VIRTIO_NET_F_CSUM);
2589
2590         if (tx_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)
2591                 req_features |=
2592                         (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2593                         (1ULL << VIRTIO_NET_F_HOST_TSO6);
2594
2595         /* if the requested features changed, reinitialize the device */
2596         if (req_features != hw->req_guest_features) {
2597                 ret = virtio_init_device(dev, req_features);
2598                 if (ret < 0)
2599                         return ret;
2600         }
2601
2602         if ((rxmode->mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) &&
2603                         !virtio_with_feature(hw, VIRTIO_NET_F_RSS)) {
2604                 PMD_DRV_LOG(ERR, "RSS support requested but not supported by the device");
2605                 return -ENOTSUP;
2606         }
2607
2608         if ((rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2609                             RTE_ETH_RX_OFFLOAD_TCP_CKSUM)) &&
2610                 !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2611                 PMD_DRV_LOG(ERR,
2612                         "rx checksum not available on this host");
2613                 return -ENOTSUP;
2614         }
2615
2616         if ((rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) &&
2617                 (!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2618                  !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2619                 PMD_DRV_LOG(ERR,
2620                         "Large Receive Offload not available on this host");
2621                 return -ENOTSUP;
2622         }
2623
2624         /* start control queue */
2625         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2626                 virtio_dev_cq_start(dev);
2627
2628         if (rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
2629                 hw->vlan_strip = 1;
2630
2631         hw->rx_ol_scatter = (rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER);
2632
2633         if ((rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER) &&
2634                         !virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2635                 PMD_DRV_LOG(ERR,
2636                             "vlan filtering not available on this host");
2637                 return -ENOTSUP;
2638         }
2639
2640         hw->has_tx_offload = tx_offload_enabled(hw);
2641         hw->has_rx_offload = rx_offload_enabled(hw);
2642
2643         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2644                 /* Enable vector (0) for Link State Interrupt */
2645                 if (VIRTIO_OPS(hw)->set_config_irq(hw, 0) ==
2646                                 VIRTIO_MSI_NO_VECTOR) {
2647                         PMD_DRV_LOG(ERR, "failed to set config vector");
2648                         return -EBUSY;
2649                 }
2650
2651         if (virtio_with_packed_queue(hw)) {
2652 #if defined(RTE_ARCH_X86_64) && defined(CC_AVX512_SUPPORT)
2653                 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2654                     (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
2655                      !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2656                      !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2657                      rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512)) {
2658                         PMD_DRV_LOG(INFO,
2659                                 "disabled packed ring vectorized path for requirements not met");
2660                         hw->use_vec_rx = 0;
2661                         hw->use_vec_tx = 0;
2662                 }
2663 #elif defined(RTE_ARCH_ARM)
2664                 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2665                     (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) ||
2666                      !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2667                      !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2668                      rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)) {
2669                         PMD_DRV_LOG(INFO,
2670                                 "disabled packed ring vectorized path for requirements not met");
2671                         hw->use_vec_rx = 0;
2672                         hw->use_vec_tx = 0;
2673                 }
2674 #else
2675                 hw->use_vec_rx = 0;
2676                 hw->use_vec_tx = 0;
2677 #endif
2678
2679                 if (hw->use_vec_rx) {
2680                         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2681                                 PMD_DRV_LOG(INFO,
2682                                         "disabled packed ring vectorized rx for mrg_rxbuf enabled");
2683                                 hw->use_vec_rx = 0;
2684                         }
2685
2686                         if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
2687                                 PMD_DRV_LOG(INFO,
2688                                         "disabled packed ring vectorized rx for TCP_LRO enabled");
2689                                 hw->use_vec_rx = 0;
2690                         }
2691                 }
2692         } else {
2693                 if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2694                         hw->use_inorder_tx = 1;
2695                         hw->use_inorder_rx = 1;
2696                         hw->use_vec_rx = 0;
2697                 }
2698
2699                 if (hw->use_vec_rx) {
2700 #if defined RTE_ARCH_ARM
2701                         if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2702                                 PMD_DRV_LOG(INFO,
2703                                         "disabled split ring vectorized path for requirement not met");
2704                                 hw->use_vec_rx = 0;
2705                         }
2706 #endif
2707                         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2708                                 PMD_DRV_LOG(INFO,
2709                                         "disabled split ring vectorized rx for mrg_rxbuf enabled");
2710                                 hw->use_vec_rx = 0;
2711                         }
2712
2713                         if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2714                                            RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
2715                                            RTE_ETH_RX_OFFLOAD_TCP_LRO |
2716                                            RTE_ETH_RX_OFFLOAD_VLAN_STRIP)) {
2717                                 PMD_DRV_LOG(INFO,
2718                                         "disabled split ring vectorized rx for offloading enabled");
2719                                 hw->use_vec_rx = 0;
2720                         }
2721
2722                         if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
2723                                 PMD_DRV_LOG(INFO,
2724                                         "disabled split ring vectorized rx, max SIMD bitwidth too low");
2725                                 hw->use_vec_rx = 0;
2726                         }
2727                 }
2728         }
2729
2730         return 0;
2731 }
2732
2733
2734 static int
2735 virtio_dev_start(struct rte_eth_dev *dev)
2736 {
2737         uint16_t nb_queues, i;
2738         struct virtqueue *vq;
2739         struct virtio_hw *hw = dev->data->dev_private;
2740         int ret;
2741
2742         /* Finish the initialization of the queues */
2743         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2744                 ret = virtio_dev_rx_queue_setup_finish(dev, i);
2745                 if (ret < 0)
2746                         return ret;
2747         }
2748         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2749                 ret = virtio_dev_tx_queue_setup_finish(dev, i);
2750                 if (ret < 0)
2751                         return ret;
2752         }
2753
2754         /* check if lsc interrupt feature is enabled */
2755         if (dev->data->dev_conf.intr_conf.lsc) {
2756                 if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2757                         PMD_DRV_LOG(ERR, "link status not supported by host");
2758                         return -ENOTSUP;
2759                 }
2760         }
2761
2762         /* Enable uio/vfio intr/eventfd mapping: although we already did that
2763          * in device configure, it could have been unmapped when the device was
2764          * stopped.
2765          */
2766         if (dev->data->dev_conf.intr_conf.lsc ||
2767             dev->data->dev_conf.intr_conf.rxq) {
2768                 virtio_intr_disable(dev);
2769
2770                 /* Set up the interrupt callback */
2771                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2772                         rte_intr_callback_register(dev->intr_handle,
2773                                                    virtio_interrupt_handler,
2774                                                    dev);
2775
2776                 if (virtio_intr_enable(dev) < 0) {
2777                         PMD_DRV_LOG(ERR, "interrupt enable failed");
2778                         return -EIO;
2779                 }
2780         }
2781
2782         /* Notify the backend.
2783          * Otherwise the tap backend might already have stopped its queue due to
2784          * fullness, and the vhost backend would have no chance to be woken up.
2785          */
2786         nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2787         if (hw->max_queue_pairs > 1) {
2788                 if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2789                         return -EINVAL;
2790         }
2791
2792         PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
2793
2794         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2795                 vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2796                 /* Flush the old packets */
2797                 virtqueue_rxvq_flush(vq);
2798                 virtqueue_notify(vq);
2799         }
2800
2801         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2802                 vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2803                 virtqueue_notify(vq);
2804         }
2805
2806         PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
2807
2808         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2809                 vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2810                 VIRTQUEUE_DUMP(vq);
2811         }
2812
2813         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2814                 vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2815                 VIRTQUEUE_DUMP(vq);
2816         }
2817
2818         set_rxtx_funcs(dev);
2819         hw->started = 1;
2820
2821         /* Initialize link state */
2822         virtio_dev_link_update(dev, 0);
2823
2824         return 0;
2825 }
2826
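/*
 * Detach and free every mbuf still held by the Rx and Tx virtqueues,
 * both used and unused.
 */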
2827 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2828 {
2829         struct virtio_hw *hw = dev->data->dev_private;
2830         uint16_t nr_vq = virtio_get_nr_vq(hw);
2831         const char *type __rte_unused;
2832         unsigned int i, mbuf_num = 0;
2833         struct virtqueue *vq;
2834         struct rte_mbuf *buf;
2835         int queue_type;
2836
2837         if (hw->vqs == NULL)
2838                 return;
2839
2840         for (i = 0; i < nr_vq; i++) {
2841                 vq = hw->vqs[i];
2842                 if (!vq)
2843                         continue;
2844
2845                 queue_type = virtio_get_queue_type(hw, i);
2846                 if (queue_type == VTNET_RQ)
2847                         type = "rxq";
2848                 else if (queue_type == VTNET_TQ)
2849                         type = "txq";
2850                 else
2851                         continue;
2852
2853                 PMD_INIT_LOG(DEBUG,
2854                         "Before freeing %s[%d] used and unused buf",
2855                         type, i);
2856                 VIRTQUEUE_DUMP(vq);
2857
2858                 while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2859                         rte_pktmbuf_free(buf);
2860                         mbuf_num++;
2861                 }
2862
2863                 PMD_INIT_LOG(DEBUG,
2864                         "After freeing %s[%d] used and unused buf",
2865                         type, i);
2866                 VIRTQUEUE_DUMP(vq);
2867         }
2868
2869         PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2870 }
2871
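/*
 * Reclaim completed Tx descriptors on every Tx queue, using the cleanup
 * routine that matches the negotiated ring layout (packed or split, in-order).
 */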
2872 static void
2873 virtio_tx_completed_cleanup(struct rte_eth_dev *dev)
2874 {
2875         struct virtio_hw *hw = dev->data->dev_private;
2876         struct virtqueue *vq;
2877         int qidx;
2878         void (*xmit_cleanup)(struct virtqueue *vq, uint16_t nb_used);
2879
2880         if (virtio_with_packed_queue(hw)) {
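                /* The vectorized Tx path reuses the in-order packed cleanup. */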
2881                 if (hw->use_vec_tx)
2882                         xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2883                 else if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER))
2884                         xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2885                 else
2886                         xmit_cleanup = &virtio_xmit_cleanup_normal_packed;
2887         } else {
2888                 if (hw->use_inorder_tx)
2889                         xmit_cleanup = &virtio_xmit_cleanup_inorder;
2890                 else
2891                         xmit_cleanup = &virtio_xmit_cleanup;
2892         }
2893
2894         for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
2895                 vq = hw->vqs[2 * qidx + VTNET_SQ_TQ_QUEUE_IDX];
2896                 if (vq != NULL)
2897                         xmit_cleanup(vq, virtqueue_nused(vq));
2898         }
2899 }
2900
2901 /*
2902  * Stop device: disable interrupt and mark link down
2903  */
2904 int
2905 virtio_dev_stop(struct rte_eth_dev *dev)
2906 {
2907         struct virtio_hw *hw = dev->data->dev_private;
2908         struct rte_eth_link link;
2909         struct rte_eth_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2910
2911         PMD_INIT_LOG(DEBUG, "stop");
2912         dev->data->dev_started = 0;
2913
2914         rte_spinlock_lock(&hw->state_lock);
2915         if (!hw->started)
2916                 goto out_unlock;
2917         hw->started = 0;
2918
2919         virtio_tx_completed_cleanup(dev);
2920
2921         if (intr_conf->lsc || intr_conf->rxq) {
2922                 virtio_intr_disable(dev);
2923
2924                 /* Reset interrupt callback  */
2925                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2926                         rte_intr_callback_unregister(dev->intr_handle,
2927                                                      virtio_interrupt_handler,
2928                                                      dev);
2929                 }
2930         }
2931
2932         memset(&link, 0, sizeof(link));
2933         rte_eth_linkstatus_set(dev, &link);
2934 out_unlock:
2935         rte_spinlock_unlock(&hw->state_lock);
2936
2937         return 0;
2938 }
2939
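/*
 * Report link status: down while the port is stopped; otherwise read it from
 * the device config space if VIRTIO_NET_F_STATUS was negotiated, or assume
 * the link is up if it was not.
 */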
2940 static int
2941 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2942 {
2943         struct rte_eth_link link;
2944         uint16_t status;
2945         struct virtio_hw *hw = dev->data->dev_private;
2946
2947         memset(&link, 0, sizeof(link));
2948         link.link_duplex = hw->duplex;
2949         link.link_speed  = hw->speed;
2950         link.link_autoneg = RTE_ETH_LINK_AUTONEG;
2951
2952         if (!hw->started) {
2953                 link.link_status = RTE_ETH_LINK_DOWN;
2954                 link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2955         } else if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2956                 PMD_INIT_LOG(DEBUG, "Get link status from hw");
2957                 virtio_read_dev_config(hw,
2958                                 offsetof(struct virtio_net_config, status),
2959                                 &status, sizeof(status));
2960                 if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2961                         link.link_status = RTE_ETH_LINK_DOWN;
2962                         link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2963                         PMD_INIT_LOG(DEBUG, "Port %d is down",
2964                                      dev->data->port_id);
2965                 } else {
2966                         link.link_status = RTE_ETH_LINK_UP;
2967                         PMD_INIT_LOG(DEBUG, "Port %d is up",
2968                                      dev->data->port_id);
2969                 }
2970         } else {
2971                 link.link_status = RTE_ETH_LINK_UP;
2972         }
2973
2974         return rte_eth_linkstatus_set(dev, &link);
2975 }
2976
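/*
 * Apply VLAN offload settings: VLAN filtering requires VIRTIO_NET_F_CTRL_VLAN
 * on the host; VLAN stripping is tracked in the device private data.
 */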
2977 static int
2978 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2979 {
2980         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2981         struct virtio_hw *hw = dev->data->dev_private;
2982         uint64_t offloads = rxmode->offloads;
2983
2984         if (mask & RTE_ETH_VLAN_FILTER_MASK) {
2985                 if ((offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER) &&
2986                                 !virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2987
2988                         PMD_DRV_LOG(NOTICE,
2989                                 "vlan filtering not available on this host");
2990
2991                         return -ENOTSUP;
2992                 }
2993         }
2994
2995         if (mask & RTE_ETH_VLAN_STRIP_MASK)
2996                 hw->vlan_strip = !!(offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
2997
2998         return 0;
2999 }
3000
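/*
 * Fill in device capabilities: queue limits, Rx/Tx offloads derived from the
 * host feature bits, RSS parameters and descriptor limits.
 */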
3001 static int
3002 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
3003 {
3004         uint64_t tso_mask, host_features;
3005         uint32_t rss_hash_types = 0;
3006         struct virtio_hw *hw = dev->data->dev_private;
3007         dev_info->speed_capa = virtio_dev_speed_capa_get(hw->speed);
3008
3009         dev_info->max_rx_queues =
3010                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
3011         dev_info->max_tx_queues =
3012                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
3013         dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
3014         dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
3015         dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
3016         dev_info->max_mtu = hw->max_mtu;
3017
3018         host_features = VIRTIO_OPS(hw)->get_features(hw);
3019         dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
3020         if (host_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
3021                 dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_SCATTER;
3022         if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
3023                 dev_info->rx_offload_capa |=
3024                         RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
3025                         RTE_ETH_RX_OFFLOAD_UDP_CKSUM;
3026         }
3027         if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
3028                 dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
3029         tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
3030                 (1ULL << VIRTIO_NET_F_GUEST_TSO6);
3031         if ((host_features & tso_mask) == tso_mask)
3032                 dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
3033
3034         dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
3035                                     RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
3036         if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
3037                 dev_info->tx_offload_capa |=
3038                         RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
3039                         RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
3040         }
3041         tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
3042                 (1ULL << VIRTIO_NET_F_HOST_TSO6);
3043         if ((host_features & tso_mask) == tso_mask)
3044                 dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
3045
3046         if (host_features & (1ULL << VIRTIO_NET_F_RSS)) {
3047                 virtio_dev_get_rss_config(hw, &rss_hash_types);
3048                 dev_info->hash_key_size = VIRTIO_NET_RSS_KEY_SIZE;
3049                 dev_info->reta_size = VIRTIO_NET_RSS_RETA_SIZE;
3050                 dev_info->flow_type_rss_offloads =
3051                         virtio_to_ethdev_rss_offloads(rss_hash_types);
3052         } else {
3053                 dev_info->hash_key_size = 0;
3054                 dev_info->reta_size = 0;
3055                 dev_info->flow_type_rss_offloads = 0;
3056         }
3057
3058         if (host_features & (1ULL << VIRTIO_F_RING_PACKED)) {
3059                 /*
3060                  * According to 2.7 Packed Virtqueues,
3061                  * 2.7.10.1 Structure Size and Alignment:
3062                  * The Queue Size value does not have to be a power of 2.
3063                  */
3064                 dev_info->rx_desc_lim.nb_max = UINT16_MAX;
3065                 dev_info->tx_desc_lim.nb_max = UINT16_MAX;
3066         } else {
3067                 /*
3068                  * According to 2.6 Split Virtqueues:
3069                  * Queue Size value is always a power of 2. The maximum Queue
3070                  * Size value is 32768.
3071                  */
3072                 dev_info->rx_desc_lim.nb_max = 32768;
3073                 dev_info->tx_desc_lim.nb_max = 32768;
3074         }
3075         /*
3076          * The actual minimum is not the same for virtqueues of different kinds,
3077          * but to avoid tangling the code with separate branches, rely on the
3078          * default thresholds, since the descriptor count must be at least their size.
3079          */
3080         dev_info->rx_desc_lim.nb_min = RTE_MAX(DEFAULT_RX_FREE_THRESH,
3081                                                RTE_VIRTIO_VPMD_RX_REARM_THRESH);
3082         dev_info->tx_desc_lim.nb_min = DEFAULT_TX_FREE_THRESH;
3083         dev_info->rx_desc_lim.nb_align = 1;
3084         dev_info->tx_desc_lim.nb_align = 1;
3085
3086         return 0;
3087 }
3088
3089 /*
3090  * A no-op stats mapping that lets testpmd collect per-queue stats.
3091  */
3092 static int
3093 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
3094 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
3095 __rte_unused uint8_t is_rx)
3096 {
3097         return 0;
3098 }
3099
3100 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_init, init, NOTICE);
3101 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_driver, driver, NOTICE);