drivers/net/virtio/virtio_ethdev.c

/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>

#include <ethdev_driver.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_arp.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_cpuflags.h>
#include <rte_vect.h>
#include <rte_memory.h>
#include <rte_eal_paging.h>
#include <rte_eal.h>
#include <rte_dev.h>
#include <rte_cycles.h>
#include <rte_kvargs.h>

#include "virtio_ethdev.h"
#include "virtio.h"
#include "virtio_logs.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"
#include "virtio_rxtx_simple.h"
#include "virtio_user/virtio_user_dev.h"

static int  virtio_dev_configure(struct rte_eth_dev *dev);
static int  virtio_dev_start(struct rte_eth_dev *dev);
static int virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
static int virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
static int virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
static int virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
static uint32_t virtio_dev_speed_capa_get(uint32_t speed);
static int virtio_dev_devargs_parse(struct rte_devargs *devargs,
        uint32_t *speed,
        int *vectorized);
static int virtio_dev_info_get(struct rte_eth_dev *dev,
                                struct rte_eth_dev_info *dev_info);
static int virtio_dev_link_update(struct rte_eth_dev *dev,
        int wait_to_complete);
static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static int virtio_dev_rss_hash_update(struct rte_eth_dev *dev,
                struct rte_eth_rss_conf *rss_conf);
static int virtio_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
                struct rte_eth_rss_conf *rss_conf);
static int virtio_dev_rss_reta_update(struct rte_eth_dev *dev,
                         struct rte_eth_rss_reta_entry64 *reta_conf,
                         uint16_t reta_size);
static int virtio_dev_rss_reta_query(struct rte_eth_dev *dev,
                         struct rte_eth_rss_reta_entry64 *reta_conf,
                         uint16_t reta_size);

static void virtio_set_hwaddr(struct virtio_hw *hw);
static void virtio_get_hwaddr(struct virtio_hw *hw);

static int virtio_dev_stats_get(struct rte_eth_dev *dev,
                                 struct rte_eth_stats *stats);
static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
                                 struct rte_eth_xstat *xstats, unsigned n);
static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
                                       struct rte_eth_xstat_name *xstats_names,
                                       unsigned limit);
static int virtio_dev_stats_reset(struct rte_eth_dev *dev);
static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
                                uint16_t vlan_id, int on);
static int virtio_mac_addr_add(struct rte_eth_dev *dev,
                                struct rte_ether_addr *mac_addr,
                                uint32_t index, uint32_t vmdq);
static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
static int virtio_mac_addr_set(struct rte_eth_dev *dev,
                                struct rte_ether_addr *mac_addr);

static int virtio_intr_disable(struct rte_eth_dev *dev);
static int virtio_get_monitor_addr(void *rx_queue,
                                struct rte_power_monitor_cond *pmc);

static int virtio_dev_queue_stats_mapping_set(
        struct rte_eth_dev *eth_dev,
        uint16_t queue_id,
        uint8_t stat_idx,
        uint8_t is_rx);

static void virtio_notify_peers(struct rte_eth_dev *dev);
static void virtio_ack_link_announce(struct rte_eth_dev *dev);

struct rte_virtio_xstats_name_off {
        char name[RTE_ETH_XSTATS_NAME_SIZE];
        unsigned offset;
};

/* [rt]x_qX_ is prepended to the name string here */
static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
        {"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
        {"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
        {"errors",                 offsetof(struct virtnet_rx, stats.errors)},
        {"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
        {"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
        {"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
        {"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
        {"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
        {"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
        {"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
        {"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
        {"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
        {"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
};

/* [rt]x_qX_ is prepended to the name string here */
static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
        {"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
        {"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
        {"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
        {"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
        {"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
        {"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
        {"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
        {"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
        {"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
        {"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
        {"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
        {"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
};

#define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
                            sizeof(rte_virtio_rxq_stat_strings[0]))
#define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
                            sizeof(rte_virtio_txq_stat_strings[0]))

struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];

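/*
 * Send a control command on a packed control queue.  The command sits in
 * the cvq header memzone and is described by one descriptor chain: a
 * read-only descriptor for the virtio_net_ctrl_hdr, one read-only
 * descriptor per data element in dlen[], and a final device-writable
 * descriptor for the one-byte ack status.  The head descriptor's flags
 * are stored last, with the required barrier, so the device never sees
 * a partially built chain.
 */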
static struct virtio_pmd_ctrl *
virtio_send_command_packed(struct virtnet_ctl *cvq,
                           struct virtio_pmd_ctrl *ctrl,
                           int *dlen, int pkt_num)
{
        struct virtqueue *vq = virtnet_cq_to_vq(cvq);
        int head;
        struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
        struct virtio_pmd_ctrl *result;
        uint16_t flags;
        int sum = 0;
        int nb_descs = 0;
        int k;

        /*
         * Format is enforced in qemu code:
         * One TX packet for header;
         * At least one TX packet per argument;
         * One RX packet for ACK.
         */
        head = vq->vq_avail_idx;
        flags = vq->vq_packed.cached_flags;
        desc[head].addr = cvq->virtio_net_hdr_mem;
        desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
        vq->vq_free_cnt--;
        nb_descs++;
        if (++vq->vq_avail_idx >= vq->vq_nentries) {
                vq->vq_avail_idx -= vq->vq_nentries;
                vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
        }

        for (k = 0; k < pkt_num; k++) {
                desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
                        + sizeof(struct virtio_net_ctrl_hdr)
                        + sizeof(ctrl->status) + sizeof(uint8_t) * sum;
                desc[vq->vq_avail_idx].len = dlen[k];
                desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
                        vq->vq_packed.cached_flags;
                sum += dlen[k];
                vq->vq_free_cnt--;
                nb_descs++;
                if (++vq->vq_avail_idx >= vq->vq_nentries) {
                        vq->vq_avail_idx -= vq->vq_nentries;
                        vq->vq_packed.cached_flags ^=
                                VRING_PACKED_DESC_F_AVAIL_USED;
                }
        }

        desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
                + sizeof(struct virtio_net_ctrl_hdr);
        desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
        desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
                vq->vq_packed.cached_flags;
        vq->vq_free_cnt--;
        nb_descs++;
        if (++vq->vq_avail_idx >= vq->vq_nentries) {
                vq->vq_avail_idx -= vq->vq_nentries;
                vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
        }

        virtqueue_store_flags_packed(&desc[head], VRING_DESC_F_NEXT | flags,
                        vq->hw->weak_barriers);

        virtio_wmb(vq->hw->weak_barriers);
        virtqueue_notify(vq);

        /* wait for used desc in virtqueue
         * desc_is_used has a load-acquire or rte_io_rmb inside
         */
        while (!desc_is_used(&desc[head], vq))
                usleep(100);

        /* now get used descriptors */
        vq->vq_free_cnt += nb_descs;
        vq->vq_used_cons_idx += nb_descs;
        if (vq->vq_used_cons_idx >= vq->vq_nentries) {
                vq->vq_used_cons_idx -= vq->vq_nentries;
                vq->vq_packed.used_wrap_counter ^= 1;
        }

        PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
                        "vq->vq_avail_idx=%d\n"
                        "vq->vq_used_cons_idx=%d\n"
                        "vq->vq_packed.cached_flags=0x%x\n"
                        "vq->vq_packed.used_wrap_counter=%d",
                        vq->vq_free_cnt,
                        vq->vq_avail_idx,
                        vq->vq_used_cons_idx,
                        vq->vq_packed.cached_flags,
                        vq->vq_packed.used_wrap_counter);

        result = cvq->virtio_net_hdr_mz->addr;
        return result;
}

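/*
 * Split-ring variant of the control command send: the same chain layout
 * (header, data elements, writable status byte) is built from the split
 * free-descriptor list, and completion is handled by walking the used
 * ring and returning each chain to the free list.
 */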
static struct virtio_pmd_ctrl *
virtio_send_command_split(struct virtnet_ctl *cvq,
                          struct virtio_pmd_ctrl *ctrl,
                          int *dlen, int pkt_num)
{
        struct virtio_pmd_ctrl *result;
        struct virtqueue *vq = virtnet_cq_to_vq(cvq);
        uint32_t head, i;
        int k, sum = 0;

        head = vq->vq_desc_head_idx;

        /*
         * Format is enforced in qemu code:
         * One TX packet for header;
         * At least one TX packet per argument;
         * One RX packet for ACK.
         */
        vq->vq_split.ring.desc[head].flags = VRING_DESC_F_NEXT;
        vq->vq_split.ring.desc[head].addr = cvq->virtio_net_hdr_mem;
        vq->vq_split.ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
        vq->vq_free_cnt--;
        i = vq->vq_split.ring.desc[head].next;

        for (k = 0; k < pkt_num; k++) {
                vq->vq_split.ring.desc[i].flags = VRING_DESC_F_NEXT;
                vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
                        + sizeof(struct virtio_net_ctrl_hdr)
                        + sizeof(ctrl->status) + sizeof(uint8_t)*sum;
                vq->vq_split.ring.desc[i].len = dlen[k];
                sum += dlen[k];
                vq->vq_free_cnt--;
                i = vq->vq_split.ring.desc[i].next;
        }

        vq->vq_split.ring.desc[i].flags = VRING_DESC_F_WRITE;
        vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
                        + sizeof(struct virtio_net_ctrl_hdr);
        vq->vq_split.ring.desc[i].len = sizeof(ctrl->status);
        vq->vq_free_cnt--;

        vq->vq_desc_head_idx = vq->vq_split.ring.desc[i].next;

        vq_update_avail_ring(vq, head);
        vq_update_avail_idx(vq);

        PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);

        virtqueue_notify(vq);

        while (virtqueue_nused(vq) == 0)
                usleep(100);

        while (virtqueue_nused(vq)) {
                uint32_t idx, desc_idx, used_idx;
                struct vring_used_elem *uep;

                used_idx = (uint32_t)(vq->vq_used_cons_idx
                                & (vq->vq_nentries - 1));
                uep = &vq->vq_split.ring.used->ring[used_idx];
                idx = (uint32_t) uep->id;
                desc_idx = idx;

                while (vq->vq_split.ring.desc[desc_idx].flags &
                                VRING_DESC_F_NEXT) {
                        desc_idx = vq->vq_split.ring.desc[desc_idx].next;
                        vq->vq_free_cnt++;
                }

                vq->vq_split.ring.desc[desc_idx].next = vq->vq_desc_head_idx;
                vq->vq_desc_head_idx = idx;

                vq->vq_used_cons_idx++;
                vq->vq_free_cnt++;
        }

        PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
                        vq->vq_free_cnt, vq->vq_desc_head_idx);

        result = cvq->virtio_net_hdr_mz->addr;
        return result;
}

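/*
 * Common entry point for control-queue commands.  Serializes on the cvq
 * spinlock, checks that enough free descriptors are available (one for
 * the header, one for the status byte, one per data element), copies the
 * command into the header memzone and dispatches to the packed or split
 * variant.  Returns the device-written ack status, or -1 if the control
 * queue is absent or too full.
 */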
static int
virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
                    int *dlen, int pkt_num)
{
        virtio_net_ctrl_ack status = ~0;
        struct virtio_pmd_ctrl *result;
        struct virtqueue *vq;

        ctrl->status = status;

        if (!cvq) {
                PMD_INIT_LOG(ERR, "Control queue is not supported.");
                return -1;
        }

        rte_spinlock_lock(&cvq->lock);
        vq = virtnet_cq_to_vq(cvq);

        PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
                "vq->hw->cvq = %p vq = %p",
                vq->vq_desc_head_idx, status, vq->hw->cvq, vq);

        if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
                rte_spinlock_unlock(&cvq->lock);
                return -1;
        }

        memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
                sizeof(struct virtio_pmd_ctrl));

        if (virtio_with_packed_queue(vq->hw))
                result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num);
        else
                result = virtio_send_command_split(cvq, ctrl, dlen, pkt_num);

        rte_spinlock_unlock(&cvq->lock);
        return result->status;
}

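/*
 * Program RSS through VIRTIO_NET_CTRL_MQ_RSS_CONFIG: the negotiated hash
 * types, the power-of-two indirection table, the number of TX queues and
 * the hash key are packed into a virtio_net_ctrl_rss payload and sent as
 * a single control command.
 */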
static int
virtio_set_multiple_queues_rss(struct rte_eth_dev *dev, uint16_t nb_queues)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        struct virtio_net_ctrl_rss rss;
        int dlen, ret;

        rss.hash_types = hw->rss_hash_types & VIRTIO_NET_HASH_TYPE_MASK;
        RTE_BUILD_BUG_ON(!RTE_IS_POWER_OF_2(VIRTIO_NET_RSS_RETA_SIZE));
        rss.indirection_table_mask = VIRTIO_NET_RSS_RETA_SIZE - 1;
        rss.unclassified_queue = 0;
        memcpy(rss.indirection_table, hw->rss_reta, VIRTIO_NET_RSS_RETA_SIZE * sizeof(uint16_t));
        rss.max_tx_vq = nb_queues;
        rss.hash_key_length = VIRTIO_NET_RSS_KEY_SIZE;
        memcpy(rss.hash_key_data, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);

        ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_RSS_CONFIG;
        memcpy(ctrl.data, &rss, sizeof(rss));

        dlen = sizeof(rss);

        ret = virtio_send_command(hw->cvq, &ctrl, &dlen, 1);
        if (ret) {
                PMD_INIT_LOG(ERR, "RSS multiqueue configured but send command failed");
                return -EINVAL;
        }

        return 0;
}

static int
virtio_set_multiple_queues_auto(struct rte_eth_dev *dev, uint16_t nb_queues)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen;
        int ret;

        ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
        memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));

        dlen = sizeof(uint16_t);

        ret = virtio_send_command(hw->cvq, &ctrl, &dlen, 1);
        if (ret) {
                PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
                          "failed, this is too late now...");
                return -EINVAL;
        }

        return 0;
}

static int
virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
{
        struct virtio_hw *hw = dev->data->dev_private;

        if (virtio_with_feature(hw, VIRTIO_NET_F_RSS))
                return virtio_set_multiple_queues_rss(dev, nb_queues);
        else
                return virtio_set_multiple_queues_auto(dev, nb_queues);
}

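/*
 * Total number of virtqueues: one RX and one TX queue per queue pair,
 * plus the control queue when VIRTIO_NET_F_CTRL_VQ was negotiated (the
 * control queue uses the last index).
 */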
static uint16_t
virtio_get_nr_vq(struct virtio_hw *hw)
{
        uint16_t nr_vq = hw->max_queue_pairs * 2;

        if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
                nr_vq += 1;

        return nr_vq;
}

static void
virtio_init_vring(struct virtqueue *vq)
{
        int size = vq->vq_nentries;
        uint8_t *ring_mem = vq->vq_ring_virt_mem;

        PMD_INIT_FUNC_TRACE();

        memset(ring_mem, 0, vq->vq_ring_size);

        vq->vq_used_cons_idx = 0;
        vq->vq_desc_head_idx = 0;
        vq->vq_avail_idx = 0;
        vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
        vq->vq_free_cnt = vq->vq_nentries;
        memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
        if (virtio_with_packed_queue(vq->hw)) {
                vring_init_packed(&vq->vq_packed.ring, ring_mem,
                                  VIRTIO_VRING_ALIGN, size);
                vring_desc_init_packed(vq, size);
        } else {
                struct vring *vr = &vq->vq_split.ring;

                vring_init_split(vr, ring_mem, VIRTIO_VRING_ALIGN, size);
                vring_desc_init_split(vr->desc, size);
        }
        /*
         * Disable the device (host) from interrupting the guest
         */
        virtqueue_disable_intr(vq);
}

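/*
 * Allocate and initialize one virtqueue: read its size from the device,
 * allocate the virtqueue structure together with its descriptor-extra
 * array, and reserve an IOVA-contiguous memzone for the vring.  TX
 * queues additionally get a per-descriptor header/indirect region and
 * the control queue gets one page for command, data and status; the
 * labels at the end unwind these allocations in reverse order on error.
 */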
static int
virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
{
        char vq_name[VIRTQUEUE_MAX_NAME_SZ];
        char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
        const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
        unsigned int vq_size, size;
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtnet_rx *rxvq = NULL;
        struct virtnet_tx *txvq = NULL;
        struct virtnet_ctl *cvq = NULL;
        struct virtqueue *vq;
        size_t sz_hdr_mz = 0;
        void *sw_ring = NULL;
        int queue_type = virtio_get_queue_type(hw, queue_idx);
        int ret;
        int numa_node = dev->device->numa_node;
        struct rte_mbuf *fake_mbuf = NULL;

        PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
                        queue_idx, numa_node);

        /*
         * Read the virtqueue size from the Queue Size field.
         * It is always a power of 2; if it is 0, the virtqueue does not exist.
         */
        vq_size = VIRTIO_OPS(hw)->get_queue_num(hw, queue_idx);
        PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
        if (vq_size == 0) {
                PMD_INIT_LOG(ERR, "virtqueue does not exist");
                return -EINVAL;
        }

        if (!virtio_with_packed_queue(hw) && !rte_is_power_of_2(vq_size)) {
                PMD_INIT_LOG(ERR, "split virtqueue size is not power of 2");
                return -EINVAL;
        }

        snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
                 dev->data->port_id, queue_idx);

        size = RTE_ALIGN_CEIL(sizeof(*vq) +
                                vq_size * sizeof(struct vq_desc_extra),
                                RTE_CACHE_LINE_SIZE);
        if (queue_type == VTNET_TQ) {
                /*
                 * For each xmit packet, allocate a virtio_net_hdr
                 * and indirect ring elements
                 */
                sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
        } else if (queue_type == VTNET_CQ) {
                /* Allocate a page for control vq command, data and status */
                sz_hdr_mz = rte_mem_page_size();
        }

        vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
                                numa_node);
        if (vq == NULL) {
                PMD_INIT_LOG(ERR, "can not allocate vq");
                return -ENOMEM;
        }
        hw->vqs[queue_idx] = vq;

        vq->hw = hw;
        vq->vq_queue_index = queue_idx;
        vq->vq_nentries = vq_size;
        if (virtio_with_packed_queue(hw)) {
                vq->vq_packed.used_wrap_counter = 1;
                vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL;
                vq->vq_packed.event_flags_shadow = 0;
                if (queue_type == VTNET_RQ)
                        vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE;
        }

        /*
         * Reserve a memzone for vring elements
         */
        size = vring_size(hw, vq_size, VIRTIO_VRING_ALIGN);
        vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_VRING_ALIGN);
        PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
                     size, vq->vq_ring_size);

        mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
                        numa_node, RTE_MEMZONE_IOVA_CONTIG,
                        VIRTIO_VRING_ALIGN);
        if (mz == NULL) {
                if (rte_errno == EEXIST)
                        mz = rte_memzone_lookup(vq_name);
                if (mz == NULL) {
                        ret = -ENOMEM;
                        goto free_vq;
                }
        }

        memset(mz->addr, 0, mz->len);

        if (hw->use_va)
                vq->vq_ring_mem = (uintptr_t)mz->addr;
        else
                vq->vq_ring_mem = mz->iova;

        vq->vq_ring_virt_mem = mz->addr;
        PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64, vq->vq_ring_mem);
        PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: %p", vq->vq_ring_virt_mem);

        virtio_init_vring(vq);

        if (sz_hdr_mz) {
                snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
                         dev->data->port_id, queue_idx);
                hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
                                numa_node, RTE_MEMZONE_IOVA_CONTIG,
                                RTE_CACHE_LINE_SIZE);
                if (hdr_mz == NULL) {
                        if (rte_errno == EEXIST)
                                hdr_mz = rte_memzone_lookup(vq_hdr_name);
                        if (hdr_mz == NULL) {
                                ret = -ENOMEM;
                                goto free_mz;
                        }
                }
        }

        if (queue_type == VTNET_RQ) {
                size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
                               sizeof(vq->sw_ring[0]);

                sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
                                RTE_CACHE_LINE_SIZE, numa_node);
                if (!sw_ring) {
                        PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
                        ret = -ENOMEM;
                        goto free_hdr_mz;
                }

                fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
                                RTE_CACHE_LINE_SIZE, numa_node);
                if (!fake_mbuf) {
                        PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
                        ret = -ENOMEM;
                        goto free_sw_ring;
                }

                vq->sw_ring = sw_ring;
                rxvq = &vq->rxq;
                rxvq->port_id = dev->data->port_id;
                rxvq->mz = mz;
                rxvq->fake_mbuf = fake_mbuf;
        } else if (queue_type == VTNET_TQ) {
                txvq = &vq->txq;
                txvq->port_id = dev->data->port_id;
                txvq->mz = mz;
                txvq->virtio_net_hdr_mz = hdr_mz;
                if (hw->use_va)
                        txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
                else
                        txvq->virtio_net_hdr_mem = hdr_mz->iova;
        } else if (queue_type == VTNET_CQ) {
                cvq = &vq->cq;
                cvq->mz = mz;
                cvq->virtio_net_hdr_mz = hdr_mz;
                if (hw->use_va)
                        cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
                else
                        cvq->virtio_net_hdr_mem = hdr_mz->iova;
                memset(cvq->virtio_net_hdr_mz->addr, 0, rte_mem_page_size());

                hw->cvq = cvq;
        }

        if (hw->use_va)
                vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_addr);
        else
                vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_iova);

        if (queue_type == VTNET_TQ) {
                struct virtio_tx_region *txr;
                unsigned int i;

                txr = hdr_mz->addr;
                memset(txr, 0, vq_size * sizeof(*txr));
                for (i = 0; i < vq_size; i++) {
                        /* first indirect descriptor is always the tx header */
                        if (!virtio_with_packed_queue(hw)) {
                                struct vring_desc *start_dp = txr[i].tx_indir;
                                vring_desc_init_split(start_dp,
                                                      RTE_DIM(txr[i].tx_indir));
                                start_dp->addr = txvq->virtio_net_hdr_mem
                                        + i * sizeof(*txr)
                                        + offsetof(struct virtio_tx_region,
                                                   tx_hdr);
                                start_dp->len = hw->vtnet_hdr_size;
                                start_dp->flags = VRING_DESC_F_NEXT;
                        } else {
                                struct vring_packed_desc *start_dp =
                                        txr[i].tx_packed_indir;
                                vring_desc_init_indirect_packed(start_dp,
                                      RTE_DIM(txr[i].tx_packed_indir));
                                start_dp->addr = txvq->virtio_net_hdr_mem
                                        + i * sizeof(*txr)
                                        + offsetof(struct virtio_tx_region,
                                                   tx_hdr);
                                start_dp->len = hw->vtnet_hdr_size;
                        }
                }
        }

        if (VIRTIO_OPS(hw)->setup_queue(hw, vq) < 0) {
                PMD_INIT_LOG(ERR, "setup_queue failed");
                ret = -EINVAL;
                goto clean_vq;
        }

        return 0;

clean_vq:
        hw->cvq = NULL;
        rte_free(fake_mbuf);
free_sw_ring:
        rte_free(sw_ring);
free_hdr_mz:
        rte_memzone_free(hdr_mz);
free_mz:
        rte_memzone_free(mz);
free_vq:
        rte_free(vq);
        hw->vqs[queue_idx] = NULL;

        return ret;
}

static void
virtio_free_queues(struct virtio_hw *hw)
{
        uint16_t nr_vq = virtio_get_nr_vq(hw);
        struct virtqueue *vq;
        int queue_type;
        uint16_t i;

        if (hw->vqs == NULL)
                return;

        for (i = 0; i < nr_vq; i++) {
                vq = hw->vqs[i];
                if (!vq)
                        continue;

                queue_type = virtio_get_queue_type(hw, i);
                if (queue_type == VTNET_RQ) {
                        rte_free(vq->rxq.fake_mbuf);
                        rte_free(vq->sw_ring);
                        rte_memzone_free(vq->rxq.mz);
                } else if (queue_type == VTNET_TQ) {
                        rte_memzone_free(vq->txq.mz);
                        rte_memzone_free(vq->txq.virtio_net_hdr_mz);
                } else {
                        rte_memzone_free(vq->cq.mz);
                        rte_memzone_free(vq->cq.virtio_net_hdr_mz);
                }

                rte_free(vq);
                hw->vqs[i] = NULL;
        }

        rte_free(hw->vqs);
        hw->vqs = NULL;
}

static int
virtio_alloc_queues(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        uint16_t nr_vq = virtio_get_nr_vq(hw);
        uint16_t i;
        int ret;

        hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
        if (!hw->vqs) {
                PMD_INIT_LOG(ERR, "failed to allocate vqs");
                return -ENOMEM;
        }

        for (i = 0; i < nr_vq; i++) {
                ret = virtio_init_queue(dev, i);
                if (ret < 0) {
                        virtio_free_queues(hw);
                        return ret;
                }
        }

        return 0;
}

static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);

static void
virtio_free_rss(struct virtio_hw *hw)
{
        rte_free(hw->rss_key);
        hw->rss_key = NULL;

        rte_free(hw->rss_reta);
        hw->rss_reta = NULL;
}

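/*
 * Device close path.  Only the primary process tears the port down:
 * config and queue interrupts are disabled first, then the device is
 * reset and the mbufs, virtqueues and RSS state are released before the
 * bus-specific dev_close hook runs.
 */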
int
virtio_dev_close(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct rte_eth_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;

        PMD_INIT_LOG(DEBUG, "virtio_dev_close");
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;

        if (!hw->opened)
                return 0;
        hw->opened = 0;

        /* reset the NIC */
        if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
                VIRTIO_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
        if (intr_conf->rxq)
                virtio_queues_unbind_intr(dev);

        if (intr_conf->lsc || intr_conf->rxq) {
                virtio_intr_disable(dev);
                rte_intr_efd_disable(dev->intr_handle);
                rte_intr_vec_list_free(dev->intr_handle);
        }

        virtio_reset(hw);
        virtio_dev_free_mbufs(dev);
        virtio_free_queues(hw);
        virtio_free_rss(hw);

        return VIRTIO_OPS(hw)->dev_close(hw);
}

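/*
 * The four RX-mode handlers below share one pattern: they require the
 * VIRTIO_NET_F_CTRL_RX feature and send a one-byte on/off command of
 * class VIRTIO_NET_CTRL_RX (PROMISC or ALLMULTI) on the control queue.
 */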
static int
virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return -ENOTSUP;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
        ctrl.data[0] = 1;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret) {
                PMD_INIT_LOG(ERR, "Failed to enable promisc");
                return -EAGAIN;
        }

        return 0;
}

static int
virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return -ENOTSUP;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
        ctrl.data[0] = 0;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret) {
                PMD_INIT_LOG(ERR, "Failed to disable promisc");
                return -EAGAIN;
        }

        return 0;
}

static int
virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return -ENOTSUP;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
        ctrl.data[0] = 1;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret) {
                PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
                return -EAGAIN;
        }

        return 0;
}

static int
virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return -ENOTSUP;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
        ctrl.data[0] = 0;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret) {
                PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
                return -EAGAIN;
        }

        return 0;
}

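/*
 * Rx buffer size and scatter checks: unless Rx scatter is enabled, a
 * received frame must fit in a single mbuf's data room, so the helpers
 * below validate the maximum frame length against the buffer size of
 * each Rx queue's mempool.
 */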
uint16_t
virtio_rx_mem_pool_buf_size(struct rte_mempool *mp)
{
        return rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
}

bool
virtio_rx_check_scatter(uint16_t max_rx_pkt_len, uint16_t rx_buf_size,
                        bool rx_scatter_enabled, const char **error)
{
        if (!rx_scatter_enabled && max_rx_pkt_len > rx_buf_size) {
                *error = "Rx scatter is disabled and RxQ mbuf pool object size is too small";
                return false;
        }

        return true;
}

static bool
virtio_check_scatter_on_all_rx_queues(struct rte_eth_dev *dev,
                                      uint16_t frame_size)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtnet_rx *rxvq;
        struct virtqueue *vq;
        unsigned int qidx;
        uint16_t buf_size;
        const char *error;

        if (hw->vqs == NULL)
                return true;

        for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
                vq = hw->vqs[2 * qidx + VTNET_SQ_RQ_QUEUE_IDX];
                if (vq == NULL)
                        continue;

                rxvq = &vq->rxq;
                if (rxvq->mpool == NULL)
                        continue;
                buf_size = virtio_rx_mem_pool_buf_size(rxvq->mpool);

                if (!virtio_rx_check_scatter(frame_size, buf_size,
                                             hw->rx_ol_scatter, &error)) {
                        PMD_INIT_LOG(ERR, "MTU check for RxQ %u failed: %s",
                                     qidx, error);
                        return false;
                }
        }

        return true;
}

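/*
 * MTU update.  The on-wire frame also carries the Ethernet header, a
 * VLAN tag and the virtio-net header; as a worked example (assuming the
 * common 12-byte virtio-net header), an MTU of 1500 gives a frame size
 * of 1500 + 14 + 4 + 12 = 1530 bytes, which must not exceed the device
 * maximum nor VIRTIO_MAX_RX_PKTLEN.
 */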
#define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
static int
virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
        struct virtio_hw *hw = dev->data->dev_private;
        uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
                                 hw->vtnet_hdr_size;
        uint32_t frame_size = mtu + ether_hdr_len;
        uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;

        max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);

        if (mtu < RTE_ETHER_MIN_MTU || frame_size > max_frame_size) {
                PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
                        RTE_ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
                return -EINVAL;
        }

        if (!virtio_check_scatter_on_all_rx_queues(dev, frame_size)) {
                PMD_INIT_LOG(ERR, "MTU vs Rx scatter and Rx buffers check failed");
                return -EINVAL;
        }

        hw->max_rx_pkt_len = frame_size;

        return 0;
}

static int
virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
        struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);

        virtqueue_enable_intr(vq);
        virtio_mb(hw->weak_barriers);
        return 0;
}

static int
virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
{
        struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
        struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);

        virtqueue_disable_intr(vq);
        return 0;
}

/*
 * dev_ops for virtio, bare necessities for basic operation
 */
static const struct eth_dev_ops virtio_eth_dev_ops = {
        .dev_configure           = virtio_dev_configure,
        .dev_start               = virtio_dev_start,
        .dev_stop                = virtio_dev_stop,
        .dev_close               = virtio_dev_close,
        .promiscuous_enable      = virtio_dev_promiscuous_enable,
        .promiscuous_disable     = virtio_dev_promiscuous_disable,
        .allmulticast_enable     = virtio_dev_allmulticast_enable,
        .allmulticast_disable    = virtio_dev_allmulticast_disable,
        .mtu_set                 = virtio_mtu_set,
        .dev_infos_get           = virtio_dev_info_get,
        .stats_get               = virtio_dev_stats_get,
        .xstats_get              = virtio_dev_xstats_get,
        .xstats_get_names        = virtio_dev_xstats_get_names,
        .stats_reset             = virtio_dev_stats_reset,
        .xstats_reset            = virtio_dev_stats_reset,
        .link_update             = virtio_dev_link_update,
        .vlan_offload_set        = virtio_dev_vlan_offload_set,
        .rx_queue_setup          = virtio_dev_rx_queue_setup,
        .rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
        .rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
        .tx_queue_setup          = virtio_dev_tx_queue_setup,
        .rss_hash_update         = virtio_dev_rss_hash_update,
        .rss_hash_conf_get       = virtio_dev_rss_hash_conf_get,
        .reta_update             = virtio_dev_rss_reta_update,
        .reta_query              = virtio_dev_rss_reta_query,
        /* collect stats per queue */
        .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
        .vlan_filter_set         = virtio_vlan_filter_set,
        .mac_addr_add            = virtio_mac_addr_add,
        .mac_addr_remove         = virtio_mac_addr_remove,
        .mac_addr_set            = virtio_mac_addr_set,
        .get_monitor_addr        = virtio_get_monitor_addr,
};

/*
 * dev_ops for virtio-user in secondary processes; only limited
 * functionality is supported there currently.
 */
const struct eth_dev_ops virtio_user_secondary_eth_dev_ops = {
        .dev_infos_get           = virtio_dev_info_get,
        .stats_get               = virtio_dev_stats_get,
        .xstats_get              = virtio_dev_xstats_get,
        .xstats_get_names        = virtio_dev_xstats_get_names,
        .stats_reset             = virtio_dev_stats_reset,
        .xstats_reset            = virtio_dev_stats_reset,
        /* collect stats per queue */
        .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
};

static void
virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        unsigned i;

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                const struct virtnet_tx *txvq = dev->data->tx_queues[i];
                if (txvq == NULL)
                        continue;

                stats->opackets += txvq->stats.packets;
                stats->obytes += txvq->stats.bytes;

                if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
                        stats->q_opackets[i] = txvq->stats.packets;
                        stats->q_obytes[i] = txvq->stats.bytes;
                }
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
                if (rxvq == NULL)
                        continue;

                stats->ipackets += rxvq->stats.packets;
                stats->ibytes += rxvq->stats.bytes;
                stats->ierrors += rxvq->stats.errors;

                if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
                        stats->q_ipackets[i] = rxvq->stats.packets;
                        stats->q_ibytes[i] = rxvq->stats.bytes;
                }
        }

        stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
}

static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
                                       struct rte_eth_xstat_name *xstats_names,
                                       __rte_unused unsigned limit)
{
        unsigned i;
        unsigned count = 0;
        unsigned t;

        unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
                dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;

        if (xstats_names != NULL) {
                /* Note: limit checked in rte_eth_xstats_names() */

                for (i = 0; i < dev->data->nb_rx_queues; i++) {
                        struct virtnet_rx *rxvq = dev->data->rx_queues[i];
                        if (rxvq == NULL)
                                continue;
                        for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
                                snprintf(xstats_names[count].name,
                                        sizeof(xstats_names[count].name),
                                        "rx_q%u_%s", i,
                                        rte_virtio_rxq_stat_strings[t].name);
                                count++;
                        }
                }

                for (i = 0; i < dev->data->nb_tx_queues; i++) {
                        struct virtnet_tx *txvq = dev->data->tx_queues[i];
                        if (txvq == NULL)
                                continue;
                        for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
                                snprintf(xstats_names[count].name,
                                        sizeof(xstats_names[count].name),
                                        "tx_q%u_%s", i,
                                        rte_virtio_txq_stat_strings[t].name);
                                count++;
                        }
                }
                return count;
        }
        return nstats;
}

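/*
 * Extended stats values are read generically: each entry in the name
 * tables near the top of the file records an offset into virtnet_rx or
 * virtnet_tx, and the value is loaded by adding that offset to the
 * queue structure pointer.
 */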
static int
virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
                      unsigned n)
{
        unsigned i;
        unsigned count = 0;

        unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
                dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;

        if (n < nstats)
                return nstats;

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct virtnet_rx *rxvq = dev->data->rx_queues[i];

                if (rxvq == NULL)
                        continue;

                unsigned t;

                for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
                        xstats[count].value = *(uint64_t *)(((char *)rxvq) +
                                rte_virtio_rxq_stat_strings[t].offset);
                        xstats[count].id = count;
                        count++;
                }
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct virtnet_tx *txvq = dev->data->tx_queues[i];

                if (txvq == NULL)
                        continue;

                unsigned t;

                for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
                        xstats[count].value = *(uint64_t *)(((char *)txvq) +
                                rte_virtio_txq_stat_strings[t].offset);
                        xstats[count].id = count;
                        count++;
                }
        }

        return count;
}

static int
virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        virtio_update_stats(dev, stats);

        return 0;
}

static int
virtio_dev_stats_reset(struct rte_eth_dev *dev)
{
        unsigned int i;

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct virtnet_tx *txvq = dev->data->tx_queues[i];
                if (txvq == NULL)
                        continue;

                txvq->stats.packets = 0;
                txvq->stats.bytes = 0;
                txvq->stats.multicast = 0;
                txvq->stats.broadcast = 0;
                memset(txvq->stats.size_bins, 0,
                       sizeof(txvq->stats.size_bins[0]) * 8);
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct virtnet_rx *rxvq = dev->data->rx_queues[i];
                if (rxvq == NULL)
                        continue;

                rxvq->stats.packets = 0;
                rxvq->stats.bytes = 0;
                rxvq->stats.errors = 0;
                rxvq->stats.multicast = 0;
                rxvq->stats.broadcast = 0;
                memset(rxvq->stats.size_bins, 0,
                       sizeof(rxvq->stats.size_bins[0]) * 8);
        }

        return 0;
}

static void
virtio_set_hwaddr(struct virtio_hw *hw)
{
        virtio_write_dev_config(hw,
                        offsetof(struct virtio_net_config, mac),
                        &hw->mac_addr, RTE_ETHER_ADDR_LEN);
}

static void
virtio_get_hwaddr(struct virtio_hw *hw)
{
        if (virtio_with_feature(hw, VIRTIO_NET_F_MAC)) {
                virtio_read_dev_config(hw,
                        offsetof(struct virtio_net_config, mac),
                        &hw->mac_addr, RTE_ETHER_ADDR_LEN);
        } else {
                rte_eth_random_addr(&hw->mac_addr[0]);
                virtio_set_hwaddr(hw);
        }
}

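/*
 * Program the device MAC filter tables.  The command carries two
 * variable-length virtio_net_ctrl_mac blobs, unicast then multicast,
 * each an entry count followed by that many 6-byte addresses, sent as
 * the two data elements of one VIRTIO_NET_CTRL_MAC_TABLE_SET command.
 */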
static int
virtio_mac_table_set(struct virtio_hw *hw,
                     const struct virtio_net_ctrl_mac *uc,
                     const struct virtio_net_ctrl_mac *mc)
{
        struct virtio_pmd_ctrl ctrl;
        int err, len[2];

        if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
                PMD_DRV_LOG(INFO, "host does not support mac table");
                return -1;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;

        len[0] = uc->entries * RTE_ETHER_ADDR_LEN + sizeof(uc->entries);
        memcpy(ctrl.data, uc, len[0]);

        len[1] = mc->entries * RTE_ETHER_ADDR_LEN + sizeof(mc->entries);
        memcpy(ctrl.data + len[0], mc, len[1]);

        err = virtio_send_command(hw->cvq, &ctrl, len, 2);
        if (err != 0)
                PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
        return err;
}

static int
virtio_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
                    uint32_t index, uint32_t vmdq __rte_unused)
{
        struct virtio_hw *hw = dev->data->dev_private;
        const struct rte_ether_addr *addrs = dev->data->mac_addrs;
        unsigned int i;
        struct virtio_net_ctrl_mac *uc, *mc;

        if (index >= VIRTIO_MAX_MAC_ADDRS) {
                PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
                return -EINVAL;
        }

        uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
                sizeof(uc->entries));
        uc->entries = 0;
        mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
                sizeof(mc->entries));
        mc->entries = 0;

        for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
                const struct rte_ether_addr *addr
                        = (i == index) ? mac_addr : addrs + i;
                struct virtio_net_ctrl_mac *tbl
                        = rte_is_multicast_ether_addr(addr) ? mc : uc;

                memcpy(&tbl->macs[tbl->entries++], addr, RTE_ETHER_ADDR_LEN);
        }

        return virtio_mac_table_set(hw, uc, mc);
}

static void
virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct rte_ether_addr *addrs = dev->data->mac_addrs;
        struct virtio_net_ctrl_mac *uc, *mc;
        unsigned int i;

        if (index >= VIRTIO_MAX_MAC_ADDRS) {
                PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
                return;
        }

        uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
                sizeof(uc->entries));
        uc->entries = 0;
        mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
                sizeof(mc->entries));
        mc->entries = 0;

        for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
                struct virtio_net_ctrl_mac *tbl;

                if (i == index || rte_is_zero_ether_addr(addrs + i))
                        continue;

                tbl = rte_is_multicast_ether_addr(addrs + i) ? mc : uc;
                memcpy(&tbl->macs[tbl->entries++], addrs + i,
                        RTE_ETHER_ADDR_LEN);
        }

        virtio_mac_table_set(hw, uc, mc);
}

static int
virtio_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
{
        struct virtio_hw *hw = dev->data->dev_private;

        memcpy(hw->mac_addr, mac_addr, RTE_ETHER_ADDR_LEN);

        /* Use atomic update if available */
        if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
                struct virtio_pmd_ctrl ctrl;
                int len = RTE_ETHER_ADDR_LEN;

                ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
                ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;

                memcpy(ctrl.data, mac_addr, RTE_ETHER_ADDR_LEN);
                return virtio_send_command(hw->cvq, &ctrl, &len, 1);
        }

        if (!virtio_with_feature(hw, VIRTIO_NET_F_MAC))
                return -ENOTSUP;

        virtio_set_hwaddr(hw);
        return 0;
}

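/*
 * Power-monitor support: the opaque[] slots below carry the expected
 * value, the mask and the match polarity for the wakeup callback.  For
 * packed rings the monitored word is the next used descriptor's flags;
 * for split rings it is the used index, woken when it no longer equals
 * the consumed index.
 */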
1388 #define CLB_VAL_IDX 0
1389 #define CLB_MSK_IDX 1
1390 #define CLB_MATCH_IDX 2
1391 static int
1392 virtio_monitor_callback(const uint64_t value,
1393                 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
1394 {
1395         const uint64_t m = opaque[CLB_MSK_IDX];
1396         const uint64_t v = opaque[CLB_VAL_IDX];
1397         const uint64_t c = opaque[CLB_MATCH_IDX];
1398
1399         if (c)
1400                 return (value & m) == v ? -1 : 0;
1401         else
1402                 return (value & m) == v ? 0 : -1;
1403 }
1404
1405 static int
1406 virtio_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1407 {
1408         struct virtnet_rx *rxvq = rx_queue;
1409         struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
1410         struct virtio_hw *hw;
1411
1412         if (vq == NULL)
1413                 return -EINVAL;
1414
1415         hw = vq->hw;
1416         if (virtio_with_packed_queue(hw)) {
1417                 struct vring_packed_desc *desc;
1418                 desc = vq->vq_packed.ring.desc;
1419                 pmc->addr = &desc[vq->vq_used_cons_idx].flags;
1420                 if (vq->vq_packed.used_wrap_counter)
1421                         pmc->opaque[CLB_VAL_IDX] =
1422                                                 VRING_PACKED_DESC_F_AVAIL_USED;
1423                 else
1424                         pmc->opaque[CLB_VAL_IDX] = 0;
1425                 pmc->opaque[CLB_MSK_IDX] = VRING_PACKED_DESC_F_AVAIL_USED;
1426                 pmc->opaque[CLB_MATCH_IDX] = 1;
1427                 pmc->size = sizeof(desc[vq->vq_used_cons_idx].flags);
1428         } else {
1429                 pmc->addr = &vq->vq_split.ring.used->idx;
1430                 pmc->opaque[CLB_VAL_IDX] = vq->vq_used_cons_idx
1431                                         & (vq->vq_nentries - 1);
1432                 pmc->opaque[CLB_MSK_IDX] = vq->vq_nentries - 1;
1433                 pmc->opaque[CLB_MATCH_IDX] = 0;
1434                 pmc->size = sizeof(vq->vq_split.ring.used->idx);
1435         }
1436         pmc->fn = virtio_monitor_callback;
1437
1438         return 0;
1439 }
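/*
 * Application-side sketch (illustrative, not part of the driver): the
 * condition filled in above is consumed through the generic ethdev and
 * power-intrinsics API (rte_power_intrinsics.h), e.g. to sleep until
 * the device writes a new completion:
 *
 *	struct rte_power_monitor_cond pmc;
 *
 *	if (rte_eth_get_monitor_addr(port_id, queue_id, &pmc) == 0)
 *		rte_power_monitor(&pmc, rte_get_tsc_cycles() + timeout_cycles);
 *
 * port_id, queue_id and timeout_cycles are placeholders supplied by the
 * caller; the CPU must support power monitoring for this to succeed.
 */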
1440
1441 static int
1442 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1443 {
1444         struct virtio_hw *hw = dev->data->dev_private;
1445         struct virtio_pmd_ctrl ctrl;
1446         int len;
1447
1448         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1449                 return -ENOTSUP;
1450
1451         ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1452         ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1453         memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1454         len = sizeof(vlan_id);
1455
1456         return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1457 }
1458
1459 static int
1460 virtio_intr_unmask(struct rte_eth_dev *dev)
1461 {
1462         struct virtio_hw *hw = dev->data->dev_private;
1463
1464         if (rte_intr_ack(dev->intr_handle) < 0)
1465                 return -1;
1466
1467         if (VIRTIO_OPS(hw)->intr_detect)
1468                 VIRTIO_OPS(hw)->intr_detect(hw);
1469
1470         return 0;
1471 }
1472
1473 static int
1474 virtio_intr_enable(struct rte_eth_dev *dev)
1475 {
1476         struct virtio_hw *hw = dev->data->dev_private;
1477
1478         if (rte_intr_enable(dev->intr_handle) < 0)
1479                 return -1;
1480
1481         if (VIRTIO_OPS(hw)->intr_detect)
1482                 VIRTIO_OPS(hw)->intr_detect(hw);
1483
1484         return 0;
1485 }
1486
1487 static int
1488 virtio_intr_disable(struct rte_eth_dev *dev)
1489 {
1490         struct virtio_hw *hw = dev->data->dev_private;
1491
1492         if (rte_intr_disable(dev->intr_handle) < 0)
1493                 return -1;
1494
1495         if (VIRTIO_OPS(hw)->intr_detect)
1496                 VIRTIO_OPS(hw)->intr_detect(hw);
1497
1498         return 0;
1499 }
1500
1501 static int
1502 virtio_ethdev_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1503 {
1504         uint64_t host_features;
1505
	/* Prepare guest_features: features the driver wants to support */
1507         PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1508                 req_features);
1509
	/* Read device (host) feature bits */
1511         host_features = VIRTIO_OPS(hw)->get_features(hw);
1512         PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1513                 host_features);
1514
1515         /* If supported, ensure MTU value is valid before acknowledging it. */
1516         if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1517                 struct virtio_net_config config;
1518
1519                 virtio_read_dev_config(hw,
1520                         offsetof(struct virtio_net_config, mtu),
1521                         &config.mtu, sizeof(config.mtu));
1522
1523                 if (config.mtu < RTE_ETHER_MIN_MTU)
1524                         req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1525         }
1526
	/*
	 * Negotiate features: the subset of device feature bits accepted by
	 * the driver is written back as the guest feature bits.
	 */
1531         hw->guest_features = req_features;
1532         hw->guest_features = virtio_negotiate_features(hw, host_features);
1533         PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1534                 hw->guest_features);
1535
1536         if (VIRTIO_OPS(hw)->features_ok(hw) < 0)
1537                 return -1;
1538
1539         if (virtio_with_feature(hw, VIRTIO_F_VERSION_1)) {
1540                 virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1541
1542                 if (!(virtio_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1543                         PMD_INIT_LOG(ERR, "Failed to set FEATURES_OK status!");
1544                         return -1;
1545                 }
1546         }
1547
1548         hw->req_guest_features = req_features;
1549
1550         return 0;
1551 }
1552
1553 int
1554 virtio_dev_pause(struct rte_eth_dev *dev)
1555 {
1556         struct virtio_hw *hw = dev->data->dev_private;
1557
1558         rte_spinlock_lock(&hw->state_lock);
1559
1560         if (hw->started == 0) {
		/* Device has already been stopped. */
1562                 rte_spinlock_unlock(&hw->state_lock);
1563                 return -1;
1564         }
1565         hw->started = 0;
	/*
	 * Prevent worker threads from touching the queues to avoid contention;
	 * 1 ms should be enough for any ongoing Tx function to finish.
	 */
1570         rte_delay_ms(1);
1571         return 0;
1572 }
1573
1574 /*
1575  * Recover hw state to let the worker threads continue.
1576  */
1577 void
1578 virtio_dev_resume(struct rte_eth_dev *dev)
1579 {
1580         struct virtio_hw *hw = dev->data->dev_private;
1581
1582         hw->started = 1;
1583         rte_spinlock_unlock(&hw->state_lock);
1584 }
1585
/*
 * Should be called only after the device is paused.
 */
1589 int
1590 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1591                 int nb_pkts)
1592 {
1593         struct virtio_hw *hw = dev->data->dev_private;
1594         struct virtnet_tx *txvq = dev->data->tx_queues[0];
1595         int ret;
1596
1597         hw->inject_pkts = tx_pkts;
1598         ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1599         hw->inject_pkts = NULL;
1600
1601         return ret;
1602 }
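/*
 * Illustrative sketch (not used by the driver, helper name is ours): the
 * expected calling pattern for the pause/inject/resume helpers above.
 * virtio_notify_peers() below follows the same sequence to send a RARP
 * packet.
 */
static __rte_unused int
virtio_example_inject_one(struct rte_eth_dev *dev, struct rte_mbuf *m)
{
	int ret;

	/* Quiesce the datapath; fails if the port is already stopped. */
	if (virtio_dev_pause(dev) < 0)
		return -1;

	ret = virtio_inject_pkts(dev, &m, 1);

	/* Release the state lock taken by virtio_dev_pause(). */
	virtio_dev_resume(dev);

	return ret;
}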
1603
1604 static void
1605 virtio_notify_peers(struct rte_eth_dev *dev)
1606 {
1607         struct virtio_hw *hw = dev->data->dev_private;
1608         struct virtnet_rx *rxvq;
1609         struct rte_mbuf *rarp_mbuf;
1610
1611         if (!dev->data->rx_queues)
1612                 return;
1613
1614         rxvq = dev->data->rx_queues[0];
1615         if (!rxvq)
1616                 return;
1617
1618         rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
1619                         (struct rte_ether_addr *)hw->mac_addr);
1620         if (rarp_mbuf == NULL) {
1621                 PMD_DRV_LOG(ERR, "failed to make RARP packet.");
1622                 return;
1623         }
1624
	/* If the virtio port has just been stopped, there is no need to send RARP */
1626         if (virtio_dev_pause(dev) < 0) {
1627                 rte_pktmbuf_free(rarp_mbuf);
1628                 return;
1629         }
1630
1631         virtio_inject_pkts(dev, &rarp_mbuf, 1);
1632         virtio_dev_resume(dev);
1633 }
1634
1635 static void
1636 virtio_ack_link_announce(struct rte_eth_dev *dev)
1637 {
1638         struct virtio_hw *hw = dev->data->dev_private;
1639         struct virtio_pmd_ctrl ctrl;
1640
1641         ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
1642         ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
1643
1644         virtio_send_command(hw->cvq, &ctrl, NULL, 0);
1645 }
1646
/*
 * Process the virtio config-changed interrupt: invoke the LSC callback
 * if the link state changed, and generate a gratuitous RARP packet if
 * the status indicates an ANNOUNCE.
 */
1652 void
1653 virtio_interrupt_handler(void *param)
1654 {
1655         struct rte_eth_dev *dev = param;
1656         struct virtio_hw *hw = dev->data->dev_private;
1657         uint8_t isr;
1658         uint16_t status;
1659
	/* Read the interrupt status, which also clears the interrupt */
1661         isr = virtio_get_isr(hw);
1662         PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
1663
1664         if (virtio_intr_unmask(dev) < 0)
		PMD_DRV_LOG(ERR, "interrupt unmask failed");
1666
1667         if (isr & VIRTIO_ISR_CONFIG) {
1668                 if (virtio_dev_link_update(dev, 0) == 0)
1669                         rte_eth_dev_callback_process(dev,
1670                                                      RTE_ETH_EVENT_INTR_LSC,
1671                                                      NULL);
1672
1673                 if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1674                         virtio_read_dev_config(hw,
1675                                 offsetof(struct virtio_net_config, status),
1676                                 &status, sizeof(status));
1677                         if (status & VIRTIO_NET_S_ANNOUNCE) {
1678                                 virtio_notify_peers(dev);
1679                                 if (hw->cvq)
1680                                         virtio_ack_link_announce(dev);
1681                         }
1682                 }
1683         }
1684 }
1685
/* Set the Rx and Tx handlers according to what is supported */
1687 static void
1688 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1689 {
1690         struct virtio_hw *hw = eth_dev->data->dev_private;
1691
1692         eth_dev->tx_pkt_prepare = virtio_xmit_pkts_prepare;
1693         if (virtio_with_packed_queue(hw)) {
1694                 PMD_INIT_LOG(INFO,
1695                         "virtio: using packed ring %s Tx path on port %u",
1696                         hw->use_vec_tx ? "vectorized" : "standard",
1697                         eth_dev->data->port_id);
1698                 if (hw->use_vec_tx)
1699                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
1700                 else
1701                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
1702         } else {
1703                 if (hw->use_inorder_tx) {
1704                         PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
1705                                 eth_dev->data->port_id);
1706                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
1707                 } else {
1708                         PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1709                                 eth_dev->data->port_id);
1710                         eth_dev->tx_pkt_burst = virtio_xmit_pkts;
1711                 }
1712         }
1713
1714         if (virtio_with_packed_queue(hw)) {
1715                 if (hw->use_vec_rx) {
1716                         PMD_INIT_LOG(INFO,
1717                                 "virtio: using packed ring vectorized Rx path on port %u",
1718                                 eth_dev->data->port_id);
1719                         eth_dev->rx_pkt_burst =
1720                                 &virtio_recv_pkts_packed_vec;
1721                 } else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1722                         PMD_INIT_LOG(INFO,
1723                                 "virtio: using packed ring mergeable buffer Rx path on port %u",
1724                                 eth_dev->data->port_id);
1725                         eth_dev->rx_pkt_burst =
1726                                 &virtio_recv_mergeable_pkts_packed;
1727                 } else {
1728                         PMD_INIT_LOG(INFO,
1729                                 "virtio: using packed ring standard Rx path on port %u",
1730                                 eth_dev->data->port_id);
1731                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
1732                 }
1733         } else {
1734                 if (hw->use_vec_rx) {
1735                         PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
1736                                 eth_dev->data->port_id);
1737                         eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1738                 } else if (hw->use_inorder_rx) {
1739                         PMD_INIT_LOG(INFO,
1740                                 "virtio: using inorder Rx path on port %u",
1741                                 eth_dev->data->port_id);
1742                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_inorder;
1743                 } else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1744                         PMD_INIT_LOG(INFO,
1745                                 "virtio: using mergeable buffer Rx path on port %u",
1746                                 eth_dev->data->port_id);
1747                         eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1748                 } else {
1749                         PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1750                                 eth_dev->data->port_id);
1751                         eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1752                 }
1753         }
1754
1755 }
1756
/* Only 1:1 queue/interrupt mapping is supported so far: vector 0 is
 * reserved for config change interrupts, and Rx queue i is bound to
 * vector i + 1.
 * TODO: support n:1 queue/interrupt mapping when only a limited number
 * of interrupt vectors (< N + 1) is available.
 */
1761 static int
1762 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1763 {
1764         uint32_t i;
1765         struct virtio_hw *hw = dev->data->dev_private;
1766
1767         PMD_INIT_LOG(INFO, "queue/interrupt binding");
1768         for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1769                 if (rte_intr_vec_list_index_set(dev->intr_handle, i,
1770                                                        i + 1))
1771                         return -rte_errno;
1772                 if (VIRTIO_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1773                                                  VIRTIO_MSI_NO_VECTOR) {
1774                         PMD_DRV_LOG(ERR, "failed to set queue vector");
1775                         return -EBUSY;
1776                 }
1777         }
1778
1779         return 0;
1780 }
1781
1782 static void
1783 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1784 {
1785         uint32_t i;
1786         struct virtio_hw *hw = dev->data->dev_private;
1787
1788         PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
1789         for (i = 0; i < dev->data->nb_rx_queues; ++i)
1790                 VIRTIO_OPS(hw)->set_queue_irq(hw,
					     hw->vqs[i * 2],
1792                                              VIRTIO_MSI_NO_VECTOR);
1793 }
1794
1795 static int
1796 virtio_configure_intr(struct rte_eth_dev *dev)
1797 {
1798         struct virtio_hw *hw = dev->data->dev_private;
1799
1800         if (!rte_intr_cap_multiple(dev->intr_handle)) {
1801                 PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
1802                 return -ENOTSUP;
1803         }
1804
1805         if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1806                 PMD_INIT_LOG(ERR, "Fail to create eventfd");
1807                 return -1;
1808         }
1809
1810         if (rte_intr_vec_list_alloc(dev->intr_handle, "intr_vec",
1811                                     hw->max_queue_pairs)) {
1812                 PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1813                              hw->max_queue_pairs);
1814                 return -ENOMEM;
1815         }
1816
1817         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1818                 /* Re-register callback to update max_intr */
1819                 rte_intr_callback_unregister(dev->intr_handle,
1820                                              virtio_interrupt_handler,
1821                                              dev);
1822                 rte_intr_callback_register(dev->intr_handle,
1823                                            virtio_interrupt_handler,
1824                                            dev);
1825         }
1826
	/* DO NOT try to remove this! This function enables MSI-X; without
	 * it QEMU will encounter a SIGSEGV when DRIVER_OK is sent.
	 * For legacy devices, this must also be done before queue/vector
	 * binding so the config size grows from 20 to 24 bytes; otherwise
	 * VIRTIO_MSI_QUEUE_VECTOR (offset 22) will be ignored.
	 */
1833         if (virtio_intr_enable(dev) < 0) {
1834                 PMD_DRV_LOG(ERR, "interrupt enable failed");
1835                 return -1;
1836         }
1837
1838         if (virtio_queues_bind_intr(dev) < 0) {
1839                 PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1840                 return -1;
1841         }
1842
1843         return 0;
1844 }
1845
1846 static void
1847 virtio_get_speed_duplex(struct rte_eth_dev *eth_dev,
1848                         struct rte_eth_link *link)
1849 {
1850         struct virtio_hw *hw = eth_dev->data->dev_private;
1851         struct virtio_net_config *config;
1852         struct virtio_net_config local_config;
1853
1854         config = &local_config;
1855         virtio_read_dev_config(hw,
1856                 offsetof(struct virtio_net_config, speed),
1857                 &config->speed, sizeof(config->speed));
1858         virtio_read_dev_config(hw,
1859                 offsetof(struct virtio_net_config, duplex),
1860                 &config->duplex, sizeof(config->duplex));
1861         hw->speed = config->speed;
1862         hw->duplex = config->duplex;
1863         if (link != NULL) {
1864                 link->link_duplex = hw->duplex;
1865                 link->link_speed  = hw->speed;
1866         }
1867         PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
1868                      hw->speed, hw->duplex);
1869 }
1870
1871 static uint64_t
1872 ethdev_to_virtio_rss_offloads(uint64_t ethdev_hash_types)
1873 {
1874         uint64_t virtio_hash_types = 0;
1875
1876         if (ethdev_hash_types & (RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
1877                                 RTE_ETH_RSS_NONFRAG_IPV4_OTHER))
1878                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IPV4;
1879
1880         if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1881                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCPV4;
1882
1883         if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1884                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDPV4;
1885
1886         if (ethdev_hash_types & (RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
1887                                 RTE_ETH_RSS_NONFRAG_IPV6_OTHER))
1888                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IPV6;
1889
1890         if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1891                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCPV6;
1892
1893         if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1894                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDPV6;
1895
1896         if (ethdev_hash_types & RTE_ETH_RSS_IPV6_EX)
1897                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IP_EX;
1898
1899         if (ethdev_hash_types & RTE_ETH_RSS_IPV6_TCP_EX)
1900                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCP_EX;
1901
1902         if (ethdev_hash_types & RTE_ETH_RSS_IPV6_UDP_EX)
1903                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDP_EX;
1904
1905         return virtio_hash_types;
1906 }
1907
1908 static uint64_t
1909 virtio_to_ethdev_rss_offloads(uint64_t virtio_hash_types)
1910 {
1911         uint64_t rss_offloads = 0;
1912
1913         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IPV4)
1914                 rss_offloads |= RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
1915                         RTE_ETH_RSS_NONFRAG_IPV4_OTHER;
1916
1917         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCPV4)
1918                 rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV4_TCP;
1919
1920         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDPV4)
1921                 rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV4_UDP;
1922
1923         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IPV6)
1924                 rss_offloads |= RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
1925                         RTE_ETH_RSS_NONFRAG_IPV6_OTHER;
1926
1927         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCPV6)
1928                 rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV6_TCP;
1929
1930         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDPV6)
1931                 rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV6_UDP;
1932
1933         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IP_EX)
1934                 rss_offloads |= RTE_ETH_RSS_IPV6_EX;
1935
1936         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCP_EX)
1937                 rss_offloads |= RTE_ETH_RSS_IPV6_TCP_EX;
1938
1939         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDP_EX)
1940                 rss_offloads |= RTE_ETH_RSS_IPV6_UDP_EX;
1941
1942         return rss_offloads;
1943 }
1944
1945 static int
1946 virtio_dev_get_rss_config(struct virtio_hw *hw, uint32_t *rss_hash_types)
1947 {
1948         struct virtio_net_config local_config;
1949         struct virtio_net_config *config = &local_config;
1950
1951         virtio_read_dev_config(hw,
1952                         offsetof(struct virtio_net_config, rss_max_key_size),
1953                         &config->rss_max_key_size,
1954                         sizeof(config->rss_max_key_size));
1955         if (config->rss_max_key_size < VIRTIO_NET_RSS_KEY_SIZE) {
1956                 PMD_INIT_LOG(ERR, "Invalid device RSS max key size (%u)",
1957                                 config->rss_max_key_size);
1958                 return -EINVAL;
1959         }
1960
1961         virtio_read_dev_config(hw,
1962                         offsetof(struct virtio_net_config,
1963                                 rss_max_indirection_table_length),
1964                         &config->rss_max_indirection_table_length,
1965                         sizeof(config->rss_max_indirection_table_length));
1966         if (config->rss_max_indirection_table_length < VIRTIO_NET_RSS_RETA_SIZE) {
1967                 PMD_INIT_LOG(ERR, "Invalid device RSS max reta size (%u)",
1968                                 config->rss_max_indirection_table_length);
1969                 return -EINVAL;
1970         }
1971
1972         virtio_read_dev_config(hw,
1973                         offsetof(struct virtio_net_config, supported_hash_types),
1974                         &config->supported_hash_types,
1975                         sizeof(config->supported_hash_types));
1976         if ((config->supported_hash_types & VIRTIO_NET_HASH_TYPE_MASK) == 0) {
1977                 PMD_INIT_LOG(ERR, "Invalid device RSS hash types (0x%x)",
1978                                 config->supported_hash_types);
1979                 return -EINVAL;
1980         }
1981
1982         *rss_hash_types = config->supported_hash_types & VIRTIO_NET_HASH_TYPE_MASK;
1983
1984         PMD_INIT_LOG(DEBUG, "Device RSS config:");
1985         PMD_INIT_LOG(DEBUG, "\t-Max key size: %u", config->rss_max_key_size);
1986         PMD_INIT_LOG(DEBUG, "\t-Max reta size: %u", config->rss_max_indirection_table_length);
1987         PMD_INIT_LOG(DEBUG, "\t-Supported hash types: 0x%x", *rss_hash_types);
1988
1989         return 0;
1990 }
1991
1992 static int
1993 virtio_dev_rss_hash_update(struct rte_eth_dev *dev,
1994                 struct rte_eth_rss_conf *rss_conf)
1995 {
1996         struct virtio_hw *hw = dev->data->dev_private;
1997         char old_rss_key[VIRTIO_NET_RSS_KEY_SIZE];
1998         uint32_t old_hash_types;
1999         uint16_t nb_queues;
2000         int ret;
2001
2002         if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2003                 return -ENOTSUP;
2004
2005         if (rss_conf->rss_hf & ~virtio_to_ethdev_rss_offloads(VIRTIO_NET_HASH_TYPE_MASK))
2006                 return -EINVAL;
2007
2008         old_hash_types = hw->rss_hash_types;
2009         hw->rss_hash_types = ethdev_to_virtio_rss_offloads(rss_conf->rss_hf);
2010
2011         if (rss_conf->rss_key && rss_conf->rss_key_len) {
2012                 if (rss_conf->rss_key_len != VIRTIO_NET_RSS_KEY_SIZE) {
			PMD_INIT_LOG(ERR, "Driver only supports an RSS key length of %u",
					VIRTIO_NET_RSS_KEY_SIZE);
2015                         ret = -EINVAL;
2016                         goto restore_types;
2017                 }
2018                 memcpy(old_rss_key, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2019                 memcpy(hw->rss_key, rss_conf->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2020         }
2021
2022         nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2023         ret = virtio_set_multiple_queues_rss(dev, nb_queues);
2024         if (ret < 0) {
2025                 PMD_INIT_LOG(ERR, "Failed to apply new RSS config to the device");
2026                 goto restore_key;
2027         }
2028
2029         return 0;
2030 restore_key:
2031         if (rss_conf->rss_key && rss_conf->rss_key_len)
2032                 memcpy(hw->rss_key, old_rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2033 restore_types:
2034         hw->rss_hash_types = old_hash_types;
2035
2036         return ret;
2037 }
2038
2039 static int
2040 virtio_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2041                 struct rte_eth_rss_conf *rss_conf)
2042 {
2043         struct virtio_hw *hw = dev->data->dev_private;
2044
2045         if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2046                 return -ENOTSUP;
2047
2048         if (rss_conf->rss_key && rss_conf->rss_key_len >= VIRTIO_NET_RSS_KEY_SIZE)
2049                 memcpy(rss_conf->rss_key, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2050         rss_conf->rss_key_len = VIRTIO_NET_RSS_KEY_SIZE;
2051         rss_conf->rss_hf = virtio_to_ethdev_rss_offloads(hw->rss_hash_types);
2052
2053         return 0;
2054 }
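/*
 * Application-side sketch (illustrative): updating the RSS key and hash
 * types through the ethdev API, which lands in
 * virtio_dev_rss_hash_update() above. The key, when given, must be
 * exactly VIRTIO_NET_RSS_KEY_SIZE (40) bytes:
 *
 *	uint8_t key[VIRTIO_NET_RSS_KEY_SIZE] = { 0 };
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = key,
 *		.rss_key_len = sizeof(key),
 *		.rss_hf = RTE_ETH_RSS_NONFRAG_IPV4_TCP |
 *			  RTE_ETH_RSS_NONFRAG_IPV4_UDP,
 *	};
 *
 *	rte_eth_dev_rss_hash_update(port_id, &conf);
 */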
2055
2056 static int virtio_dev_rss_reta_update(struct rte_eth_dev *dev,
2057                          struct rte_eth_rss_reta_entry64 *reta_conf,
2058                          uint16_t reta_size)
2059 {
2060         struct virtio_hw *hw = dev->data->dev_private;
2061         uint16_t nb_queues;
2062         uint16_t old_reta[VIRTIO_NET_RSS_RETA_SIZE];
2063         int idx, pos, i, ret;
2064
2065         if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2066                 return -ENOTSUP;
2067
2068         if (reta_size != VIRTIO_NET_RSS_RETA_SIZE)
2069                 return -EINVAL;
2070
2071         memcpy(old_reta, hw->rss_reta, sizeof(old_reta));
2072
2073         for (i = 0; i < reta_size; i++) {
2074                 idx = i / RTE_ETH_RETA_GROUP_SIZE;
2075                 pos = i % RTE_ETH_RETA_GROUP_SIZE;
2076
2077                 if (((reta_conf[idx].mask >> pos) & 0x1) == 0)
2078                         continue;
2079
2080                 hw->rss_reta[i] = reta_conf[idx].reta[pos];
2081         }
2082
2083         nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2084         ret = virtio_set_multiple_queues_rss(dev, nb_queues);
2085         if (ret < 0) {
2086                 PMD_INIT_LOG(ERR, "Failed to apply new RETA to the device");
2087                 memcpy(hw->rss_reta, old_reta, sizeof(old_reta));
2088         }
2089
2090         hw->rss_rx_queues = dev->data->nb_rx_queues;
2091
2092         return ret;
2093 }
2094
2095 static int virtio_dev_rss_reta_query(struct rte_eth_dev *dev,
2096                          struct rte_eth_rss_reta_entry64 *reta_conf,
2097                          uint16_t reta_size)
2098 {
2099         struct virtio_hw *hw = dev->data->dev_private;
2100         int idx, i;
2101
2102         if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2103                 return -ENOTSUP;
2104
2105         if (reta_size != VIRTIO_NET_RSS_RETA_SIZE)
2106                 return -EINVAL;
2107
2108         for (i = 0; i < reta_size; i++) {
2109                 idx = i / RTE_ETH_RETA_GROUP_SIZE;
2110                 reta_conf[idx].reta[i % RTE_ETH_RETA_GROUP_SIZE] = hw->rss_reta[i];
2111         }
2112
2113         return 0;
2114 }
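/*
 * Illustrative sketch (not used by the driver, helper name is ours):
 * rewriting the whole indirection table from the application side so
 * that all traffic is steered to queue 0. The virtio RETA has a fixed
 * size of VIRTIO_NET_RSS_RETA_SIZE entries, grouped in chunks of
 * RTE_ETH_RETA_GROUP_SIZE.
 */
static __rte_unused int
virtio_example_reta_to_queue0(uint16_t port_id)
{
	struct rte_eth_rss_reta_entry64
		reta[VIRTIO_NET_RSS_RETA_SIZE / RTE_ETH_RETA_GROUP_SIZE];
	unsigned int i;

	memset(reta, 0, sizeof(reta));
	for (i = 0; i < VIRTIO_NET_RSS_RETA_SIZE; i++) {
		/* Only entries with their mask bit set are applied. */
		reta[i / RTE_ETH_RETA_GROUP_SIZE].mask |=
			1ULL << (i % RTE_ETH_RETA_GROUP_SIZE);
		reta[i / RTE_ETH_RETA_GROUP_SIZE].reta[i % RTE_ETH_RETA_GROUP_SIZE] = 0;
	}

	return rte_eth_dev_rss_reta_update(port_id, reta,
			VIRTIO_NET_RSS_RETA_SIZE);
}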
2115
/*
 * Default RSS hash key: the default key of Intel IXGBE devices. It can
 * be replaced by the application with any 40-byte key value.
 */
2121 static uint8_t rss_intel_key[VIRTIO_NET_RSS_KEY_SIZE] = {
2122         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2123         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2124         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2125         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2126         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2127 };
2128
2129 static int
2130 virtio_dev_rss_init(struct rte_eth_dev *eth_dev)
2131 {
2132         struct virtio_hw *hw = eth_dev->data->dev_private;
2133         uint16_t nb_rx_queues = eth_dev->data->nb_rx_queues;
2134         struct rte_eth_rss_conf *rss_conf;
2135         int ret, i;
2136
2137         if (!nb_rx_queues) {
2138                 PMD_INIT_LOG(ERR, "Cannot init RSS if no Rx queues");
2139                 return -EINVAL;
2140         }
2141
2142         rss_conf = &eth_dev->data->dev_conf.rx_adv_conf.rss_conf;
2143
2144         ret = virtio_dev_get_rss_config(hw, &hw->rss_hash_types);
2145         if (ret)
2146                 return ret;
2147
2148         if (rss_conf->rss_hf) {
		/* Ensure requested hash types are supported by the device */
2150                 if (rss_conf->rss_hf & ~virtio_to_ethdev_rss_offloads(hw->rss_hash_types))
2151                         return -EINVAL;
2152
2153                 hw->rss_hash_types = ethdev_to_virtio_rss_offloads(rss_conf->rss_hf);
2154         }
2155
2156         if (!hw->rss_key) {
		/* Set up the default RSS key if not already provided by the user */
2158                 hw->rss_key = rte_malloc_socket("rss_key",
2159                                 VIRTIO_NET_RSS_KEY_SIZE, 0,
2160                                 eth_dev->device->numa_node);
2161                 if (!hw->rss_key) {
2162                         PMD_INIT_LOG(ERR, "Failed to allocate RSS key");
2163                         return -1;
2164                 }
2165         }
2166
2167         if (rss_conf->rss_key && rss_conf->rss_key_len) {
2168                 if (rss_conf->rss_key_len != VIRTIO_NET_RSS_KEY_SIZE) {
			PMD_INIT_LOG(ERR, "Driver only supports an RSS key length of %u",
					VIRTIO_NET_RSS_KEY_SIZE);
2171                         return -EINVAL;
2172                 }
2173                 memcpy(hw->rss_key, rss_conf->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2174         } else {
2175                 memcpy(hw->rss_key, rss_intel_key, VIRTIO_NET_RSS_KEY_SIZE);
2176         }
2177
2178         if (!hw->rss_reta) {
		/* Set up the default RSS reta if not already provided by the user */
2180                 hw->rss_reta = rte_zmalloc_socket("rss_reta",
2181                                 VIRTIO_NET_RSS_RETA_SIZE * sizeof(uint16_t), 0,
2182                                 eth_dev->device->numa_node);
2183                 if (!hw->rss_reta) {
2184                         PMD_INIT_LOG(ERR, "Failed to allocate RSS reta");
2185                         return -1;
2186                 }
2187
2188                 hw->rss_rx_queues = 0;
2189         }
2190
2191         /* Re-initialize the RSS reta if the number of RX queues has changed */
2192         if (hw->rss_rx_queues != nb_rx_queues) {
2193                 for (i = 0; i < VIRTIO_NET_RSS_RETA_SIZE; i++)
2194                         hw->rss_reta[i] = i % nb_rx_queues;
2195                 hw->rss_rx_queues = nb_rx_queues;
2196         }
2197
2198         return 0;
2199 }
2200
2201 #define DUPLEX_UNKNOWN   0xff
2202 /* reset device and renegotiate features if needed */
2203 static int
2204 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
2205 {
2206         struct virtio_hw *hw = eth_dev->data->dev_private;
2207         struct virtio_net_config *config;
2208         struct virtio_net_config local_config;
2209         int ret;
2210
	/* Reset the device, although this is not strictly necessary at startup */
2212         virtio_reset(hw);
2213
2214         if (hw->vqs) {
2215                 virtio_dev_free_mbufs(eth_dev);
2216                 virtio_free_queues(hw);
2217         }
2218
2219         /* Tell the host we've noticed this device. */
2220         virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
2221
	/* Tell the host we know how to drive the device. */
2223         virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
2224         if (virtio_ethdev_negotiate_features(hw, req_features) < 0)
2225                 return -1;
2226
2227         hw->weak_barriers = !virtio_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);
2228
	/* Enable LSC only when the host supports status and MSI-X is available */
2230         if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS) && hw->intr_lsc)
2231                 eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
2232         else
2233                 eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
2234
2235         eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
2236
	/* Set up the Rx header size for the device */
2238         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
2239             virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2240             virtio_with_packed_queue(hw))
2241                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2242         else
2243                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
2244
	/* Copy the permanent MAC address into virtio_hw */
2246         virtio_get_hwaddr(hw);
2247         rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
2248                         &eth_dev->data->mac_addrs[0]);
2249         PMD_INIT_LOG(DEBUG,
2250                      "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
2251                      hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
2252                      hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
2253
2254         hw->get_speed_via_feat = hw->speed == RTE_ETH_SPEED_NUM_UNKNOWN &&
2255                              virtio_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX);
2256         if (hw->get_speed_via_feat)
2257                 virtio_get_speed_duplex(eth_dev, NULL);
2258         if (hw->duplex == DUPLEX_UNKNOWN)
2259                 hw->duplex = RTE_ETH_LINK_FULL_DUPLEX;
2260         PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
2261                 hw->speed, hw->duplex);
2262         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
2263                 config = &local_config;
2264
2265                 virtio_read_dev_config(hw,
2266                         offsetof(struct virtio_net_config, mac),
2267                         &config->mac, sizeof(config->mac));
2268
2269                 if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2270                         virtio_read_dev_config(hw,
2271                                 offsetof(struct virtio_net_config, status),
2272                                 &config->status, sizeof(config->status));
2273                 } else {
2274                         PMD_INIT_LOG(DEBUG,
2275                                      "VIRTIO_NET_F_STATUS is not supported");
2276                         config->status = 0;
2277                 }
2278
2279                 if (virtio_with_feature(hw, VIRTIO_NET_F_MQ) ||
2280                                 virtio_with_feature(hw, VIRTIO_NET_F_RSS)) {
2281                         virtio_read_dev_config(hw,
2282                                 offsetof(struct virtio_net_config, max_virtqueue_pairs),
2283                                 &config->max_virtqueue_pairs,
2284                                 sizeof(config->max_virtqueue_pairs));
2285                 } else {
2286                         PMD_INIT_LOG(DEBUG,
2287                                      "Neither VIRTIO_NET_F_MQ nor VIRTIO_NET_F_RSS are supported");
2288                         config->max_virtqueue_pairs = 1;
2289                 }
2290
2291                 hw->max_queue_pairs = config->max_virtqueue_pairs;
2292
2293                 if (virtio_with_feature(hw, VIRTIO_NET_F_MTU)) {
2294                         virtio_read_dev_config(hw,
2295                                 offsetof(struct virtio_net_config, mtu),
2296                                 &config->mtu,
2297                                 sizeof(config->mtu));
2298
2299                         /*
2300                          * MTU value has already been checked at negotiation
2301                          * time, but check again in case it has changed since
2302                          * then, which should not happen.
2303                          */
2304                         if (config->mtu < RTE_ETHER_MIN_MTU) {
2305                                 PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
2306                                                 config->mtu);
2307                                 return -1;
2308                         }
2309
2310                         hw->max_mtu = config->mtu;
2311                         /* Set initial MTU to maximum one supported by vhost */
2312                         eth_dev->data->mtu = config->mtu;
2313
2314                 } else {
2315                         hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
2316                                 VLAN_TAG_LEN - hw->vtnet_hdr_size;
2317                 }
2318
2319                 hw->rss_hash_types = 0;
2320                 if (virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2321                         if (virtio_dev_rss_init(eth_dev))
2322                                 return -1;
2323
2324                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
2325                                 config->max_virtqueue_pairs);
2326                 PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
2327                 PMD_INIT_LOG(DEBUG,
2328                                 "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
2329                                 config->mac[0], config->mac[1],
2330                                 config->mac[2], config->mac[3],
2331                                 config->mac[4], config->mac[5]);
2332         } else {
2333                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
2334                 hw->max_queue_pairs = 1;
2335                 hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
2336                         VLAN_TAG_LEN - hw->vtnet_hdr_size;
2337         }
2338
2339         ret = virtio_alloc_queues(eth_dev);
2340         if (ret < 0)
2341                 return ret;
2342
2343         if (eth_dev->data->dev_conf.intr_conf.rxq) {
2344                 if (virtio_configure_intr(eth_dev) < 0) {
2345                         PMD_INIT_LOG(ERR, "failed to configure interrupt");
2346                         virtio_free_queues(hw);
2347                         return -1;
2348                 }
2349         }
2350
2351         virtio_reinit_complete(hw);
2352
2353         return 0;
2354 }
2355
2356 /*
2357  * This function is based on probe() function in virtio_pci.c
2358  * It returns 0 on success.
2359  */
2360 int
2361 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
2362 {
2363         struct virtio_hw *hw = eth_dev->data->dev_private;
2364         uint32_t speed = RTE_ETH_SPEED_NUM_UNKNOWN;
2365         int vectorized = 0;
2366         int ret;
2367
2368         if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) {
2369                 PMD_INIT_LOG(ERR,
2370                         "Not sufficient headroom required = %d, avail = %d",
2371                         (int)sizeof(struct virtio_net_hdr_mrg_rxbuf),
2372                         RTE_PKTMBUF_HEADROOM);
2373
2374                 return -1;
2375         }
2376
2377         eth_dev->dev_ops = &virtio_eth_dev_ops;
2378
2379         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
2380                 set_rxtx_funcs(eth_dev);
2381                 return 0;
2382         }
2383
2384         ret = virtio_dev_devargs_parse(eth_dev->device->devargs, &speed, &vectorized);
2385         if (ret < 0)
2386                 return ret;
2387         hw->speed = speed;
2388         hw->duplex = DUPLEX_UNKNOWN;
2389
2390         /* Allocate memory for storing MAC addresses */
2391         eth_dev->data->mac_addrs = rte_zmalloc("virtio",
2392                                 VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN, 0);
2393         if (eth_dev->data->mac_addrs == NULL) {
2394                 PMD_INIT_LOG(ERR,
2395                         "Failed to allocate %d bytes needed to store MAC addresses",
2396                         VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN);
2397                 return -ENOMEM;
2398         }
2399
2400         rte_spinlock_init(&hw->state_lock);
2401
2402         /* reset device and negotiate default features */
2403         ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
2404         if (ret < 0)
2405                 goto err_virtio_init;
2406
2407         if (vectorized) {
2408                 if (!virtio_with_packed_queue(hw)) {
2409                         hw->use_vec_rx = 1;
2410                 } else {
2411 #if defined(CC_AVX512_SUPPORT) || defined(RTE_ARCH_ARM)
2412                         hw->use_vec_rx = 1;
2413                         hw->use_vec_tx = 1;
2414 #else
2415                         PMD_DRV_LOG(INFO,
2416                                 "building environment do not support packed ring vectorized");
2417 #endif
2418                 }
2419         }
2420
2421         hw->opened = 1;
2422
2423         return 0;
2424
2425 err_virtio_init:
2426         rte_free(eth_dev->data->mac_addrs);
2427         eth_dev->data->mac_addrs = NULL;
2428         return ret;
2429 }
2430
2431 static uint32_t
2432 virtio_dev_speed_capa_get(uint32_t speed)
2433 {
2434         switch (speed) {
2435         case RTE_ETH_SPEED_NUM_10G:
2436                 return RTE_ETH_LINK_SPEED_10G;
2437         case RTE_ETH_SPEED_NUM_20G:
2438                 return RTE_ETH_LINK_SPEED_20G;
2439         case RTE_ETH_SPEED_NUM_25G:
2440                 return RTE_ETH_LINK_SPEED_25G;
2441         case RTE_ETH_SPEED_NUM_40G:
2442                 return RTE_ETH_LINK_SPEED_40G;
2443         case RTE_ETH_SPEED_NUM_50G:
2444                 return RTE_ETH_LINK_SPEED_50G;
2445         case RTE_ETH_SPEED_NUM_56G:
2446                 return RTE_ETH_LINK_SPEED_56G;
2447         case RTE_ETH_SPEED_NUM_100G:
2448                 return RTE_ETH_LINK_SPEED_100G;
2449         case RTE_ETH_SPEED_NUM_200G:
2450                 return RTE_ETH_LINK_SPEED_200G;
2451         default:
2452                 return 0;
2453         }
2454 }
2455
2456 static int vectorized_check_handler(__rte_unused const char *key,
2457                 const char *value, void *ret_val)
2458 {
2459         if (strcmp(value, "1") == 0)
2460                 *(int *)ret_val = 1;
2461         else
2462                 *(int *)ret_val = 0;
2463
2464         return 0;
2465 }
2466
2467 #define VIRTIO_ARG_SPEED      "speed"
2468 #define VIRTIO_ARG_VECTORIZED "vectorized"
2469
2470 static int
2471 link_speed_handler(const char *key __rte_unused,
2472                 const char *value, void *ret_val)
2473 {
2474         uint32_t val;
2475         if (!value || !ret_val)
2476                 return -EINVAL;
2477         val = strtoul(value, NULL, 0);
2478         /* validate input */
2479         if (virtio_dev_speed_capa_get(val) == 0)
2480                 return -EINVAL;
2481         *(uint32_t *)ret_val = val;
2482
2483         return 0;
2484 }
2485
2486
2487 static int
2488 virtio_dev_devargs_parse(struct rte_devargs *devargs, uint32_t *speed, int *vectorized)
2489 {
2490         struct rte_kvargs *kvlist;
2491         int ret = 0;
2492
2493         if (devargs == NULL)
2494                 return 0;
2495
2496         kvlist = rte_kvargs_parse(devargs->args, NULL);
2497         if (kvlist == NULL) {
2498                 PMD_INIT_LOG(ERR, "error when parsing param");
2499                 return 0;
2500         }
2501
2502         if (speed && rte_kvargs_count(kvlist, VIRTIO_ARG_SPEED) == 1) {
2503                 ret = rte_kvargs_process(kvlist,
2504                                         VIRTIO_ARG_SPEED,
2505                                         link_speed_handler, speed);
2506                 if (ret < 0) {
2507                         PMD_INIT_LOG(ERR, "Failed to parse %s",
2508                                         VIRTIO_ARG_SPEED);
2509                         goto exit;
2510                 }
2511         }
2512
2513         if (vectorized &&
2514                 rte_kvargs_count(kvlist, VIRTIO_ARG_VECTORIZED) == 1) {
2515                 ret = rte_kvargs_process(kvlist,
2516                                 VIRTIO_ARG_VECTORIZED,
2517                                 vectorized_check_handler, vectorized);
2518                 if (ret < 0) {
2519                         PMD_INIT_LOG(ERR, "Failed to parse %s",
2520                                         VIRTIO_ARG_VECTORIZED);
2521                         goto exit;
2522                 }
2523         }
2524
2525 exit:
2526         rte_kvargs_free(kvlist);
2527         return ret;
2528 }
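/*
 * Example devargs usage (illustrative): both keys parsed above can be
 * passed on the EAL command line, e.g. assuming a virtio device at PCI
 * address 0000:00:04.0:
 *
 *	dpdk-testpmd -a 0000:00:04.0,speed=10000,vectorized=1 -- -i
 *
 * "speed" must map to a valid capability in virtio_dev_speed_capa_get()
 * and "vectorized=1" requests the vectorized datapath when the build
 * and runtime requirements are met.
 */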
2529
2530 static uint8_t
2531 rx_offload_enabled(struct virtio_hw *hw)
2532 {
2533         return virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
2534                 virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2535                 virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
2536 }
2537
2538 static uint8_t
2539 tx_offload_enabled(struct virtio_hw *hw)
2540 {
2541         return virtio_with_feature(hw, VIRTIO_NET_F_CSUM) ||
2542                 virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
2543                 virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
2544 }
2545
/*
 * Configure the virtio device.
 * Returns 0 on success.
 */
2550 static int
2551 virtio_dev_configure(struct rte_eth_dev *dev)
2552 {
2553         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2554         const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
2555         struct virtio_hw *hw = dev->data->dev_private;
2556         uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
2557                 hw->vtnet_hdr_size;
2558         uint64_t rx_offloads = rxmode->offloads;
2559         uint64_t tx_offloads = txmode->offloads;
2560         uint64_t req_features;
2561         int ret;
2562
2563         PMD_INIT_LOG(DEBUG, "configure");
2564         req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
2565
2566         if (rxmode->mq_mode != RTE_ETH_MQ_RX_NONE && rxmode->mq_mode != RTE_ETH_MQ_RX_RSS) {
2567                 PMD_DRV_LOG(ERR,
2568                         "Unsupported Rx multi queue mode %d",
2569                         rxmode->mq_mode);
2570                 return -EINVAL;
2571         }
2572
2573         if (txmode->mq_mode != RTE_ETH_MQ_TX_NONE) {
2574                 PMD_DRV_LOG(ERR,
2575                         "Unsupported Tx multi queue mode %d",
2576                         txmode->mq_mode);
2577                 return -EINVAL;
2578         }
2579
2580         if (dev->data->dev_conf.intr_conf.rxq) {
2581                 ret = virtio_init_device(dev, hw->req_guest_features);
2582                 if (ret < 0)
2583                         return ret;
2584         }
2585
2586         if (rxmode->mq_mode == RTE_ETH_MQ_RX_RSS)
2587                 req_features |= (1ULL << VIRTIO_NET_F_RSS);
2588
2589         if (rxmode->mtu > hw->max_mtu)
2590                 req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
2591
2592         hw->max_rx_pkt_len = ether_hdr_len + rxmode->mtu;
2593
2594         if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2595                            RTE_ETH_RX_OFFLOAD_TCP_CKSUM))
2596                 req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
2597
2598         if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)
2599                 req_features |=
2600                         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2601                         (1ULL << VIRTIO_NET_F_GUEST_TSO6);
2602
2603         if (tx_offloads & (RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
2604                            RTE_ETH_TX_OFFLOAD_TCP_CKSUM))
2605                 req_features |= (1ULL << VIRTIO_NET_F_CSUM);
2606
2607         if (tx_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)
2608                 req_features |=
2609                         (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2610                         (1ULL << VIRTIO_NET_F_HOST_TSO6);
2611
	/* If the requested features changed, reinitialize the device */
2613         if (req_features != hw->req_guest_features) {
2614                 ret = virtio_init_device(dev, req_features);
2615                 if (ret < 0)
2616                         return ret;
2617         }
2618
2619         if ((rxmode->mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) &&
2620                         !virtio_with_feature(hw, VIRTIO_NET_F_RSS)) {
2621                 PMD_DRV_LOG(ERR, "RSS support requested but not supported by the device");
2622                 return -ENOTSUP;
2623         }
2624
2625         if ((rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2626                             RTE_ETH_RX_OFFLOAD_TCP_CKSUM)) &&
2627                 !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2628                 PMD_DRV_LOG(ERR,
2629                         "rx checksum not available on this host");
2630                 return -ENOTSUP;
2631         }
2632
2633         if ((rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) &&
2634                 (!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2635                  !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2636                 PMD_DRV_LOG(ERR,
2637                         "Large Receive Offload not available on this host");
2638                 return -ENOTSUP;
2639         }
2640
2641         /* start control queue */
2642         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2643                 virtio_dev_cq_start(dev);
2644
2645         if (rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
2646                 hw->vlan_strip = 1;
2647
2648         hw->rx_ol_scatter = (rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER);
2649
2650         if ((rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER) &&
2651                         !virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2652                 PMD_DRV_LOG(ERR,
2653                             "vlan filtering not available on this host");
2654                 return -ENOTSUP;
2655         }
2656
2657         hw->has_tx_offload = tx_offload_enabled(hw);
2658         hw->has_rx_offload = rx_offload_enabled(hw);
2659
2660         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2661                 /* Enable vector (0) for Link State Interrupt */
2662                 if (VIRTIO_OPS(hw)->set_config_irq(hw, 0) ==
2663                                 VIRTIO_MSI_NO_VECTOR) {
2664                         PMD_DRV_LOG(ERR, "failed to set config vector");
2665                         return -EBUSY;
2666                 }
2667
2668         if (virtio_with_packed_queue(hw)) {
2669 #if defined(RTE_ARCH_X86_64) && defined(CC_AVX512_SUPPORT)
2670                 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2671                     (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
2672                      !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2673                      !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2674                      rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512)) {
2675                         PMD_DRV_LOG(INFO,
2676                                 "disabled packed ring vectorized path for requirements not met");
2677                         hw->use_vec_rx = 0;
2678                         hw->use_vec_tx = 0;
2679                 }
2680 #elif defined(RTE_ARCH_ARM)
2681                 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2682                     (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) ||
2683                      !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2684                      !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2685                      rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)) {
2686                         PMD_DRV_LOG(INFO,
2687                                 "disabled packed ring vectorized path for requirements not met");
2688                         hw->use_vec_rx = 0;
2689                         hw->use_vec_tx = 0;
2690                 }
2691 #else
2692                 hw->use_vec_rx = 0;
2693                 hw->use_vec_tx = 0;
2694 #endif
2695
2696                 if (hw->use_vec_rx) {
2697                         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2698                                 PMD_DRV_LOG(INFO,
2699                                         "disabled packed ring vectorized rx for mrg_rxbuf enabled");
2700                                 hw->use_vec_rx = 0;
2701                         }
2702
2703                         if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
2704                                 PMD_DRV_LOG(INFO,
2705                                         "disabled packed ring vectorized rx for TCP_LRO enabled");
2706                                 hw->use_vec_rx = 0;
2707                         }
2708                 }
2709         } else {
2710                 if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2711                         hw->use_inorder_tx = 1;
2712                         hw->use_inorder_rx = 1;
2713                         hw->use_vec_rx = 0;
2714                 }
2715
2716                 if (hw->use_vec_rx) {
2717 #if defined RTE_ARCH_ARM
2718                         if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2719                                 PMD_DRV_LOG(INFO,
2720                                         "disabled split ring vectorized path for requirement not met");
2721                                 hw->use_vec_rx = 0;
2722                         }
2723 #endif
2724                         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2725                                 PMD_DRV_LOG(INFO,
2726                                         "disabled split ring vectorized rx for mrg_rxbuf enabled");
2727                                 hw->use_vec_rx = 0;
2728                         }
2729
2730                         if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2731                                            RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
2732                                            RTE_ETH_RX_OFFLOAD_TCP_LRO |
2733                                            RTE_ETH_RX_OFFLOAD_VLAN_STRIP)) {
2734                                 PMD_DRV_LOG(INFO,
2735                                         "disabled split ring vectorized rx for offloading enabled");
2736                                 hw->use_vec_rx = 0;
2737                         }
2738
2739                         if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
2740                                 PMD_DRV_LOG(INFO,
2741                                         "disabled split ring vectorized rx, max SIMD bitwidth too low");
2742                                 hw->use_vec_rx = 0;
2743                         }
2744                 }
2745         }
2746
2747         return 0;
2748 }
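/*
 * Application-side sketch (illustrative): requesting RSS at configure
 * time, which makes virtio_dev_configure() above negotiate
 * VIRTIO_NET_F_RSS with the device:
 *
 *	struct rte_eth_conf conf = {
 *		.rxmode = { .mq_mode = RTE_ETH_MQ_RX_RSS },
 *		.rx_adv_conf.rss_conf = { .rss_hf = RTE_ETH_RSS_IP },
 *	};
 *
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 */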
2749
2750
2751 static int
2752 virtio_dev_start(struct rte_eth_dev *dev)
2753 {
2754         uint16_t nb_queues, i;
2755         struct virtqueue *vq;
2756         struct virtio_hw *hw = dev->data->dev_private;
2757         int ret;
2758
2759         /* Finish the initialization of the queues */
2760         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2761                 ret = virtio_dev_rx_queue_setup_finish(dev, i);
2762                 if (ret < 0)
2763                         return ret;
2764         }
2765         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2766                 ret = virtio_dev_tx_queue_setup_finish(dev, i);
2767                 if (ret < 0)
2768                         return ret;
2769         }
2770
2771         /* check if lsc interrupt feature is enabled */
2772         if (dev->data->dev_conf.intr_conf.lsc) {
2773                 if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2774                         PMD_DRV_LOG(ERR, "link status not supported by host");
2775                         return -ENOTSUP;
2776                 }
2777         }
2778
	/* Enable uio/vfio intr/eventfd mapping: we already did that at
	 * device configure time, but it could have been unmapped while the
	 * device was stopped.
	 */
2783         if (dev->data->dev_conf.intr_conf.lsc ||
2784             dev->data->dev_conf.intr_conf.rxq) {
2785                 virtio_intr_disable(dev);
2786
2787                 /* Setup interrupt callback  */
2788                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2789                         rte_intr_callback_register(dev->intr_handle,
2790                                                    virtio_interrupt_handler,
2791                                                    dev);
2792
2793                 if (virtio_intr_enable(dev) < 0) {
2794                         PMD_DRV_LOG(ERR, "interrupt enable failed");
2795                         return -EIO;
2796                 }
2797         }
2798
2799         /* Notify the backend.
2800          * Otherwise the tap backend might already have stopped its queue due to
2801          * fullness, leaving the vhost backend with no chance to be woken up.
2802          */
2803         nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2804         if (hw->max_queue_pairs > 1) {
2805                 if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2806                         return -EINVAL;
2807         }
2808
2809         PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
2810
2811         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2812                 vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2813                 /* Flush the old packets */
2814                 virtqueue_rxvq_flush(vq);
2815                 virtqueue_notify(vq);
2816         }
2817
2818         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2819                 vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2820                 virtqueue_notify(vq);
2821         }
2822
2823         PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
2824
2825         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2826                 vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2827                 VIRTQUEUE_DUMP(vq);
2828         }
2829
2830         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2831                 vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2832                 VIRTQUEUE_DUMP(vq);
2833         }
2834
2835         set_rxtx_funcs(dev);
2836         hw->started = 1;
2837
2838         /* Initialize Link state */
2839         virtio_dev_link_update(dev, 0);
2840
2841         return 0;
2842 }
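
/*
 * Application-side sketch (illustrative only, compiled out via a
 * hypothetical guard that is never defined): bringing up a port through the
 * public ethdev API eventually calls virtio_dev_configure() and
 * virtio_dev_start() above. The queue sizes and mempool are assumptions
 * made for the example.
 */
#ifdef VIRTIO_ETHDEV_USAGE_EXAMPLE
static int
example_port_start(uint16_t port_id, struct rte_mempool *mp)
{
        struct rte_eth_conf conf;
        int ret;

        memset(&conf, 0, sizeof(conf));

        /* One Rx and one Tx queue; this invokes virtio_dev_configure(). */
        ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
        if (ret < 0)
                return ret;

        ret = rte_eth_rx_queue_setup(port_id, 0, 256,
                        rte_eth_dev_socket_id(port_id), NULL, mp);
        if (ret < 0)
                return ret;

        ret = rte_eth_tx_queue_setup(port_id, 0, 256,
                        rte_eth_dev_socket_id(port_id), NULL);
        if (ret < 0)
                return ret;

        /* Finishes queue setup and notifies the backend (virtio_dev_start()). */
        return rte_eth_dev_start(port_id);
}
#endif /* VIRTIO_ETHDEV_USAGE_EXAMPLE */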
2843
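/* Release every mbuf still attached to the Rx/Tx virtqueues. */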
2844 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2845 {
2846         struct virtio_hw *hw = dev->data->dev_private;
2847         uint16_t nr_vq = virtio_get_nr_vq(hw);
2848         const char *type __rte_unused;
2849         unsigned int i, mbuf_num = 0;
2850         struct virtqueue *vq;
2851         struct rte_mbuf *buf;
2852         int queue_type;
2853
2854         if (hw->vqs == NULL)
2855                 return;
2856
2857         for (i = 0; i < nr_vq; i++) {
2858                 vq = hw->vqs[i];
2859                 if (!vq)
2860                         continue;
2861
2862                 queue_type = virtio_get_queue_type(hw, i);
2863                 if (queue_type == VTNET_RQ)
2864                         type = "rxq";
2865                 else if (queue_type == VTNET_TQ)
2866                         type = "txq";
2867                 else
2868                         continue;
2869
2870                 PMD_INIT_LOG(DEBUG,
2871                         "Before freeing %s[%d] used and unused buf",
2872                         type, i);
2873                 VIRTQUEUE_DUMP(vq);
2874
2875                 while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2876                         rte_pktmbuf_free(buf);
2877                         mbuf_num++;
2878                 }
2879
2880                 PMD_INIT_LOG(DEBUG,
2881                         "After freeing %s[%d] used and unused buf",
2882                         type, i);
2883                 VIRTQUEUE_DUMP(vq);
2884         }
2885
2886         PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2887 }
2888
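/*
 * Drain completed Tx descriptors on every Tx queue before stopping, using
 * the cleanup variant that matches the active datapath (packed or split,
 * in-order or not).
 */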
2889 static void
2890 virtio_tx_completed_cleanup(struct rte_eth_dev *dev)
2891 {
2892         struct virtio_hw *hw = dev->data->dev_private;
2893         struct virtqueue *vq;
2894         int qidx;
2895         void (*xmit_cleanup)(struct virtqueue *vq, uint16_t nb_used);
2896
2897         if (virtio_with_packed_queue(hw)) {
2898                 if (hw->use_vec_tx)
2899                         xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2900                 else if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER))
2901                         xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2902                 else
2903                         xmit_cleanup = &virtio_xmit_cleanup_normal_packed;
2904         } else {
2905                 if (hw->use_inorder_tx)
2906                         xmit_cleanup = &virtio_xmit_cleanup_inorder;
2907                 else
2908                         xmit_cleanup = &virtio_xmit_cleanup;
2909         }
2910
2911         for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
2912                 vq = hw->vqs[2 * qidx + VTNET_SQ_TQ_QUEUE_IDX];
2913                 if (vq != NULL)
2914                         xmit_cleanup(vq, virtqueue_nused(vq));
2915         }
2916 }
2917
2918 /*
2919  * Stop device: disable interrupt and mark link down
2920  */
2921 int
2922 virtio_dev_stop(struct rte_eth_dev *dev)
2923 {
2924         struct virtio_hw *hw = dev->data->dev_private;
2925         struct rte_eth_link link;
2926         struct rte_eth_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2927
2928         PMD_INIT_LOG(DEBUG, "stop");
2929         dev->data->dev_started = 0;
2930
2931         rte_spinlock_lock(&hw->state_lock);
2932         if (!hw->started)
2933                 goto out_unlock;
2934         hw->started = 0;
2935
2936         virtio_tx_completed_cleanup(dev);
2937
2938         if (intr_conf->lsc || intr_conf->rxq) {
2939                 virtio_intr_disable(dev);
2940
2941                 /* Reset interrupt callback  */
2942                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2943                         rte_intr_callback_unregister(dev->intr_handle,
2944                                                      virtio_interrupt_handler,
2945                                                      dev);
2946                 }
2947         }
2948
2949         memset(&link, 0, sizeof(link));
2950         rte_eth_linkstatus_set(dev, &link);
2951 out_unlock:
2952         rte_spinlock_unlock(&hw->state_lock);
2953
2954         return 0;
2955 }
2956
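/*
 * Report link status: the link is down while the port is stopped; otherwise
 * the STATUS config field is read when VIRTIO_NET_F_STATUS was negotiated,
 * and the link is assumed up when it was not.
 */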
2957 static int
2958 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2959 {
2960         struct rte_eth_link link;
2961         uint16_t status;
2962         struct virtio_hw *hw = dev->data->dev_private;
2963
2964         memset(&link, 0, sizeof(link));
2965         link.link_duplex = hw->duplex;
2966         link.link_speed  = hw->speed;
2967         link.link_autoneg = RTE_ETH_LINK_AUTONEG;
2968
2969         if (!hw->started) {
2970                 link.link_status = RTE_ETH_LINK_DOWN;
2971                 link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2972         } else if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2973                 PMD_INIT_LOG(DEBUG, "Get link status from hw");
2974                 virtio_read_dev_config(hw,
2975                                 offsetof(struct virtio_net_config, status),
2976                                 &status, sizeof(status));
2977                 if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2978                         link.link_status = RTE_ETH_LINK_DOWN;
2979                         link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2980                         PMD_INIT_LOG(DEBUG, "Port %d is down",
2981                                      dev->data->port_id);
2982                 } else {
2983                         link.link_status = RTE_ETH_LINK_UP;
2984                         if (hw->get_speed_via_feat)
2985                                 virtio_get_speed_duplex(dev, &link);
2986                         PMD_INIT_LOG(DEBUG, "Port %d is up",
2987                                      dev->data->port_id);
2988                 }
2989         } else {
2990                 link.link_status = RTE_ETH_LINK_UP;
2991                 if (hw->get_speed_via_feat)
2992                         virtio_get_speed_duplex(dev, &link);
2993         }
2994
2995         return rte_eth_linkstatus_set(dev, &link);
2996 }
2997
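/* Validate VLAN filtering support and latch the VLAN strip setting. */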
2998 static int
2999 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
3000 {
3001         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
3002         struct virtio_hw *hw = dev->data->dev_private;
3003         uint64_t offloads = rxmode->offloads;
3004
3005         if (mask & RTE_ETH_VLAN_FILTER_MASK) {
3006                 if ((offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER) &&
3007                                 !virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
3008
3009                         PMD_DRV_LOG(NOTICE,
3010                                 "vlan filtering not available on this host");
3011
3012                         return -ENOTSUP;
3013                 }
3014         }
3015
3016         if (mask & RTE_ETH_VLAN_STRIP_MASK)
3017                 hw->vlan_strip = !!(offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
3018
3019         return 0;
3020 }
3021
3022 static int
3023 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
3024 {
3025         uint64_t tso_mask, host_features;
3026         uint32_t rss_hash_types = 0;
3027         struct virtio_hw *hw = dev->data->dev_private;
3028         dev_info->speed_capa = virtio_dev_speed_capa_get(hw->speed);
3029
3030         dev_info->max_rx_queues =
3031                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
3032         dev_info->max_tx_queues =
3033                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
3034         dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
3035         dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
3036         dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
3037         dev_info->max_mtu = hw->max_mtu;
3038
3039         host_features = VIRTIO_OPS(hw)->get_features(hw);
3040         dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
3041         if (host_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
3042                 dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_SCATTER;
3043         if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
3044                 dev_info->rx_offload_capa |=
3045                         RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
3046                         RTE_ETH_RX_OFFLOAD_UDP_CKSUM;
3047         }
3048         if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
3049                 dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
3050         tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
3051                 (1ULL << VIRTIO_NET_F_GUEST_TSO6);
3052         if ((host_features & tso_mask) == tso_mask)
3053                 dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
3054
3055         dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
3056                                     RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
3057         if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
3058                 dev_info->tx_offload_capa |=
3059                         RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
3060                         RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
3061         }
3062         tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
3063                 (1ULL << VIRTIO_NET_F_HOST_TSO6);
3064         if ((host_features & tso_mask) == tso_mask)
3065                 dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
3066
3067         if (host_features & (1ULL << VIRTIO_NET_F_RSS)) {
3068                 virtio_dev_get_rss_config(hw, &rss_hash_types);
3069                 dev_info->hash_key_size = VIRTIO_NET_RSS_KEY_SIZE;
3070                 dev_info->reta_size = VIRTIO_NET_RSS_RETA_SIZE;
3071                 dev_info->flow_type_rss_offloads =
3072                         virtio_to_ethdev_rss_offloads(rss_hash_types);
3073         } else {
3074                 dev_info->hash_key_size = 0;
3075                 dev_info->reta_size = 0;
3076                 dev_info->flow_type_rss_offloads = 0;
3077         }
3078
3079         if (host_features & (1ULL << VIRTIO_F_RING_PACKED)) {
3080                 /*
3081                  * According to 2.7 Packed Virtqueues,
3082                  * 2.7.10.1 Structure Size and Alignment:
3083                  * The Queue Size value does not have to be a power of 2.
3084                  */
3085                 dev_info->rx_desc_lim.nb_max = UINT16_MAX;
3086                 dev_info->tx_desc_lim.nb_max = UINT16_MAX;
3087         } else {
3088                 /*
3089                  * According to 2.6 Split Virtqueues:
3090                  * Queue Size value is always a power of 2. The maximum Queue
3091                  * Size value is 32768.
3092                  */
3093                 dev_info->rx_desc_lim.nb_max = 32768;
3094                 dev_info->tx_desc_lim.nb_max = 32768;
3095         }
3096         /*
3097          * The actual minimum differs between virtqueue kinds, but to avoid
3098          * tangling the code with separate branches, rely on the default
3099          * thresholds, since the descriptor count must be at least their size.
3100          */
3101         dev_info->rx_desc_lim.nb_min = RTE_MAX(DEFAULT_RX_FREE_THRESH,
3102                                                RTE_VIRTIO_VPMD_RX_REARM_THRESH);
3103         dev_info->tx_desc_lim.nb_min = DEFAULT_TX_FREE_THRESH;
3104         dev_info->rx_desc_lim.nb_align = 1;
3105         dev_info->tx_desc_lim.nb_align = 1;
3106
3107         return 0;
3108 }
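
/*
 * Capability-discovery sketch (illustrative only, compiled out via the same
 * hypothetical guard as above): an application retrieves what
 * virtio_dev_info_get() reports through rte_eth_dev_info_get(), e.g. to
 * decide whether LRO may be requested.
 */
#ifdef VIRTIO_ETHDEV_USAGE_EXAMPLE
static int
example_lro_supported(uint16_t port_id)
{
        struct rte_eth_dev_info dev_info;
        int ret;

        ret = rte_eth_dev_info_get(port_id, &dev_info);
        if (ret < 0)
                return ret;

        /* Non-zero when both guest TSO4 and TSO6 were offered by the host. */
        return !!(dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TCP_LRO);
}
#endif /* VIRTIO_ETHDEV_USAGE_EXAMPLE */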
3109
3110 /*
3111  * Stub that lets testpmd set up per-queue stats mapping; nothing to do.
3112  */
3113 static int
3114 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
3115 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
3116 __rte_unused uint8_t is_rx)
3117 {
3118         return 0;
3119 }
3120
3121 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_init, init, NOTICE);
3122 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_driver, driver, NOTICE);