/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>

#include <ethdev_driver.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_arp.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_cpuflags.h>
#include <rte_vect.h>
#include <rte_memory.h>
#include <rte_eal_paging.h>
#include <rte_eal.h>
#include <rte_dev.h>
#include <rte_cycles.h>
#include <rte_kvargs.h>

#include "virtio_ethdev.h"
#include "virtio.h"
#include "virtio_logs.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"
#include "virtio_rxtx_simple.h"
#include "virtio_user/virtio_user_dev.h"

static int  virtio_dev_configure(struct rte_eth_dev *dev);
static int  virtio_dev_start(struct rte_eth_dev *dev);
static int virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
static int virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
static int virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
static int virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
static uint32_t virtio_dev_speed_capa_get(uint32_t speed);
static int virtio_dev_devargs_parse(struct rte_devargs *devargs,
        uint32_t *speed,
        int *vectorized);
static int virtio_dev_info_get(struct rte_eth_dev *dev,
                                struct rte_eth_dev_info *dev_info);
static int virtio_dev_link_update(struct rte_eth_dev *dev,
        int wait_to_complete);
static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static int virtio_dev_rss_hash_update(struct rte_eth_dev *dev,
                struct rte_eth_rss_conf *rss_conf);
static int virtio_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
                struct rte_eth_rss_conf *rss_conf);
static int virtio_dev_rss_reta_update(struct rte_eth_dev *dev,
                         struct rte_eth_rss_reta_entry64 *reta_conf,
                         uint16_t reta_size);
static int virtio_dev_rss_reta_query(struct rte_eth_dev *dev,
                         struct rte_eth_rss_reta_entry64 *reta_conf,
                         uint16_t reta_size);

static void virtio_set_hwaddr(struct virtio_hw *hw);
static void virtio_get_hwaddr(struct virtio_hw *hw);

static int virtio_dev_stats_get(struct rte_eth_dev *dev,
                                 struct rte_eth_stats *stats);
static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
                                 struct rte_eth_xstat *xstats, unsigned n);
static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
                                       struct rte_eth_xstat_name *xstats_names,
                                       unsigned limit);
static int virtio_dev_stats_reset(struct rte_eth_dev *dev);
static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
                                uint16_t vlan_id, int on);
static int virtio_mac_addr_add(struct rte_eth_dev *dev,
                                struct rte_ether_addr *mac_addr,
                                uint32_t index, uint32_t vmdq);
static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
static int virtio_mac_addr_set(struct rte_eth_dev *dev,
                                struct rte_ether_addr *mac_addr);

static int virtio_intr_disable(struct rte_eth_dev *dev);
static int virtio_get_monitor_addr(void *rx_queue,
                                struct rte_power_monitor_cond *pmc);

static int virtio_dev_queue_stats_mapping_set(
        struct rte_eth_dev *eth_dev,
        uint16_t queue_id,
        uint8_t stat_idx,
        uint8_t is_rx);

static void virtio_notify_peers(struct rte_eth_dev *dev);
static void virtio_ack_link_announce(struct rte_eth_dev *dev);

struct rte_virtio_xstats_name_off {
        char name[RTE_ETH_XSTATS_NAME_SIZE];
        unsigned offset;
};

/* [rt]x_qX_ is prepended to the name string here */
static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
        {"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
        {"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
        {"errors",                 offsetof(struct virtnet_rx, stats.errors)},
        {"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
        {"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
        {"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
        {"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
        {"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
        {"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
        {"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
        {"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
        {"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
        {"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
};

/* [rt]x_qX_ is prepended to the name string here */
static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
        {"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
        {"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
        {"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
        {"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
        {"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
        {"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
        {"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
        {"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
        {"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
        {"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
        {"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
        {"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
};

#define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
                            sizeof(rte_virtio_rxq_stat_strings[0]))
#define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
                            sizeof(rte_virtio_txq_stat_strings[0]))

struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];

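/*
 * Send a control command on a packed virtqueue: one descriptor for the
 * control header, one per data element, and a final write-only descriptor
 * for the device-written ack status. The head descriptor's flags are
 * stored last, then the function busy-waits (with usleep) until the
 * device marks the head descriptor as used.
 */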
static struct virtio_pmd_ctrl *
virtio_send_command_packed(struct virtnet_ctl *cvq,
                           struct virtio_pmd_ctrl *ctrl,
                           int *dlen, int pkt_num)
{
        struct virtqueue *vq = virtnet_cq_to_vq(cvq);
        int head;
        struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
        struct virtio_pmd_ctrl *result;
        uint16_t flags;
        int sum = 0;
        int nb_descs = 0;
        int k;

        /*
         * Format is enforced in qemu code:
         * One TX packet for header;
         * At least one TX packet per argument;
         * One RX packet for ACK.
         */
        head = vq->vq_avail_idx;
        flags = vq->vq_packed.cached_flags;
        desc[head].addr = cvq->virtio_net_hdr_mem;
        desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
        vq->vq_free_cnt--;
        nb_descs++;
        if (++vq->vq_avail_idx >= vq->vq_nentries) {
                vq->vq_avail_idx -= vq->vq_nentries;
                vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
        }

        for (k = 0; k < pkt_num; k++) {
                desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
                        + sizeof(struct virtio_net_ctrl_hdr)
                        + sizeof(ctrl->status) + sizeof(uint8_t) * sum;
                desc[vq->vq_avail_idx].len = dlen[k];
                desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
                        vq->vq_packed.cached_flags;
                sum += dlen[k];
                vq->vq_free_cnt--;
                nb_descs++;
                if (++vq->vq_avail_idx >= vq->vq_nentries) {
                        vq->vq_avail_idx -= vq->vq_nentries;
                        vq->vq_packed.cached_flags ^=
                                VRING_PACKED_DESC_F_AVAIL_USED;
                }
        }

        desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
                + sizeof(struct virtio_net_ctrl_hdr);
        desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
        desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
                vq->vq_packed.cached_flags;
        vq->vq_free_cnt--;
        nb_descs++;
        if (++vq->vq_avail_idx >= vq->vq_nentries) {
                vq->vq_avail_idx -= vq->vq_nentries;
                vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
        }

        virtqueue_store_flags_packed(&desc[head], VRING_DESC_F_NEXT | flags,
                        vq->hw->weak_barriers);

        virtio_wmb(vq->hw->weak_barriers);
        virtqueue_notify(vq);

        /* wait for used desc in virtqueue
         * desc_is_used has a load-acquire or rte_io_rmb inside
         */
        while (!desc_is_used(&desc[head], vq))
                usleep(100);

        /* now get used descriptors */
        vq->vq_free_cnt += nb_descs;
        vq->vq_used_cons_idx += nb_descs;
        if (vq->vq_used_cons_idx >= vq->vq_nentries) {
                vq->vq_used_cons_idx -= vq->vq_nentries;
                vq->vq_packed.used_wrap_counter ^= 1;
        }

        PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
                        "vq->vq_avail_idx=%d\n"
                        "vq->vq_used_cons_idx=%d\n"
                        "vq->vq_packed.cached_flags=0x%x\n"
                        "vq->vq_packed.used_wrap_counter=%d",
                        vq->vq_free_cnt,
                        vq->vq_avail_idx,
                        vq->vq_used_cons_idx,
                        vq->vq_packed.cached_flags,
                        vq->vq_packed.used_wrap_counter);

        result = cvq->virtio_net_hdr_mz->addr;
        return result;
}

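/*
 * Split-ring variant of the control command: the same header/data/ack
 * layout is linked as a descriptor chain through desc[].next, then the
 * used ring is polled and the consumed chain is returned to the free
 * list.
 */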
static struct virtio_pmd_ctrl *
virtio_send_command_split(struct virtnet_ctl *cvq,
                          struct virtio_pmd_ctrl *ctrl,
                          int *dlen, int pkt_num)
{
        struct virtio_pmd_ctrl *result;
        struct virtqueue *vq = virtnet_cq_to_vq(cvq);
        uint32_t head, i;
        int k, sum = 0;

        head = vq->vq_desc_head_idx;

        /*
         * Format is enforced in qemu code:
         * One TX packet for header;
         * At least one TX packet per argument;
         * One RX packet for ACK.
         */
        vq->vq_split.ring.desc[head].flags = VRING_DESC_F_NEXT;
        vq->vq_split.ring.desc[head].addr = cvq->virtio_net_hdr_mem;
        vq->vq_split.ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
        vq->vq_free_cnt--;
        i = vq->vq_split.ring.desc[head].next;

        for (k = 0; k < pkt_num; k++) {
                vq->vq_split.ring.desc[i].flags = VRING_DESC_F_NEXT;
                vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
                        + sizeof(struct virtio_net_ctrl_hdr)
                        + sizeof(ctrl->status) + sizeof(uint8_t) * sum;
                vq->vq_split.ring.desc[i].len = dlen[k];
                sum += dlen[k];
                vq->vq_free_cnt--;
                i = vq->vq_split.ring.desc[i].next;
        }

        vq->vq_split.ring.desc[i].flags = VRING_DESC_F_WRITE;
        vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
                        + sizeof(struct virtio_net_ctrl_hdr);
        vq->vq_split.ring.desc[i].len = sizeof(ctrl->status);
        vq->vq_free_cnt--;

        vq->vq_desc_head_idx = vq->vq_split.ring.desc[i].next;

        vq_update_avail_ring(vq, head);
        vq_update_avail_idx(vq);

        PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);

        virtqueue_notify(vq);

        while (virtqueue_nused(vq) == 0)
                usleep(100);

        while (virtqueue_nused(vq)) {
                uint32_t idx, desc_idx, used_idx;
                struct vring_used_elem *uep;

                used_idx = (uint32_t)(vq->vq_used_cons_idx
                                & (vq->vq_nentries - 1));
                uep = &vq->vq_split.ring.used->ring[used_idx];
                idx = (uint32_t) uep->id;
                desc_idx = idx;

                while (vq->vq_split.ring.desc[desc_idx].flags &
                                VRING_DESC_F_NEXT) {
                        desc_idx = vq->vq_split.ring.desc[desc_idx].next;
                        vq->vq_free_cnt++;
                }

                vq->vq_split.ring.desc[desc_idx].next = vq->vq_desc_head_idx;
                vq->vq_desc_head_idx = idx;

                vq->vq_used_cons_idx++;
                vq->vq_free_cnt++;
        }

        PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
                        vq->vq_free_cnt, vq->vq_desc_head_idx);

        result = cvq->virtio_net_hdr_mz->addr;
        return result;
}

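/*
 * Common entry point for control-queue commands: takes the control queue
 * lock, copies the command into the header memzone and dispatches to the
 * packed or split implementation. Returns the device-written status, or
 * -1 if the control queue is missing or lacks free descriptors.
 */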
static int
virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
                    int *dlen, int pkt_num)
{
        virtio_net_ctrl_ack status = ~0;
        struct virtio_pmd_ctrl *result;
        struct virtqueue *vq;

        ctrl->status = status;

        if (!cvq) {
                PMD_INIT_LOG(ERR, "Control queue is not supported.");
                return -1;
        }

        rte_spinlock_lock(&cvq->lock);
        vq = virtnet_cq_to_vq(cvq);

        PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
                "vq->hw->cvq = %p vq = %p",
                vq->vq_desc_head_idx, status, vq->hw->cvq, vq);

        if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
                rte_spinlock_unlock(&cvq->lock);
                return -1;
        }

        memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
                sizeof(struct virtio_pmd_ctrl));

        if (virtio_with_packed_queue(vq->hw))
                result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num);
        else
                result = virtio_send_command_split(cvq, ctrl, dlen, pkt_num);

        rte_spinlock_unlock(&cvq->lock);
        return result->status;
}

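/*
 * Program RSS via VIRTIO_NET_CTRL_MQ_RSS_CONFIG, pushing the hash types,
 * indirection table and hash key currently cached in virtio_hw to the
 * device in a single control command.
 */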
static int
virtio_set_multiple_queues_rss(struct rte_eth_dev *dev, uint16_t nb_queues)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        struct virtio_net_ctrl_rss rss;
        int dlen, ret;

        rss.hash_types = hw->rss_hash_types & VIRTIO_NET_HASH_TYPE_MASK;
        RTE_BUILD_BUG_ON(!RTE_IS_POWER_OF_2(VIRTIO_NET_RSS_RETA_SIZE));
        rss.indirection_table_mask = VIRTIO_NET_RSS_RETA_SIZE - 1;
        rss.unclassified_queue = 0;
        memcpy(rss.indirection_table, hw->rss_reta, VIRTIO_NET_RSS_RETA_SIZE * sizeof(uint16_t));
        rss.max_tx_vq = nb_queues;
        rss.hash_key_length = VIRTIO_NET_RSS_KEY_SIZE;
        memcpy(rss.hash_key_data, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);

        ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_RSS_CONFIG;
        memcpy(ctrl.data, &rss, sizeof(rss));

        dlen = sizeof(rss);

        ret = virtio_send_command(hw->cvq, &ctrl, &dlen, 1);
        if (ret) {
                PMD_INIT_LOG(ERR, "RSS multiqueue configured but send command failed");
                return -EINVAL;
        }

        return 0;
}

static int
virtio_set_multiple_queues_auto(struct rte_eth_dev *dev, uint16_t nb_queues)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen;
        int ret;

        ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
        memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));

        dlen = sizeof(uint16_t);

        ret = virtio_send_command(hw->cvq, &ctrl, &dlen, 1);
        if (ret) {
                PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
                          "failed, this is too late now...");
                return -EINVAL;
        }

        return 0;
}

static int
virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
{
        struct virtio_hw *hw = dev->data->dev_private;

        if (virtio_with_feature(hw, VIRTIO_NET_F_RSS))
                return virtio_set_multiple_queues_rss(dev, nb_queues);
        else
                return virtio_set_multiple_queues_auto(dev, nb_queues);
}

static uint16_t
virtio_get_nr_vq(struct virtio_hw *hw)
{
        uint16_t nr_vq = hw->max_queue_pairs * 2;

        if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
                nr_vq += 1;

        return nr_vq;
}

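/*
 * Reset the vring backing memory and the software bookkeeping (indexes,
 * free count, shadow descriptor state) for either ring layout, leaving
 * the queue with interrupts disabled.
 */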
static void
virtio_init_vring(struct virtqueue *vq)
{
        int size = vq->vq_nentries;
        uint8_t *ring_mem = vq->vq_ring_virt_mem;

        PMD_INIT_FUNC_TRACE();

        memset(ring_mem, 0, vq->vq_ring_size);

        vq->vq_used_cons_idx = 0;
        vq->vq_desc_head_idx = 0;
        vq->vq_avail_idx = 0;
        vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
        vq->vq_free_cnt = vq->vq_nentries;
        memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
        if (virtio_with_packed_queue(vq->hw)) {
                vring_init_packed(&vq->vq_packed.ring, ring_mem,
                                  VIRTIO_VRING_ALIGN, size);
                vring_desc_init_packed(vq, size);
        } else {
                struct vring *vr = &vq->vq_split.ring;

                vring_init_split(vr, ring_mem, VIRTIO_VRING_ALIGN, size);
                vring_desc_init_split(vr->desc, size);
        }
        /*
         * Disable the device (host) from interrupting the guest
         */
        virtqueue_disable_intr(vq);
}

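/*
 * Allocate and set up one virtqueue: read its size from the device,
 * reserve IOVA-contiguous memzones for the vring and (for Tx/control
 * queues) the virtio-net headers, and initialize the rx/tx/control view
 * of the queue before handing it to the bus-specific setup_queue().
 */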
static int
virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
{
        char vq_name[VIRTQUEUE_MAX_NAME_SZ];
        char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
        const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
        unsigned int vq_size, size;
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtnet_rx *rxvq = NULL;
        struct virtnet_tx *txvq = NULL;
        struct virtnet_ctl *cvq = NULL;
        struct virtqueue *vq;
        size_t sz_hdr_mz = 0;
        void *sw_ring = NULL;
        int queue_type = virtio_get_queue_type(hw, queue_idx);
        int ret;
        int numa_node = dev->device->numa_node;
        struct rte_mbuf *fake_mbuf = NULL;

        PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
                        queue_idx, numa_node);

        /*
         * Read the virtqueue size from the Queue Size field.
         * Always a power of 2; if 0, the virtqueue does not exist.
         */
        vq_size = VIRTIO_OPS(hw)->get_queue_num(hw, queue_idx);
        PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
        if (vq_size == 0) {
                PMD_INIT_LOG(ERR, "virtqueue does not exist");
                return -EINVAL;
        }

        if (!virtio_with_packed_queue(hw) && !rte_is_power_of_2(vq_size)) {
                PMD_INIT_LOG(ERR, "split virtqueue size is not a power of 2");
                return -EINVAL;
        }

        snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
                 dev->data->port_id, queue_idx);

        size = RTE_ALIGN_CEIL(sizeof(*vq) +
                                vq_size * sizeof(struct vq_desc_extra),
                                RTE_CACHE_LINE_SIZE);
        if (queue_type == VTNET_TQ) {
                /*
                 * For each xmit packet, allocate a virtio_net_hdr
                 * and indirect ring elements
                 */
                sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
        } else if (queue_type == VTNET_CQ) {
                /* Allocate a page for control vq command, data and status */
                sz_hdr_mz = rte_mem_page_size();
        }

        vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
                                numa_node);
        if (vq == NULL) {
                PMD_INIT_LOG(ERR, "cannot allocate vq");
                return -ENOMEM;
        }
        hw->vqs[queue_idx] = vq;

        vq->hw = hw;
        vq->vq_queue_index = queue_idx;
        vq->vq_nentries = vq_size;
        if (virtio_with_packed_queue(hw)) {
                vq->vq_packed.used_wrap_counter = 1;
                vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL;
                vq->vq_packed.event_flags_shadow = 0;
                if (queue_type == VTNET_RQ)
                        vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE;
        }

        /*
         * Reserve a memzone for vring elements
         */
        size = vring_size(hw, vq_size, VIRTIO_VRING_ALIGN);
        vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_VRING_ALIGN);
        PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
                     size, vq->vq_ring_size);

        mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
                        numa_node, RTE_MEMZONE_IOVA_CONTIG,
                        VIRTIO_VRING_ALIGN);
        if (mz == NULL) {
                if (rte_errno == EEXIST)
                        mz = rte_memzone_lookup(vq_name);
                if (mz == NULL) {
                        ret = -ENOMEM;
                        goto free_vq;
                }
        }

        memset(mz->addr, 0, mz->len);

        if (hw->use_va)
                vq->vq_ring_mem = (uintptr_t)mz->addr;
        else
                vq->vq_ring_mem = mz->iova;

        vq->vq_ring_virt_mem = mz->addr;
        PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64, vq->vq_ring_mem);
        PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: %p", vq->vq_ring_virt_mem);

        virtio_init_vring(vq);

        if (sz_hdr_mz) {
                snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
                         dev->data->port_id, queue_idx);
                hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
                                numa_node, RTE_MEMZONE_IOVA_CONTIG,
                                RTE_CACHE_LINE_SIZE);
                if (hdr_mz == NULL) {
                        if (rte_errno == EEXIST)
                                hdr_mz = rte_memzone_lookup(vq_hdr_name);
                        if (hdr_mz == NULL) {
                                ret = -ENOMEM;
                                goto free_mz;
                        }
                }
        }

        if (queue_type == VTNET_RQ) {
                size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
                               sizeof(vq->sw_ring[0]);

                sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
                                RTE_CACHE_LINE_SIZE, numa_node);
                if (!sw_ring) {
                        PMD_INIT_LOG(ERR, "cannot allocate RX soft ring");
                        ret = -ENOMEM;
                        goto free_hdr_mz;
                }

                fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
                                RTE_CACHE_LINE_SIZE, numa_node);
                if (!fake_mbuf) {
                        PMD_INIT_LOG(ERR, "cannot allocate fake mbuf");
                        ret = -ENOMEM;
                        goto free_sw_ring;
                }

                vq->sw_ring = sw_ring;
                rxvq = &vq->rxq;
                rxvq->port_id = dev->data->port_id;
                rxvq->mz = mz;
                rxvq->fake_mbuf = fake_mbuf;
        } else if (queue_type == VTNET_TQ) {
                txvq = &vq->txq;
                txvq->port_id = dev->data->port_id;
                txvq->mz = mz;
                txvq->virtio_net_hdr_mz = hdr_mz;
                if (hw->use_va)
                        txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
                else
                        txvq->virtio_net_hdr_mem = hdr_mz->iova;
        } else if (queue_type == VTNET_CQ) {
                cvq = &vq->cq;
                cvq->mz = mz;
                cvq->virtio_net_hdr_mz = hdr_mz;
                if (hw->use_va)
                        cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
                else
                        cvq->virtio_net_hdr_mem = hdr_mz->iova;
                memset(cvq->virtio_net_hdr_mz->addr, 0, rte_mem_page_size());

                hw->cvq = cvq;
        }

        if (hw->use_va)
                vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_addr);
        else
                vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_iova);

        if (queue_type == VTNET_TQ) {
                struct virtio_tx_region *txr;
                unsigned int i;

                txr = hdr_mz->addr;
                memset(txr, 0, vq_size * sizeof(*txr));
                for (i = 0; i < vq_size; i++) {
                        /* first indirect descriptor is always the tx header */
                        if (!virtio_with_packed_queue(hw)) {
                                struct vring_desc *start_dp = txr[i].tx_indir;
                                vring_desc_init_split(start_dp,
                                                      RTE_DIM(txr[i].tx_indir));
                                start_dp->addr = txvq->virtio_net_hdr_mem
                                        + i * sizeof(*txr)
                                        + offsetof(struct virtio_tx_region,
                                                   tx_hdr);
                                start_dp->len = hw->vtnet_hdr_size;
                                start_dp->flags = VRING_DESC_F_NEXT;
                        } else {
                                struct vring_packed_desc *start_dp =
                                        txr[i].tx_packed_indir;
                                vring_desc_init_indirect_packed(start_dp,
                                      RTE_DIM(txr[i].tx_packed_indir));
                                start_dp->addr = txvq->virtio_net_hdr_mem
                                        + i * sizeof(*txr)
                                        + offsetof(struct virtio_tx_region,
                                                   tx_hdr);
                                start_dp->len = hw->vtnet_hdr_size;
                        }
                }
        }

        if (VIRTIO_OPS(hw)->setup_queue(hw, vq) < 0) {
                PMD_INIT_LOG(ERR, "setup_queue failed");
                ret = -EINVAL;
                goto clean_vq;
        }

        return 0;

clean_vq:
        hw->cvq = NULL;
        rte_free(fake_mbuf);
free_sw_ring:
        rte_free(sw_ring);
free_hdr_mz:
        rte_memzone_free(hdr_mz);
free_mz:
        rte_memzone_free(mz);
free_vq:
        rte_free(vq);
        hw->vqs[queue_idx] = NULL;

        return ret;
}

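/*
 * Release every virtqueue and its per-type resources (Rx sw ring and
 * fake mbuf, Tx/control header memzones), then the vq array itself.
 */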
static void
virtio_free_queues(struct virtio_hw *hw)
{
        uint16_t nr_vq = virtio_get_nr_vq(hw);
        struct virtqueue *vq;
        int queue_type;
        uint16_t i;

        if (hw->vqs == NULL)
                return;

        for (i = 0; i < nr_vq; i++) {
                vq = hw->vqs[i];
                if (!vq)
                        continue;

                queue_type = virtio_get_queue_type(hw, i);
                if (queue_type == VTNET_RQ) {
                        rte_free(vq->rxq.fake_mbuf);
                        rte_free(vq->sw_ring);
                        rte_memzone_free(vq->rxq.mz);
                } else if (queue_type == VTNET_TQ) {
                        rte_memzone_free(vq->txq.mz);
                        rte_memzone_free(vq->txq.virtio_net_hdr_mz);
                } else {
                        rte_memzone_free(vq->cq.mz);
                        rte_memzone_free(vq->cq.virtio_net_hdr_mz);
                }

                rte_free(vq);
                hw->vqs[i] = NULL;
        }

        rte_free(hw->vqs);
        hw->vqs = NULL;
}

static int
virtio_alloc_queues(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        uint16_t nr_vq = virtio_get_nr_vq(hw);
        uint16_t i;
        int ret;

        hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
        if (!hw->vqs) {
                PMD_INIT_LOG(ERR, "failed to allocate vqs");
                return -ENOMEM;
        }

        for (i = 0; i < nr_vq; i++) {
                ret = virtio_init_queue(dev, i);
                if (ret < 0) {
                        virtio_free_queues(hw);
                        return ret;
                }
        }

        return 0;
}

static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);

static void
virtio_free_rss(struct virtio_hw *hw)
{
        rte_free(hw->rss_key);
        hw->rss_key = NULL;

        rte_free(hw->rss_reta);
        hw->rss_reta = NULL;
}

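/*
 * Close the device. This is a no-op in secondary processes and for ports
 * that were never opened; otherwise it tears down interrupt handling,
 * resets the device, and frees mbufs, queues and RSS state.
 */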
int
virtio_dev_close(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct rte_eth_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;

        PMD_INIT_LOG(DEBUG, "virtio_dev_close");
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;

        if (!hw->opened)
                return 0;
        hw->opened = 0;

        /* reset the NIC */
        if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
                VIRTIO_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
        if (intr_conf->rxq)
                virtio_queues_unbind_intr(dev);

        if (intr_conf->lsc || intr_conf->rxq) {
                virtio_intr_disable(dev);
                rte_intr_efd_disable(dev->intr_handle);
                rte_intr_vec_list_free(dev->intr_handle);
        }

        virtio_reset(hw);
        virtio_dev_free_mbufs(dev);
        virtio_free_queues(hw);
        virtio_free_rss(hw);

        return VIRTIO_OPS(hw)->dev_close(hw);
}

static int
virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return -ENOTSUP;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
        ctrl.data[0] = 1;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret) {
                PMD_INIT_LOG(ERR, "Failed to enable promisc");
                return -EAGAIN;
        }

        return 0;
}

static int
virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return -ENOTSUP;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
        ctrl.data[0] = 0;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret) {
                PMD_INIT_LOG(ERR, "Failed to disable promisc");
                return -EAGAIN;
        }

        return 0;
}

static int
virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return -ENOTSUP;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
        ctrl.data[0] = 1;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret) {
                PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
                return -EAGAIN;
        }

        return 0;
}

static int
virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return -ENOTSUP;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
        ctrl.data[0] = 0;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret) {
                PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
                return -EAGAIN;
        }

        return 0;
}

uint16_t
virtio_rx_mem_pool_buf_size(struct rte_mempool *mp)
{
        return rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
}

bool
virtio_rx_check_scatter(uint16_t max_rx_pkt_len, uint16_t rx_buf_size,
                        bool rx_scatter_enabled, const char **error)
{
        if (!rx_scatter_enabled && max_rx_pkt_len > rx_buf_size) {
                *error = "Rx scatter is disabled and RxQ mbuf pool object size is too small";
                return false;
        }

        return true;
}

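/*
 * Validate that every configured Rx queue can receive frame_size bytes
 * given its mbuf pool buffer size and the Rx scatter setting.
 */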
static bool
virtio_check_scatter_on_all_rx_queues(struct rte_eth_dev *dev,
                                      uint16_t frame_size)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtnet_rx *rxvq;
        struct virtqueue *vq;
        unsigned int qidx;
        uint16_t buf_size;
        const char *error;

        if (hw->vqs == NULL)
                return true;

        for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
                vq = hw->vqs[2 * qidx + VTNET_SQ_RQ_QUEUE_IDX];
                if (vq == NULL)
                        continue;

                rxvq = &vq->rxq;
                if (rxvq->mpool == NULL)
                        continue;
                buf_size = virtio_rx_mem_pool_buf_size(rxvq->mpool);

                if (!virtio_rx_check_scatter(frame_size, buf_size,
                                             hw->rx_ol_scatter, &error)) {
                        PMD_INIT_LOG(ERR, "MTU check for RxQ %u failed: %s",
                                     qidx, error);
                        return false;
                }
        }

        return true;
}

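/*
 * The on-wire budget checked below is the MTU plus Ethernet header, VLAN
 * tag and virtio-net header. For example, assuming a 1500 byte MTU and a
 * 12 byte (mergeable/V1) virtio-net header, the frame size works out to
 * 1500 + 14 + 4 + 12 = 1530 bytes.
 */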
#define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
static int
virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
        struct virtio_hw *hw = dev->data->dev_private;
        uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
                                 hw->vtnet_hdr_size;
        uint32_t frame_size = mtu + ether_hdr_len;
        uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;

        max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);

        if (mtu < RTE_ETHER_MIN_MTU || frame_size > max_frame_size) {
                PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
                        RTE_ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
                return -EINVAL;
        }

        if (!virtio_check_scatter_on_all_rx_queues(dev, frame_size)) {
                PMD_INIT_LOG(ERR, "MTU vs Rx scatter and Rx buffers check failed");
                return -EINVAL;
        }

        hw->max_rx_pkt_len = frame_size;

        return 0;
}

static int
virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
        struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);

        virtqueue_enable_intr(vq);
        virtio_mb(hw->weak_barriers);
        return 0;
}

static int
virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
{
        struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
        struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);

        virtqueue_disable_intr(vq);
        return 0;
}

/*
 * dev_ops for virtio, bare necessities for basic operation
 */
static const struct eth_dev_ops virtio_eth_dev_ops = {
        .dev_configure           = virtio_dev_configure,
        .dev_start               = virtio_dev_start,
        .dev_stop                = virtio_dev_stop,
        .dev_close               = virtio_dev_close,
        .promiscuous_enable      = virtio_dev_promiscuous_enable,
        .promiscuous_disable     = virtio_dev_promiscuous_disable,
        .allmulticast_enable     = virtio_dev_allmulticast_enable,
        .allmulticast_disable    = virtio_dev_allmulticast_disable,
        .mtu_set                 = virtio_mtu_set,
        .dev_infos_get           = virtio_dev_info_get,
        .stats_get               = virtio_dev_stats_get,
        .xstats_get              = virtio_dev_xstats_get,
        .xstats_get_names        = virtio_dev_xstats_get_names,
        .stats_reset             = virtio_dev_stats_reset,
        .xstats_reset            = virtio_dev_stats_reset,
        .link_update             = virtio_dev_link_update,
        .vlan_offload_set        = virtio_dev_vlan_offload_set,
        .rx_queue_setup          = virtio_dev_rx_queue_setup,
        .rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
        .rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
        .tx_queue_setup          = virtio_dev_tx_queue_setup,
        .rss_hash_update         = virtio_dev_rss_hash_update,
        .rss_hash_conf_get       = virtio_dev_rss_hash_conf_get,
        .reta_update             = virtio_dev_rss_reta_update,
        .reta_query              = virtio_dev_rss_reta_query,
        /* collect stats per queue */
        .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
        .vlan_filter_set         = virtio_vlan_filter_set,
        .mac_addr_add            = virtio_mac_addr_add,
        .mac_addr_remove         = virtio_mac_addr_remove,
        .mac_addr_set            = virtio_mac_addr_set,
        .get_monitor_addr        = virtio_get_monitor_addr,
};

/*
 * dev_ops for virtio-user in secondary processes, as only limited
 * functionality is supported currently.
 */
const struct eth_dev_ops virtio_user_secondary_eth_dev_ops = {
        .dev_infos_get           = virtio_dev_info_get,
        .stats_get               = virtio_dev_stats_get,
        .xstats_get              = virtio_dev_xstats_get,
        .xstats_get_names        = virtio_dev_xstats_get_names,
        .stats_reset             = virtio_dev_stats_reset,
        .xstats_reset            = virtio_dev_stats_reset,
        /* collect stats per queue */
        .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
};

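/*
 * Aggregate the per-queue software counters into rte_eth_stats; the first
 * RTE_ETHDEV_QUEUE_STAT_CNTRS queues are also reported individually.
 */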
static void
virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        unsigned i;

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                const struct virtnet_tx *txvq = dev->data->tx_queues[i];
                if (txvq == NULL)
                        continue;

                stats->opackets += txvq->stats.packets;
                stats->obytes += txvq->stats.bytes;

                if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
                        stats->q_opackets[i] = txvq->stats.packets;
                        stats->q_obytes[i] = txvq->stats.bytes;
                }
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
                if (rxvq == NULL)
                        continue;

                stats->ipackets += rxvq->stats.packets;
                stats->ibytes += rxvq->stats.bytes;
                stats->ierrors += rxvq->stats.errors;

                if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
                        stats->q_ipackets[i] = rxvq->stats.packets;
                        stats->q_ibytes[i] = rxvq->stats.bytes;
                }
        }

        stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
}

static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
                                       struct rte_eth_xstat_name *xstats_names,
                                       __rte_unused unsigned limit)
{
        unsigned i;
        unsigned count = 0;
        unsigned t;

        unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
                dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;

        if (xstats_names != NULL) {
                /* Note: limit checked in rte_eth_xstats_names() */

                for (i = 0; i < dev->data->nb_rx_queues; i++) {
                        struct virtnet_rx *rxvq = dev->data->rx_queues[i];
                        if (rxvq == NULL)
                                continue;
                        for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
                                snprintf(xstats_names[count].name,
                                        sizeof(xstats_names[count].name),
                                        "rx_q%u_%s", i,
                                        rte_virtio_rxq_stat_strings[t].name);
                                count++;
                        }
                }

                for (i = 0; i < dev->data->nb_tx_queues; i++) {
                        struct virtnet_tx *txvq = dev->data->tx_queues[i];
                        if (txvq == NULL)
                                continue;
                        for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
                                snprintf(xstats_names[count].name,
                                        sizeof(xstats_names[count].name),
                                        "tx_q%u_%s", i,
                                        rte_virtio_txq_stat_strings[t].name);
                                count++;
                        }
                }
                return count;
        }
        return nstats;
}

static int
virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
                      unsigned n)
{
        unsigned i;
        unsigned count = 0;

        unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
                dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;

        if (n < nstats)
                return nstats;

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct virtnet_rx *rxvq = dev->data->rx_queues[i];

                if (rxvq == NULL)
                        continue;

                unsigned t;

                for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
                        xstats[count].value = *(uint64_t *)(((char *)rxvq) +
                                rte_virtio_rxq_stat_strings[t].offset);
                        xstats[count].id = count;
                        count++;
                }
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct virtnet_tx *txvq = dev->data->tx_queues[i];

                if (txvq == NULL)
                        continue;

                unsigned t;

                for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
                        xstats[count].value = *(uint64_t *)(((char *)txvq) +
                                rte_virtio_txq_stat_strings[t].offset);
                        xstats[count].id = count;
                        count++;
                }
        }

        return count;
}

static int
virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        virtio_update_stats(dev, stats);

        return 0;
}

static int
virtio_dev_stats_reset(struct rte_eth_dev *dev)
{
        unsigned int i;

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct virtnet_tx *txvq = dev->data->tx_queues[i];
                if (txvq == NULL)
                        continue;

                txvq->stats.packets = 0;
                txvq->stats.bytes = 0;
                txvq->stats.multicast = 0;
                txvq->stats.broadcast = 0;
                memset(txvq->stats.size_bins, 0,
                       sizeof(txvq->stats.size_bins[0]) * 8);
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct virtnet_rx *rxvq = dev->data->rx_queues[i];
                if (rxvq == NULL)
                        continue;

                rxvq->stats.packets = 0;
                rxvq->stats.bytes = 0;
                rxvq->stats.errors = 0;
                rxvq->stats.multicast = 0;
                rxvq->stats.broadcast = 0;
                memset(rxvq->stats.size_bins, 0,
                       sizeof(rxvq->stats.size_bins[0]) * 8);
        }

        return 0;
}

static void
virtio_set_hwaddr(struct virtio_hw *hw)
{
        virtio_write_dev_config(hw,
                        offsetof(struct virtio_net_config, mac),
                        &hw->mac_addr, RTE_ETHER_ADDR_LEN);
}

static void
virtio_get_hwaddr(struct virtio_hw *hw)
{
        if (virtio_with_feature(hw, VIRTIO_NET_F_MAC)) {
                virtio_read_dev_config(hw,
                        offsetof(struct virtio_net_config, mac),
                        &hw->mac_addr, RTE_ETHER_ADDR_LEN);
        } else {
                rte_eth_random_addr(&hw->mac_addr[0]);
                virtio_set_hwaddr(hw);
        }
}

static int
virtio_mac_table_set(struct virtio_hw *hw,
                     const struct virtio_net_ctrl_mac *uc,
                     const struct virtio_net_ctrl_mac *mc)
{
        struct virtio_pmd_ctrl ctrl;
        int err, len[2];

        if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
                PMD_DRV_LOG(INFO, "host does not support mac table");
                return -1;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;

        len[0] = uc->entries * RTE_ETHER_ADDR_LEN + sizeof(uc->entries);
        memcpy(ctrl.data, uc, len[0]);

        len[1] = mc->entries * RTE_ETHER_ADDR_LEN + sizeof(mc->entries);
        memcpy(ctrl.data + len[0], mc, len[1]);

        err = virtio_send_command(hw->cvq, &ctrl, len, 2);
        if (err != 0)
                PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
        return err;
}

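/*
 * The MAC filter table is always rewritten as a whole: rebuild the
 * unicast and multicast tables from dev->data->mac_addrs with the new
 * entry substituted at 'index', then push both via CTRL_MAC_TABLE_SET.
 */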
static int
virtio_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
                    uint32_t index, uint32_t vmdq __rte_unused)
{
        struct virtio_hw *hw = dev->data->dev_private;
        const struct rte_ether_addr *addrs = dev->data->mac_addrs;
        unsigned int i;
        struct virtio_net_ctrl_mac *uc, *mc;

        if (index >= VIRTIO_MAX_MAC_ADDRS) {
                PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
                return -EINVAL;
        }

        uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
                sizeof(uc->entries));
        uc->entries = 0;
        mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
                sizeof(mc->entries));
        mc->entries = 0;

        for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
                const struct rte_ether_addr *addr
                        = (i == index) ? mac_addr : addrs + i;
                struct virtio_net_ctrl_mac *tbl
                        = rte_is_multicast_ether_addr(addr) ? mc : uc;

                memcpy(&tbl->macs[tbl->entries++], addr, RTE_ETHER_ADDR_LEN);
        }

        return virtio_mac_table_set(hw, uc, mc);
}

static void
virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct rte_ether_addr *addrs = dev->data->mac_addrs;
        struct virtio_net_ctrl_mac *uc, *mc;
        unsigned int i;

        if (index >= VIRTIO_MAX_MAC_ADDRS) {
                PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
                return;
        }

        uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
                sizeof(uc->entries));
        uc->entries = 0;
        mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
                sizeof(mc->entries));
        mc->entries = 0;

        for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
                struct virtio_net_ctrl_mac *tbl;

                if (i == index || rte_is_zero_ether_addr(addrs + i))
                        continue;

                tbl = rte_is_multicast_ether_addr(addrs + i) ? mc : uc;
                memcpy(&tbl->macs[tbl->entries++], addrs + i,
                        RTE_ETHER_ADDR_LEN);
        }

        virtio_mac_table_set(hw, uc, mc);
}

static int
virtio_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
{
        struct virtio_hw *hw = dev->data->dev_private;

        memcpy(hw->mac_addr, mac_addr, RTE_ETHER_ADDR_LEN);

        /* Use atomic update if available */
        if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
                struct virtio_pmd_ctrl ctrl;
                int len = RTE_ETHER_ADDR_LEN;

                ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
                ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;

                memcpy(ctrl.data, mac_addr, RTE_ETHER_ADDR_LEN);
                return virtio_send_command(hw->cvq, &ctrl, &len, 1);
        }

        if (!virtio_with_feature(hw, VIRTIO_NET_F_MAC))
                return -ENOTSUP;

        virtio_set_hwaddr(hw);
        return 0;
}

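/*
 * Power-monitor callback state stored in pmc->opaque: the expected value,
 * the bit mask to apply, and whether a match means the queue has work
 * (packed ring used flags) or is still idle (split ring used index
 * unchanged).
 */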
1388 #define CLB_VAL_IDX 0
1389 #define CLB_MSK_IDX 1
1390 #define CLB_MATCH_IDX 2
1391 static int
1392 virtio_monitor_callback(const uint64_t value,
1393                 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
1394 {
1395         const uint64_t m = opaque[CLB_MSK_IDX];
1396         const uint64_t v = opaque[CLB_VAL_IDX];
1397         const uint64_t c = opaque[CLB_MATCH_IDX];
1398
1399         if (c)
1400                 return (value & m) == v ? -1 : 0;
1401         else
1402                 return (value & m) == v ? 0 : -1;
1403 }
1404
1405 static int
1406 virtio_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1407 {
1408         struct virtnet_rx *rxvq = rx_queue;
1409         struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
1410         struct virtio_hw *hw;
1411
1412         if (vq == NULL)
1413                 return -EINVAL;
1414
1415         hw = vq->hw;
1416         if (virtio_with_packed_queue(hw)) {
1417                 struct vring_packed_desc *desc;
1418                 desc = vq->vq_packed.ring.desc;
1419                 pmc->addr = &desc[vq->vq_used_cons_idx].flags;
1420                 if (vq->vq_packed.used_wrap_counter)
1421                         pmc->opaque[CLB_VAL_IDX] =
1422                                                 VRING_PACKED_DESC_F_AVAIL_USED;
1423                 else
1424                         pmc->opaque[CLB_VAL_IDX] = 0;
1425                 pmc->opaque[CLB_MSK_IDX] = VRING_PACKED_DESC_F_AVAIL_USED;
1426                 pmc->opaque[CLB_MATCH_IDX] = 1;
1427                 pmc->size = sizeof(desc[vq->vq_used_cons_idx].flags);
1428         } else {
1429                 pmc->addr = &vq->vq_split.ring.used->idx;
1430                 pmc->opaque[CLB_VAL_IDX] = vq->vq_used_cons_idx
1431                                         & (vq->vq_nentries - 1);
1432                 pmc->opaque[CLB_MSK_IDX] = vq->vq_nentries - 1;
1433                 pmc->opaque[CLB_MATCH_IDX] = 0;
1434                 pmc->size = sizeof(vq->vq_split.ring.used->idx);
1435         }
1436         pmc->fn = virtio_monitor_callback;
1437
1438         return 0;
1439 }
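
/*
 * Illustrative sketch (not part of the driver): an application reaches the
 * callback above through the generic ethdev power-monitoring API. port_id,
 * queue_id and timeout are hypothetical values.
 *
 *     struct rte_power_monitor_cond pmc;
 *
 *     if (rte_eth_get_monitor_addr(port_id, queue_id, &pmc) == 0)
 *             rte_power_monitor(&pmc, rte_get_tsc_cycles() + timeout);
 *
 * For a packed ring this sleeps until the AVAIL/USED flags of the next
 * descriptor change; for a split ring, until the used index moves past
 * the consumed index.
 */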
1440
1441 static int
1442 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1443 {
1444         struct virtio_hw *hw = dev->data->dev_private;
1445         struct virtio_pmd_ctrl ctrl;
1446         int len;
1447
1448         if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1449                 return -ENOTSUP;
1450
1451         ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1452         ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1453         memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1454         len = sizeof(vlan_id);
1455
1456         return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1457 }
1458
1459 static int
1460 virtio_intr_unmask(struct rte_eth_dev *dev)
1461 {
1462         struct virtio_hw *hw = dev->data->dev_private;
1463
1464         if (rte_intr_ack(dev->intr_handle) < 0)
1465                 return -1;
1466
1467         if (VIRTIO_OPS(hw)->intr_detect)
1468                 VIRTIO_OPS(hw)->intr_detect(hw);
1469
1470         return 0;
1471 }
1472
1473 static int
1474 virtio_intr_enable(struct rte_eth_dev *dev)
1475 {
1476         struct virtio_hw *hw = dev->data->dev_private;
1477
1478         if (rte_intr_enable(dev->intr_handle) < 0)
1479                 return -1;
1480
1481         if (VIRTIO_OPS(hw)->intr_detect)
1482                 VIRTIO_OPS(hw)->intr_detect(hw);
1483
1484         return 0;
1485 }
1486
1487 static int
1488 virtio_intr_disable(struct rte_eth_dev *dev)
1489 {
1490         struct virtio_hw *hw = dev->data->dev_private;
1491
1492         if (rte_intr_disable(dev->intr_handle) < 0)
1493                 return -1;
1494
1495         if (VIRTIO_OPS(hw)->intr_detect)
1496                 VIRTIO_OPS(hw)->intr_detect(hw);
1497
1498         return 0;
1499 }
1500
1501 static int
1502 virtio_ethdev_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1503 {
1504         uint64_t host_features;
1505
1506         /* Prepare guest_features: features that the driver wants to support */
1507         PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1508                 req_features);
1509
1510         /* Read device(host) feature bits */
1511         host_features = VIRTIO_OPS(hw)->get_features(hw);
1512         PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1513                 host_features);
1514
1515         /* If supported, ensure MTU value is valid before acknowledging it. */
1516         if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1517                 struct virtio_net_config config;
1518
1519                 virtio_read_dev_config(hw,
1520                         offsetof(struct virtio_net_config, mtu),
1521                         &config.mtu, sizeof(config.mtu));
1522
1523                 if (config.mtu < RTE_ETHER_MIN_MTU)
1524                         req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1525         }
1526
1527         /*
1528          * Negotiate features: the subset of device feature bits accepted by
1529          * the driver is written back as the guest feature bits.
1530          */
1531         hw->guest_features = req_features;
1532         hw->guest_features = virtio_negotiate_features(hw, host_features);
1533         PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1534                 hw->guest_features);
1535
1536         if (VIRTIO_OPS(hw)->features_ok(hw) < 0)
1537                 return -1;
1538
1539         if (virtio_with_feature(hw, VIRTIO_F_VERSION_1)) {
1540                 virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1541
1542                 if (!(virtio_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1543                         PMD_INIT_LOG(ERR, "Failed to set FEATURES_OK status!");
1544                         return -1;
1545                 }
1546         }
1547
1548         hw->req_guest_features = req_features;
1549
1550         return 0;
1551 }
1552
1553 int
1554 virtio_dev_pause(struct rte_eth_dev *dev)
1555 {
1556         struct virtio_hw *hw = dev->data->dev_private;
1557
1558         rte_spinlock_lock(&hw->state_lock);
1559
1560         if (hw->started == 0) {
1561                 /* Device has already been stopped. */
1562                 rte_spinlock_unlock(&hw->state_lock);
1563                 return -1;
1564         }
1565         hw->started = 0;
1566         /*
1567          * Prevent the worker threads from touching queues to avoid contention;
1568          * 1 ms should be enough for the ongoing Tx function to finish.
1569          */
1570         rte_delay_ms(1);
1571         return 0;
1572 }
1573
1574 /*
1575  * Recover hw state to let the worker threads continue.
1576  */
1577 void
1578 virtio_dev_resume(struct rte_eth_dev *dev)
1579 {
1580         struct virtio_hw *hw = dev->data->dev_private;
1581
1582         hw->started = 1;
1583         rte_spinlock_unlock(&hw->state_lock);
1584 }
1585
1586 /*
1587  * Should be called only after the device has been paused.
1588  */
1589 int
1590 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1591                 int nb_pkts)
1592 {
1593         struct virtio_hw *hw = dev->data->dev_private;
1594         struct virtnet_tx *txvq = dev->data->tx_queues[0];
1595         int ret;
1596
1597         hw->inject_pkts = tx_pkts;
1598         ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1599         hw->inject_pkts = NULL;
1600
1601         return ret;
1602 }
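
/*
 * Usage sketch: the expected pause/inject/resume sequence, matching what
 * virtio_notify_peers() below does. pkt is a hypothetical prepared mbuf,
 * e.g. from rte_net_make_rarp_packet().
 *
 *     struct rte_mbuf *pkt;
 *
 *     if (virtio_dev_pause(dev) == 0) {
 *             virtio_inject_pkts(dev, &pkt, 1);
 *             virtio_dev_resume(dev);
 *     }
 */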
1603
1604 static void
1605 virtio_notify_peers(struct rte_eth_dev *dev)
1606 {
1607         struct virtio_hw *hw = dev->data->dev_private;
1608         struct virtnet_rx *rxvq;
1609         struct rte_mbuf *rarp_mbuf;
1610
1611         if (!dev->data->rx_queues)
1612                 return;
1613
1614         rxvq = dev->data->rx_queues[0];
1615         if (!rxvq)
1616                 return;
1617
1618         rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
1619                         (struct rte_ether_addr *)hw->mac_addr);
1620         if (rarp_mbuf == NULL) {
1621                 PMD_DRV_LOG(ERR, "failed to make RARP packet.");
1622                 return;
1623         }
1624
1625         /* If virtio port just stopped, no need to send RARP */
1626         if (virtio_dev_pause(dev) < 0) {
1627                 rte_pktmbuf_free(rarp_mbuf);
1628                 return;
1629         }
1630
1631         virtio_inject_pkts(dev, &rarp_mbuf, 1);
1632         virtio_dev_resume(dev);
1633 }
1634
1635 static void
1636 virtio_ack_link_announce(struct rte_eth_dev *dev)
1637 {
1638         struct virtio_hw *hw = dev->data->dev_private;
1639         struct virtio_pmd_ctrl ctrl;
1640
1641         ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
1642         ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
1643
1644         virtio_send_command(hw->cvq, &ctrl, NULL, 0);
1645 }
1646
1647 /*
1648  * Process the virtio config-changed interrupt. Call the LSC callback
1649  * if the link state changed, and generate a gratuitous RARP packet if
1650  * the status indicates an ANNOUNCE.
1651  */
1652 void
1653 virtio_interrupt_handler(void *param)
1654 {
1655         struct rte_eth_dev *dev = param;
1656         struct virtio_hw *hw = dev->data->dev_private;
1657         uint8_t isr;
1658         uint16_t status;
1659
1660         /* Read interrupt status which clears interrupt */
1661         isr = virtio_get_isr(hw);
1662         PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
1663
1664         if (virtio_intr_unmask(dev) < 0)
1665                 PMD_DRV_LOG(ERR, "interrupt unmask failed");
1666
1667         if (isr & VIRTIO_ISR_CONFIG) {
1668                 if (virtio_dev_link_update(dev, 0) == 0)
1669                         rte_eth_dev_callback_process(dev,
1670                                                      RTE_ETH_EVENT_INTR_LSC,
1671                                                      NULL);
1672
1673                 if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1674                         virtio_read_dev_config(hw,
1675                                 offsetof(struct virtio_net_config, status),
1676                                 &status, sizeof(status));
1677                         if (status & VIRTIO_NET_S_ANNOUNCE) {
1678                                 virtio_notify_peers(dev);
1679                                 if (hw->cvq)
1680                                         virtio_ack_link_announce(dev);
1681                         }
1682                 }
1683         }
1684 }
1685
1686 /* set rx and tx handlers according to what is supported */
1687 static void
1688 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1689 {
1690         struct virtio_hw *hw = eth_dev->data->dev_private;
1691
1692         eth_dev->tx_pkt_prepare = virtio_xmit_pkts_prepare;
1693         if (virtio_with_packed_queue(hw)) {
1694                 PMD_INIT_LOG(INFO,
1695                         "virtio: using packed ring %s Tx path on port %u",
1696                         hw->use_vec_tx ? "vectorized" : "standard",
1697                         eth_dev->data->port_id);
1698                 if (hw->use_vec_tx)
1699                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
1700                 else
1701                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
1702         } else {
1703                 if (hw->use_inorder_tx) {
1704                         PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
1705                                 eth_dev->data->port_id);
1706                         eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
1707                 } else {
1708                         PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1709                                 eth_dev->data->port_id);
1710                         eth_dev->tx_pkt_burst = virtio_xmit_pkts;
1711                 }
1712         }
1713
1714         if (virtio_with_packed_queue(hw)) {
1715                 if (hw->use_vec_rx) {
1716                         PMD_INIT_LOG(INFO,
1717                                 "virtio: using packed ring vectorized Rx path on port %u",
1718                                 eth_dev->data->port_id);
1719                         eth_dev->rx_pkt_burst =
1720                                 &virtio_recv_pkts_packed_vec;
1721                 } else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1722                         PMD_INIT_LOG(INFO,
1723                                 "virtio: using packed ring mergeable buffer Rx path on port %u",
1724                                 eth_dev->data->port_id);
1725                         eth_dev->rx_pkt_burst =
1726                                 &virtio_recv_mergeable_pkts_packed;
1727                 } else {
1728                         PMD_INIT_LOG(INFO,
1729                                 "virtio: using packed ring standard Rx path on port %u",
1730                                 eth_dev->data->port_id);
1731                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
1732                 }
1733         } else {
1734                 if (hw->use_vec_rx) {
1735                         PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
1736                                 eth_dev->data->port_id);
1737                         eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1738                 } else if (hw->use_inorder_rx) {
1739                         PMD_INIT_LOG(INFO,
1740                                 "virtio: using inorder Rx path on port %u",
1741                                 eth_dev->data->port_id);
1742                         eth_dev->rx_pkt_burst = &virtio_recv_pkts_inorder;
1743                 } else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1744                         PMD_INIT_LOG(INFO,
1745                                 "virtio: using mergeable buffer Rx path on port %u",
1746                                 eth_dev->data->port_id);
1747                         eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1748                 } else {
1749                         PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1750                                 eth_dev->data->port_id);
1751                         eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1752                 }
1753         }
1754
1755 }
1756
1757 /* Only support 1:1 queue/interrupt mapping so far.
1758  * TODO: support n:1 queue/interrupt mapping when there is a limited number of
1759  * interrupt vectors (<N+1).
1760  */
1761 static int
1762 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1763 {
1764         uint32_t i;
1765         struct virtio_hw *hw = dev->data->dev_private;
1766
1767         PMD_INIT_LOG(INFO, "queue/interrupt binding");
1768         for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1769                 if (rte_intr_vec_list_index_set(dev->intr_handle, i,
1770                                                        i + 1))
1771                         return -rte_errno;
1772                 if (VIRTIO_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1773                                                  VIRTIO_MSI_NO_VECTOR) {
1774                         PMD_DRV_LOG(ERR, "failed to set queue vector");
1775                         return -EBUSY;
1776                 }
1777         }
1778
1779         return 0;
1780 }
1781
1782 static void
1783 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1784 {
1785         uint32_t i;
1786         struct virtio_hw *hw = dev->data->dev_private;
1787
1788         PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
1789         for (i = 0; i < dev->data->nb_rx_queues; ++i)
1790                 VIRTIO_OPS(hw)->set_queue_irq(hw,
1791                                              hw->vqs[i * VTNET_CQ],
1792                                              VIRTIO_MSI_NO_VECTOR);
1793 }
1794
1795 static int
1796 virtio_configure_intr(struct rte_eth_dev *dev)
1797 {
1798         struct virtio_hw *hw = dev->data->dev_private;
1799
1800         if (!rte_intr_cap_multiple(dev->intr_handle)) {
1801                 PMD_INIT_LOG(ERR, "Multiple intr vectors not supported");
1802                 return -ENOTSUP;
1803         }
1804
1805         if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1806                 PMD_INIT_LOG(ERR, "Failed to create eventfd");
1807                 return -1;
1808         }
1809
1810         if (rte_intr_vec_list_alloc(dev->intr_handle, "intr_vec",
1811                                     hw->max_queue_pairs)) {
1812                 PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1813                              hw->max_queue_pairs);
1814                 return -ENOMEM;
1815         }
1816
1817         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1818                 /* Re-register callback to update max_intr */
1819                 rte_intr_callback_unregister(dev->intr_handle,
1820                                              virtio_interrupt_handler,
1821                                              dev);
1822                 rte_intr_callback_register(dev->intr_handle,
1823                                            virtio_interrupt_handler,
1824                                            dev);
1825         }
1826
1827         /* DO NOT try to remove this! This function enables MSI-X; without it,
1828          * QEMU will encounter a SIGSEGV when DRIVER_OK is sent.
1829          * For legacy devices, this must also be done before queue/vector binding
1830          * to change the config size from 20 to 24; otherwise writes to
1831          * VIRTIO_MSI_QUEUE_VECTOR (22) will be ignored.
1832          */
1833         if (virtio_intr_enable(dev) < 0) {
1834                 PMD_DRV_LOG(ERR, "interrupt enable failed");
1835                 return -1;
1836         }
1837
1838         if (virtio_queues_bind_intr(dev) < 0) {
1839                 PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1840                 return -1;
1841         }
1842
1843         return 0;
1844 }
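
/*
 * Illustrative sketch (not part of the driver): the function above runs
 * when the application enables per-queue Rx interrupts, e.g.:
 *
 *     struct rte_eth_conf conf = { .intr_conf = { .rxq = 1 } };
 *
 *     rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *     ...
 *     rte_eth_dev_rx_intr_enable(port_id, queue_id);
 *
 * port_id, queue_id, nb_rxq and nb_txq are hypothetical values.
 */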
1845
1846 static void
1847 virtio_get_speed_duplex(struct rte_eth_dev *eth_dev,
1848                         struct rte_eth_link *link)
1849 {
1850         struct virtio_hw *hw = eth_dev->data->dev_private;
1851         struct virtio_net_config *config;
1852         struct virtio_net_config local_config;
1853
1854         config = &local_config;
1855         virtio_read_dev_config(hw,
1856                 offsetof(struct virtio_net_config, speed),
1857                 &config->speed, sizeof(config->speed));
1858         virtio_read_dev_config(hw,
1859                 offsetof(struct virtio_net_config, duplex),
1860                 &config->duplex, sizeof(config->duplex));
1861         hw->speed = config->speed;
1862         hw->duplex = config->duplex;
1863         if (link != NULL) {
1864                 link->link_duplex = hw->duplex;
1865                 link->link_speed  = hw->speed;
1866         }
1867         PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
1868                      hw->speed, hw->duplex);
1869 }
1870
1871 static uint64_t
1872 ethdev_to_virtio_rss_offloads(uint64_t ethdev_hash_types)
1873 {
1874         uint64_t virtio_hash_types = 0;
1875
1876         if (ethdev_hash_types & (RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
1877                                 RTE_ETH_RSS_NONFRAG_IPV4_OTHER))
1878                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IPV4;
1879
1880         if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1881                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCPV4;
1882
1883         if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1884                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDPV4;
1885
1886         if (ethdev_hash_types & (RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
1887                                 RTE_ETH_RSS_NONFRAG_IPV6_OTHER))
1888                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IPV6;
1889
1890         if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1891                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCPV6;
1892
1893         if (ethdev_hash_types & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1894                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDPV6;
1895
1896         if (ethdev_hash_types & RTE_ETH_RSS_IPV6_EX)
1897                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_IP_EX;
1898
1899         if (ethdev_hash_types & RTE_ETH_RSS_IPV6_TCP_EX)
1900                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_TCP_EX;
1901
1902         if (ethdev_hash_types & RTE_ETH_RSS_IPV6_UDP_EX)
1903                 virtio_hash_types |= VIRTIO_NET_HASH_TYPE_UDP_EX;
1904
1905         return virtio_hash_types;
1906 }
1907
1908 static uint64_t
1909 virtio_to_ethdev_rss_offloads(uint64_t virtio_hash_types)
1910 {
1911         uint64_t rss_offloads = 0;
1912
1913         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IPV4)
1914                 rss_offloads |= RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
1915                         RTE_ETH_RSS_NONFRAG_IPV4_OTHER;
1916
1917         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCPV4)
1918                 rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV4_TCP;
1919
1920         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDPV4)
1921                 rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV4_UDP;
1922
1923         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IPV6)
1924                 rss_offloads |= RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
1925                         RTE_ETH_RSS_NONFRAG_IPV6_OTHER;
1926
1927         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCPV6)
1928                 rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV6_TCP;
1929
1930         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDPV6)
1931                 rss_offloads |= RTE_ETH_RSS_NONFRAG_IPV6_UDP;
1932
1933         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_IP_EX)
1934                 rss_offloads |= RTE_ETH_RSS_IPV6_EX;
1935
1936         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_TCP_EX)
1937                 rss_offloads |= RTE_ETH_RSS_IPV6_TCP_EX;
1938
1939         if (virtio_hash_types & VIRTIO_NET_HASH_TYPE_UDP_EX)
1940                 rss_offloads |= RTE_ETH_RSS_IPV6_UDP_EX;
1941
1942         return rss_offloads;
1943 }
1944
1945 static int
1946 virtio_dev_get_rss_config(struct virtio_hw *hw, uint32_t *rss_hash_types)
1947 {
1948         struct virtio_net_config local_config;
1949         struct virtio_net_config *config = &local_config;
1950
1951         virtio_read_dev_config(hw,
1952                         offsetof(struct virtio_net_config, rss_max_key_size),
1953                         &config->rss_max_key_size,
1954                         sizeof(config->rss_max_key_size));
1955         if (config->rss_max_key_size < VIRTIO_NET_RSS_KEY_SIZE) {
1956                 PMD_INIT_LOG(ERR, "Invalid device RSS max key size (%u)",
1957                                 config->rss_max_key_size);
1958                 return -EINVAL;
1959         }
1960
1961         virtio_read_dev_config(hw,
1962                         offsetof(struct virtio_net_config,
1963                                 rss_max_indirection_table_length),
1964                         &config->rss_max_indirection_table_length,
1965                         sizeof(config->rss_max_indirection_table_length));
1966         if (config->rss_max_indirection_table_length < VIRTIO_NET_RSS_RETA_SIZE) {
1967                 PMD_INIT_LOG(ERR, "Invalid device RSS max reta size (%u)",
1968                                 config->rss_max_indirection_table_length);
1969                 return -EINVAL;
1970         }
1971
1972         virtio_read_dev_config(hw,
1973                         offsetof(struct virtio_net_config, supported_hash_types),
1974                         &config->supported_hash_types,
1975                         sizeof(config->supported_hash_types));
1976         if ((config->supported_hash_types & VIRTIO_NET_HASH_TYPE_MASK) == 0) {
1977                 PMD_INIT_LOG(ERR, "Invalid device RSS hash types (0x%x)",
1978                                 config->supported_hash_types);
1979                 return -EINVAL;
1980         }
1981
1982         *rss_hash_types = config->supported_hash_types & VIRTIO_NET_HASH_TYPE_MASK;
1983
1984         PMD_INIT_LOG(DEBUG, "Device RSS config:");
1985         PMD_INIT_LOG(DEBUG, "\t-Max key size: %u", config->rss_max_key_size);
1986         PMD_INIT_LOG(DEBUG, "\t-Max reta size: %u", config->rss_max_indirection_table_length);
1987         PMD_INIT_LOG(DEBUG, "\t-Supported hash types: 0x%x", *rss_hash_types);
1988
1989         return 0;
1990 }
1991
1992 static int
1993 virtio_dev_rss_hash_update(struct rte_eth_dev *dev,
1994                 struct rte_eth_rss_conf *rss_conf)
1995 {
1996         struct virtio_hw *hw = dev->data->dev_private;
1997         char old_rss_key[VIRTIO_NET_RSS_KEY_SIZE];
1998         uint32_t old_hash_types;
1999         uint16_t nb_queues;
2000         int ret;
2001
2002         if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2003                 return -ENOTSUP;
2004
2005         if (rss_conf->rss_hf & ~virtio_to_ethdev_rss_offloads(VIRTIO_NET_HASH_TYPE_MASK))
2006                 return -EINVAL;
2007
2008         old_hash_types = hw->rss_hash_types;
2009         hw->rss_hash_types = ethdev_to_virtio_rss_offloads(rss_conf->rss_hf);
2010
2011         if (rss_conf->rss_key && rss_conf->rss_key_len) {
2012                 if (rss_conf->rss_key_len != VIRTIO_NET_RSS_KEY_SIZE) {
2013                         PMD_INIT_LOG(ERR, "Driver only supports an RSS key length of %u",
2014                                         VIRTIO_NET_RSS_KEY_SIZE);
2015                         ret = -EINVAL;
2016                         goto restore_types;
2017                 }
2018                 memcpy(old_rss_key, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2019                 memcpy(hw->rss_key, rss_conf->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2020         }
2021
2022         nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2023         ret = virtio_set_multiple_queues_rss(dev, nb_queues);
2024         if (ret < 0) {
2025                 PMD_INIT_LOG(ERR, "Failed to apply new RSS config to the device");
2026                 goto restore_key;
2027         }
2028
2029         return 0;
2030 restore_key:
2031         memcpy(hw->rss_key, old_rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2032 restore_types:
2033         hw->rss_hash_types = old_hash_types;
2034
2035         return ret;
2036 }
2037
2038 static int
2039 virtio_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2040                 struct rte_eth_rss_conf *rss_conf)
2041 {
2042         struct virtio_hw *hw = dev->data->dev_private;
2043
2044         if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2045                 return -ENOTSUP;
2046
2047         if (rss_conf->rss_key && rss_conf->rss_key_len >= VIRTIO_NET_RSS_KEY_SIZE)
2048                 memcpy(rss_conf->rss_key, hw->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2049         rss_conf->rss_key_len = VIRTIO_NET_RSS_KEY_SIZE;
2050         rss_conf->rss_hf = virtio_to_ethdev_rss_offloads(hw->rss_hash_types);
2051
2052         return 0;
2053 }
2054
2055 static int virtio_dev_rss_reta_update(struct rte_eth_dev *dev,
2056                          struct rte_eth_rss_reta_entry64 *reta_conf,
2057                          uint16_t reta_size)
2058 {
2059         struct virtio_hw *hw = dev->data->dev_private;
2060         uint16_t nb_queues;
2061         uint16_t old_reta[VIRTIO_NET_RSS_RETA_SIZE];
2062         int idx, pos, i, ret;
2063
2064         if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2065                 return -ENOTSUP;
2066
2067         if (reta_size != VIRTIO_NET_RSS_RETA_SIZE)
2068                 return -EINVAL;
2069
2070         memcpy(old_reta, hw->rss_reta, sizeof(old_reta));
2071
2072         for (i = 0; i < reta_size; i++) {
2073                 idx = i / RTE_ETH_RETA_GROUP_SIZE;
2074                 pos = i % RTE_ETH_RETA_GROUP_SIZE;
2075
2076                 if (((reta_conf[idx].mask >> pos) & 0x1) == 0)
2077                         continue;
2078
2079                 hw->rss_reta[i] = reta_conf[idx].reta[pos];
2080         }
2081
2082         nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2083         ret = virtio_set_multiple_queues_rss(dev, nb_queues);
2084         if (ret < 0) {
2085                 PMD_INIT_LOG(ERR, "Failed to apply new RETA to the device");
2086                 memcpy(hw->rss_reta, old_reta, sizeof(old_reta));
2087         }
2088
2089         hw->rss_rx_queues = dev->data->nb_rx_queues;
2090
2091         return ret;
2092 }
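
/*
 * Illustrative sketch (not part of the driver): an application updates the
 * RETA through the generic API, with entries grouped in blocks of
 * RTE_ETH_RETA_GROUP_SIZE. port_id and nb_rx_queues are hypothetical.
 *
 *     struct rte_eth_rss_reta_entry64 conf[VIRTIO_NET_RSS_RETA_SIZE /
 *                                          RTE_ETH_RETA_GROUP_SIZE] = { 0 };
 *     int i;
 *
 *     for (i = 0; i < VIRTIO_NET_RSS_RETA_SIZE; i++) {
 *             conf[i / RTE_ETH_RETA_GROUP_SIZE].mask |=
 *                     1ULL << (i % RTE_ETH_RETA_GROUP_SIZE);
 *             conf[i / RTE_ETH_RETA_GROUP_SIZE].reta[i % RTE_ETH_RETA_GROUP_SIZE] =
 *                     i % nb_rx_queues;
 *     }
 *     rte_eth_dev_rss_reta_update(port_id, conf, VIRTIO_NET_RSS_RETA_SIZE);
 */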
2093
2094 static int virtio_dev_rss_reta_query(struct rte_eth_dev *dev,
2095                          struct rte_eth_rss_reta_entry64 *reta_conf,
2096                          uint16_t reta_size)
2097 {
2098         struct virtio_hw *hw = dev->data->dev_private;
2099         int idx, i;
2100
2101         if (!virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2102                 return -ENOTSUP;
2103
2104         if (reta_size != VIRTIO_NET_RSS_RETA_SIZE)
2105                 return -EINVAL;
2106
2107         for (i = 0; i < reta_size; i++) {
2108                 idx = i / RTE_ETH_RETA_GROUP_SIZE;
2109                 reta_conf[idx].reta[i % RTE_ETH_RETA_GROUP_SIZE] = hw->rss_reta[i];
2110         }
2111
2112         return 0;
2113 }
2114
2115 /*
2116  * The default RSS hash key is the default key of Intel IXGBE
2117  * devices. The application can replace it with any 40-byte key
2118  * value (see the usage sketch below).
2119  */
2120 static uint8_t rss_intel_key[VIRTIO_NET_RSS_KEY_SIZE] = {
2121         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2122         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2123         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2124         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2125         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2126 };
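
/*
 * Usage sketch (hypothetical port_id and key contents): the application can
 * install its own 40-byte key and hash types at runtime:
 *
 *     uint8_t key[VIRTIO_NET_RSS_KEY_SIZE] = { ... };
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = key,
 *             .rss_key_len = VIRTIO_NET_RSS_KEY_SIZE,
 *             .rss_hf = RTE_ETH_RSS_IP | RTE_ETH_RSS_TCP,
 *     };
 *
 *     rte_eth_dev_rss_hash_update(port_id, &conf);
 */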
2127
2128 static int
2129 virtio_dev_rss_init(struct rte_eth_dev *eth_dev)
2130 {
2131         struct virtio_hw *hw = eth_dev->data->dev_private;
2132         uint16_t nb_rx_queues = eth_dev->data->nb_rx_queues;
2133         struct rte_eth_rss_conf *rss_conf;
2134         int ret, i;
2135
2136         if (!nb_rx_queues) {
2137                 PMD_INIT_LOG(ERR, "Cannot init RSS if no Rx queues");
2138                 return -EINVAL;
2139         }
2140
2141         rss_conf = &eth_dev->data->dev_conf.rx_adv_conf.rss_conf;
2142
2143         ret = virtio_dev_get_rss_config(hw, &hw->rss_hash_types);
2144         if (ret)
2145                 return ret;
2146
2147         if (rss_conf->rss_hf) {
2148                 /* Ensure the requested hash types are supported by the device */
2149                 if (rss_conf->rss_hf & ~virtio_to_ethdev_rss_offloads(hw->rss_hash_types))
2150                         return -EINVAL;
2151
2152                 hw->rss_hash_types = ethdev_to_virtio_rss_offloads(rss_conf->rss_hf);
2153         }
2154
2155         if (!hw->rss_key) {
2156                 /* Set up the default RSS key if not already set by the user */
2157                 hw->rss_key = rte_malloc_socket("rss_key",
2158                                 VIRTIO_NET_RSS_KEY_SIZE, 0,
2159                                 eth_dev->device->numa_node);
2160                 if (!hw->rss_key) {
2161                         PMD_INIT_LOG(ERR, "Failed to allocate RSS key");
2162                         return -1;
2163                 }
2164         }
2165
2166         if (rss_conf->rss_key && rss_conf->rss_key_len) {
2167                 if (rss_conf->rss_key_len != VIRTIO_NET_RSS_KEY_SIZE) {
2168                         PMD_INIT_LOG(ERR, "Driver only supports an RSS key length of %u",
2169                                         VIRTIO_NET_RSS_KEY_SIZE);
2170                         return -EINVAL;
2171                 }
2172                 memcpy(hw->rss_key, rss_conf->rss_key, VIRTIO_NET_RSS_KEY_SIZE);
2173         } else {
2174                 memcpy(hw->rss_key, rss_intel_key, VIRTIO_NET_RSS_KEY_SIZE);
2175         }
2176
2177         if (!hw->rss_reta) {
2178                 /* Set up the default RSS reta if not already set by the user */
2179                 hw->rss_reta = rte_zmalloc_socket("rss_reta",
2180                                 VIRTIO_NET_RSS_RETA_SIZE * sizeof(uint16_t), 0,
2181                                 eth_dev->device->numa_node);
2182                 if (!hw->rss_reta) {
2183                         PMD_INIT_LOG(ERR, "Failed to allocate RSS reta");
2184                         return -1;
2185                 }
2186
2187                 hw->rss_rx_queues = 0;
2188         }
2189
2190         /* Re-initialize the RSS reta if the number of RX queues has changed */
2191         if (hw->rss_rx_queues != nb_rx_queues) {
2192                 for (i = 0; i < VIRTIO_NET_RSS_RETA_SIZE; i++)
2193                         hw->rss_reta[i] = i % nb_rx_queues;
2194                 hw->rss_rx_queues = nb_rx_queues;
2195         }
2196
2197         return 0;
2198 }
2199
2200 #define DUPLEX_UNKNOWN   0xff
2201 /* reset device and renegotiate features if needed */
2202 static int
2203 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
2204 {
2205         struct virtio_hw *hw = eth_dev->data->dev_private;
2206         struct virtio_net_config *config;
2207         struct virtio_net_config local_config;
2208         int ret;
2209
2210         /* Reset the device, although it is not strictly necessary at startup */
2211         virtio_reset(hw);
2212
2213         if (hw->vqs) {
2214                 virtio_dev_free_mbufs(eth_dev);
2215                 virtio_free_queues(hw);
2216         }
2217
2218         /* Tell the host we've noticed this device. */
2219         virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
2220
2221         /* Tell the host we know how to drive the device. */
2222         virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
2223         if (virtio_ethdev_negotiate_features(hw, req_features) < 0)
2224                 return -1;
2225
2226         hw->weak_barriers = !virtio_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);
2227
2228         /* Disable LSC unless the host supports both the status feature and MSI-X */
2229         if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS) && hw->intr_lsc)
2230                 eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
2231         else
2232                 eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
2233
2234         eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
2235
2236         /* Set up the Rx header size for the device */
2237         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
2238             virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2239             virtio_with_packed_queue(hw))
2240                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2241         else
2242                 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
2243
2244         /* Copy the permanent MAC address into virtio_hw */
2245         virtio_get_hwaddr(hw);
2246         rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
2247                         &eth_dev->data->mac_addrs[0]);
2248         PMD_INIT_LOG(DEBUG,
2249                      "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
2250                      hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
2251                      hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
2252
2253         hw->get_speed_via_feat = hw->speed == RTE_ETH_SPEED_NUM_UNKNOWN &&
2254                              virtio_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX);
2255         if (hw->get_speed_via_feat)
2256                 virtio_get_speed_duplex(eth_dev, NULL);
2257         if (hw->duplex == DUPLEX_UNKNOWN)
2258                 hw->duplex = RTE_ETH_LINK_FULL_DUPLEX;
2259         PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
2260                 hw->speed, hw->duplex);
2261         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
2262                 config = &local_config;
2263
2264                 virtio_read_dev_config(hw,
2265                         offsetof(struct virtio_net_config, mac),
2266                         &config->mac, sizeof(config->mac));
2267
2268                 if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2269                         virtio_read_dev_config(hw,
2270                                 offsetof(struct virtio_net_config, status),
2271                                 &config->status, sizeof(config->status));
2272                 } else {
2273                         PMD_INIT_LOG(DEBUG,
2274                                      "VIRTIO_NET_F_STATUS is not supported");
2275                         config->status = 0;
2276                 }
2277
2278                 if (virtio_with_feature(hw, VIRTIO_NET_F_MQ) ||
2279                                 virtio_with_feature(hw, VIRTIO_NET_F_RSS)) {
2280                         virtio_read_dev_config(hw,
2281                                 offsetof(struct virtio_net_config, max_virtqueue_pairs),
2282                                 &config->max_virtqueue_pairs,
2283                                 sizeof(config->max_virtqueue_pairs));
2284                 } else {
2285                         PMD_INIT_LOG(DEBUG,
2286                                      "Neither VIRTIO_NET_F_MQ nor VIRTIO_NET_F_RSS is supported");
2287                         config->max_virtqueue_pairs = 1;
2288                 }
2289
2290                 hw->max_queue_pairs = config->max_virtqueue_pairs;
2291
2292                 if (virtio_with_feature(hw, VIRTIO_NET_F_MTU)) {
2293                         virtio_read_dev_config(hw,
2294                                 offsetof(struct virtio_net_config, mtu),
2295                                 &config->mtu,
2296                                 sizeof(config->mtu));
2297
2298                         /*
2299                          * MTU value has already been checked at negotiation
2300                          * time, but check again in case it has changed since
2301                          * then, which should not happen.
2302                          */
2303                         if (config->mtu < RTE_ETHER_MIN_MTU) {
2304                                 PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
2305                                                 config->mtu);
2306                                 return -1;
2307                         }
2308
2309                         hw->max_mtu = config->mtu;
2310                         /* Set the initial MTU to the maximum supported by vhost */
2311                         eth_dev->data->mtu = config->mtu;
2312
2313                 } else {
2314                         hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
2315                                 VLAN_TAG_LEN - hw->vtnet_hdr_size;
2316                 }
2317
2318                 hw->rss_hash_types = 0;
2319                 if (virtio_with_feature(hw, VIRTIO_NET_F_RSS))
2320                         if (virtio_dev_rss_init(eth_dev))
2321                                 return -1;
2322
2323                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
2324                                 config->max_virtqueue_pairs);
2325                 PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
2326                 PMD_INIT_LOG(DEBUG,
2327                                 "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
2328                                 config->mac[0], config->mac[1],
2329                                 config->mac[2], config->mac[3],
2330                                 config->mac[4], config->mac[5]);
2331         } else {
2332                 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
2333                 hw->max_queue_pairs = 1;
2334                 hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
2335                         VLAN_TAG_LEN - hw->vtnet_hdr_size;
2336         }
2337
2338         ret = virtio_alloc_queues(eth_dev);
2339         if (ret < 0)
2340                 return ret;
2341
2342         if (eth_dev->data->dev_conf.intr_conf.rxq) {
2343                 if (virtio_configure_intr(eth_dev) < 0) {
2344                         PMD_INIT_LOG(ERR, "failed to configure interrupt");
2345                         virtio_free_queues(hw);
2346                         return -1;
2347                 }
2348         }
2349
2350         virtio_reinit_complete(hw);
2351
2352         return 0;
2353 }
2354
2355 /*
2356  * This function is based on the probe() function in virtio_pci.c.
2357  * It returns 0 on success.
2358  */
2359 int
2360 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
2361 {
2362         struct virtio_hw *hw = eth_dev->data->dev_private;
2363         uint32_t speed = RTE_ETH_SPEED_NUM_UNKNOWN;
2364         int vectorized = 0;
2365         int ret;
2366
2367         if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) {
2368                 PMD_INIT_LOG(ERR,
2369                         "Insufficient headroom: required = %d, avail = %d",
2370                         (int)sizeof(struct virtio_net_hdr_mrg_rxbuf),
2371                         RTE_PKTMBUF_HEADROOM);
2372
2373                 return -1;
2374         }
2375
2376         eth_dev->dev_ops = &virtio_eth_dev_ops;
2377
2378         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
2379                 set_rxtx_funcs(eth_dev);
2380                 return 0;
2381         }
2382
2383         ret = virtio_dev_devargs_parse(eth_dev->device->devargs, &speed, &vectorized);
2384         if (ret < 0)
2385                 return ret;
2386         hw->speed = speed;
2387         hw->duplex = DUPLEX_UNKNOWN;
2388
2389         /* Allocate memory for storing MAC addresses */
2390         eth_dev->data->mac_addrs = rte_zmalloc("virtio",
2391                                 VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN, 0);
2392         if (eth_dev->data->mac_addrs == NULL) {
2393                 PMD_INIT_LOG(ERR,
2394                         "Failed to allocate %d bytes needed to store MAC addresses",
2395                         VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN);
2396                 return -ENOMEM;
2397         }
2398
2399         rte_spinlock_init(&hw->state_lock);
2400
2401         /* reset device and negotiate default features */
2402         ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
2403         if (ret < 0)
2404                 goto err_virtio_init;
2405
2406         if (vectorized) {
2407                 if (!virtio_with_packed_queue(hw)) {
2408                         hw->use_vec_rx = 1;
2409                 } else {
2410 #if defined(CC_AVX512_SUPPORT) || defined(RTE_ARCH_ARM)
2411                         hw->use_vec_rx = 1;
2412                         hw->use_vec_tx = 1;
2413 #else
2414                         PMD_DRV_LOG(INFO,
2415                                 "build environment does not support packed ring vectorization");
2416 #endif
2417                 }
2418         }
2419
2420         hw->opened = 1;
2421
2422         return 0;
2423
2424 err_virtio_init:
2425         rte_free(eth_dev->data->mac_addrs);
2426         eth_dev->data->mac_addrs = NULL;
2427         return ret;
2428 }
2429
2430 static uint32_t
2431 virtio_dev_speed_capa_get(uint32_t speed)
2432 {
2433         switch (speed) {
2434         case RTE_ETH_SPEED_NUM_10G:
2435                 return RTE_ETH_LINK_SPEED_10G;
2436         case RTE_ETH_SPEED_NUM_20G:
2437                 return RTE_ETH_LINK_SPEED_20G;
2438         case RTE_ETH_SPEED_NUM_25G:
2439                 return RTE_ETH_LINK_SPEED_25G;
2440         case RTE_ETH_SPEED_NUM_40G:
2441                 return RTE_ETH_LINK_SPEED_40G;
2442         case RTE_ETH_SPEED_NUM_50G:
2443                 return RTE_ETH_LINK_SPEED_50G;
2444         case RTE_ETH_SPEED_NUM_56G:
2445                 return RTE_ETH_LINK_SPEED_56G;
2446         case RTE_ETH_SPEED_NUM_100G:
2447                 return RTE_ETH_LINK_SPEED_100G;
2448         case RTE_ETH_SPEED_NUM_200G:
2449                 return RTE_ETH_LINK_SPEED_200G;
2450         default:
2451                 return 0;
2452         }
2453 }
2454
2455 static int vectorized_check_handler(__rte_unused const char *key,
2456                 const char *value, void *ret_val)
2457 {
2458         if (strcmp(value, "1") == 0)
2459                 *(int *)ret_val = 1;
2460         else
2461                 *(int *)ret_val = 0;
2462
2463         return 0;
2464 }
2465
2466 #define VIRTIO_ARG_SPEED      "speed"
2467 #define VIRTIO_ARG_VECTORIZED "vectorized"
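
/*
 * Illustrative devargs usage for the keys above (the PCI address is
 * hypothetical):
 *
 *     dpdk-testpmd -a 0000:00:07.0,speed=25000,vectorized=1 -- -i
 *
 * "speed" forces the link speed reported to the application when the device
 * does not expose one; "vectorized" requests the vectorized Rx/Tx datapaths.
 */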
2468
2469 static int
2470 link_speed_handler(const char *key __rte_unused,
2471                 const char *value, void *ret_val)
2472 {
2473         uint32_t val;
2474         if (!value || !ret_val)
2475                 return -EINVAL;
2476         val = strtoul(value, NULL, 0);
2477         /* validate input */
2478         if (virtio_dev_speed_capa_get(val) == 0)
2479                 return -EINVAL;
2480         *(uint32_t *)ret_val = val;
2481
2482         return 0;
2483 }
2484
2485
2486 static int
2487 virtio_dev_devargs_parse(struct rte_devargs *devargs, uint32_t *speed, int *vectorized)
2488 {
2489         struct rte_kvargs *kvlist;
2490         int ret = 0;
2491
2492         if (devargs == NULL)
2493                 return 0;
2494
2495         kvlist = rte_kvargs_parse(devargs->args, NULL);
2496         if (kvlist == NULL) {
2497                 PMD_INIT_LOG(ERR, "error when parsing param");
2498                 return 0;
2499         }
2500
2501         if (speed && rte_kvargs_count(kvlist, VIRTIO_ARG_SPEED) == 1) {
2502                 ret = rte_kvargs_process(kvlist,
2503                                         VIRTIO_ARG_SPEED,
2504                                         link_speed_handler, speed);
2505                 if (ret < 0) {
2506                         PMD_INIT_LOG(ERR, "Failed to parse %s",
2507                                         VIRTIO_ARG_SPEED);
2508                         goto exit;
2509                 }
2510         }
2511
2512         if (vectorized &&
2513                 rte_kvargs_count(kvlist, VIRTIO_ARG_VECTORIZED) == 1) {
2514                 ret = rte_kvargs_process(kvlist,
2515                                 VIRTIO_ARG_VECTORIZED,
2516                                 vectorized_check_handler, vectorized);
2517                 if (ret < 0) {
2518                         PMD_INIT_LOG(ERR, "Failed to parse %s",
2519                                         VIRTIO_ARG_VECTORIZED);
2520                         goto exit;
2521                 }
2522         }
2523
2524 exit:
2525         rte_kvargs_free(kvlist);
2526         return ret;
2527 }
2528
2529 static uint8_t
2530 rx_offload_enabled(struct virtio_hw *hw)
2531 {
2532         return virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
2533                 virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2534                 virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
2535 }
2536
2537 static uint8_t
2538 tx_offload_enabled(struct virtio_hw *hw)
2539 {
2540         return virtio_with_feature(hw, VIRTIO_NET_F_CSUM) ||
2541                 virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
2542                 virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
2543 }
2544
2545 /*
2546  * Configure the virtio device.
2547  * It returns 0 on success.
2548  */
2549 static int
2550 virtio_dev_configure(struct rte_eth_dev *dev)
2551 {
2552         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2553         const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
2554         struct virtio_hw *hw = dev->data->dev_private;
2555         uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
2556                 hw->vtnet_hdr_size;
2557         uint64_t rx_offloads = rxmode->offloads;
2558         uint64_t tx_offloads = txmode->offloads;
2559         uint64_t req_features;
2560         int ret;
2561
2562         PMD_INIT_LOG(DEBUG, "configure");
2563         req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
2564
2565         if (rxmode->mq_mode != RTE_ETH_MQ_RX_NONE && rxmode->mq_mode != RTE_ETH_MQ_RX_RSS) {
2566                 PMD_DRV_LOG(ERR,
2567                         "Unsupported Rx multi queue mode %d",
2568                         rxmode->mq_mode);
2569                 return -EINVAL;
2570         }
2571
2572         if (txmode->mq_mode != RTE_ETH_MQ_TX_NONE) {
2573                 PMD_DRV_LOG(ERR,
2574                         "Unsupported Tx multi queue mode %d",
2575                         txmode->mq_mode);
2576                 return -EINVAL;
2577         }
2578
2579         if (dev->data->dev_conf.intr_conf.rxq) {
2580                 ret = virtio_init_device(dev, hw->req_guest_features);
2581                 if (ret < 0)
2582                         return ret;
2583         }
2584
2585         if (rxmode->mq_mode == RTE_ETH_MQ_RX_RSS)
2586                 req_features |= (1ULL << VIRTIO_NET_F_RSS);
2587
2588         if (rxmode->mtu > hw->max_mtu)
2589                 req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
2590
2591         hw->max_rx_pkt_len = ether_hdr_len + rxmode->mtu;
2592
2593         if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2594                            RTE_ETH_RX_OFFLOAD_TCP_CKSUM))
2595                 req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
2596
2597         if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)
2598                 req_features |=
2599                         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2600                         (1ULL << VIRTIO_NET_F_GUEST_TSO6);
2601
2602         if (tx_offloads & (RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
2603                            RTE_ETH_TX_OFFLOAD_TCP_CKSUM))
2604                 req_features |= (1ULL << VIRTIO_NET_F_CSUM);
2605
2606         if (tx_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)
2607                 req_features |=
2608                         (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2609                         (1ULL << VIRTIO_NET_F_HOST_TSO6);
2610
2611         /* if the requested features changed, reinit the device */
2612         if (req_features != hw->req_guest_features) {
2613                 ret = virtio_init_device(dev, req_features);
2614                 if (ret < 0)
2615                         return ret;
2616         }
2617
2618         if ((rxmode->mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) &&
2619                         !virtio_with_feature(hw, VIRTIO_NET_F_RSS)) {
2620                 PMD_DRV_LOG(ERR, "RSS support requested but not supported by the device");
2621                 return -ENOTSUP;
2622         }
2623
2624         if ((rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2625                             RTE_ETH_RX_OFFLOAD_TCP_CKSUM)) &&
2626                 !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2627                 PMD_DRV_LOG(ERR,
2628                         "rx checksum not available on this host");
2629                 return -ENOTSUP;
2630         }
2631
2632         if ((rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) &&
2633                 (!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2634                  !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2635                 PMD_DRV_LOG(ERR,
2636                         "Large Receive Offload not available on this host");
2637                 return -ENOTSUP;
2638         }
2639
2640         /* start control queue */
2641         if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2642                 virtio_dev_cq_start(dev);
2643
2644         if (rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
2645                 hw->vlan_strip = 1;
2646
2647         hw->rx_ol_scatter = (rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER);
2648
2649         if ((rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER) &&
2650                         !virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2651                 PMD_DRV_LOG(ERR,
2652                             "vlan filtering not available on this host");
2653                 return -ENOTSUP;
2654         }
2655
2656         hw->has_tx_offload = tx_offload_enabled(hw);
2657         hw->has_rx_offload = rx_offload_enabled(hw);
2658
2659         if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2660                 /* Enable vector (0) for Link State Interrupt */
2661                 if (VIRTIO_OPS(hw)->set_config_irq(hw, 0) ==
2662                                 VIRTIO_MSI_NO_VECTOR) {
2663                         PMD_DRV_LOG(ERR, "failed to set config vector");
2664                         return -EBUSY;
2665                 }
2666
2667         if (virtio_with_packed_queue(hw)) {
2668 #if defined(RTE_ARCH_X86_64) && defined(CC_AVX512_SUPPORT)
2669                 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2670                     (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
2671                      !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2672                      !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2673                      rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512)) {
2674                         PMD_DRV_LOG(INFO,
2675                                 "disabled packed ring vectorized path for requirements not met");
2676                         hw->use_vec_rx = 0;
2677                         hw->use_vec_tx = 0;
2678                 }
2679 #elif defined(RTE_ARCH_ARM)
2680                 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2681                     (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) ||
2682                      !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2683                      !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2684                      rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)) {
2685                         PMD_DRV_LOG(INFO,
2686                                 "disabled packed ring vectorized path for requirements not met");
2687                         hw->use_vec_rx = 0;
2688                         hw->use_vec_tx = 0;
2689                 }
2690 #else
2691                 hw->use_vec_rx = 0;
2692                 hw->use_vec_tx = 0;
2693 #endif
2694
2695                 if (hw->use_vec_rx) {
2696                         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2697                                 PMD_DRV_LOG(INFO,
2698                                         "disabled packed ring vectorized rx for mrg_rxbuf enabled");
2699                                 hw->use_vec_rx = 0;
2700                         }
2701
2702                         if (rx_offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
2703                                 PMD_DRV_LOG(INFO,
2704                                         "disabled packed ring vectorized rx for TCP_LRO enabled");
2705                                 hw->use_vec_rx = 0;
2706                         }
2707                 }
2708         } else {
2709                 if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2710                         hw->use_inorder_tx = 1;
2711                         hw->use_inorder_rx = 1;
2712                         hw->use_vec_rx = 0;
2713                 }
2714
2715                 if (hw->use_vec_rx) {
2716 #if defined RTE_ARCH_ARM
2717                         if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2718                                 PMD_DRV_LOG(INFO,
2719                                         "disabled split ring vectorized path for requirements not met");
2720                                 hw->use_vec_rx = 0;
2721                         }
2722 #endif
2723                         if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2724                                 PMD_DRV_LOG(INFO,
2725                                         "disabled split ring vectorized rx for mrg_rxbuf enabled");
2726                                 hw->use_vec_rx = 0;
2727                         }
2728
2729                         if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
2730                                            RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
2731                                            RTE_ETH_RX_OFFLOAD_TCP_LRO |
2732                                            RTE_ETH_RX_OFFLOAD_VLAN_STRIP)) {
2733                                 PMD_DRV_LOG(INFO,
2734                                         "disabled split ring vectorized rx for offloading enabled");
2735                                 hw->use_vec_rx = 0;
2736                         }
2737
2738                         if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
2739                                 PMD_DRV_LOG(INFO,
2740                                         "disabled split ring vectorized rx, max SIMD bitwidth too low");
2741                                 hw->use_vec_rx = 0;
2742                         }
2743                 }
2744         }
2745
2746         return 0;
2747 }
2748
2749
2750 static int
2751 virtio_dev_start(struct rte_eth_dev *dev)
2752 {
2753         uint16_t nb_queues, i;
2754         struct virtqueue *vq;
2755         struct virtio_hw *hw = dev->data->dev_private;
2756         int ret;
2757
2758         /* Finish the initialization of the queues */
2759         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2760                 ret = virtio_dev_rx_queue_setup_finish(dev, i);
2761                 if (ret < 0)
2762                         return ret;
2763         }
2764         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2765                 ret = virtio_dev_tx_queue_setup_finish(dev, i);
2766                 if (ret < 0)
2767                         return ret;
2768         }
2769
2770         /* check if lsc interrupt feature is enabled */
2771         if (dev->data->dev_conf.intr_conf.lsc) {
2772                 if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2773                         PMD_DRV_LOG(ERR, "link status not supported by host");
2774                         return -ENOTSUP;
2775                 }
2776         }
2777
2778         /* Enable uio/vfio intr/eventfd mapping: we already did that in
2779          * device configure, but it could have been unmapped when the device
2780          * was stopped.
2781          */
2782         if (dev->data->dev_conf.intr_conf.lsc ||
2783             dev->data->dev_conf.intr_conf.rxq) {
2784                 virtio_intr_disable(dev);
2785
2786                 /* Set up the interrupt callback */
2787                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2788                         rte_intr_callback_register(dev->intr_handle,
2789                                                    virtio_interrupt_handler,
2790                                                    dev);
2791
2792                 if (virtio_intr_enable(dev) < 0) {
2793                         PMD_DRV_LOG(ERR, "interrupt enable failed");
2794                         return -EIO;
2795                 }
2796         }
2797
2798         /* Notify the backend. Otherwise the tap backend might already
2799          * have stopped its queue due to fullness, and the vhost backend
2800          * would have no chance to be woken up.
2801          */
2802         nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2803         if (hw->max_queue_pairs > 1) {
2804                 if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2805                         return -EINVAL;
2806         }
2807
2808         PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
2809
2810         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2811                 vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2812                 /* Flush the old packets */
2813                 virtqueue_rxvq_flush(vq);
2814                 virtqueue_notify(vq);
2815         }
2816
2817         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2818                 vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2819                 virtqueue_notify(vq);
2820         }
2821
2822         PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
2823
2824         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2825                 vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2826                 VIRTQUEUE_DUMP(vq);
2827         }
2828
2829         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2830                 vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2831                 VIRTQUEUE_DUMP(vq);
2832         }
2833
2834         set_rxtx_funcs(dev);
2835         hw->started = 1;
2836
2837         /* Initialize link state */
2838         virtio_dev_link_update(dev, 0);
2839
2840         return 0;
2841 }
2842
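/*
 * Detach and free all mbufs still held by the Rx/Tx virtqueues.
 */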
2843 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2844 {
2845         struct virtio_hw *hw = dev->data->dev_private;
2846         uint16_t nr_vq = virtio_get_nr_vq(hw);
2847         const char *type __rte_unused;
2848         unsigned int i, mbuf_num = 0;
2849         struct virtqueue *vq;
2850         struct rte_mbuf *buf;
2851         int queue_type;
2852
2853         if (hw->vqs == NULL)
2854                 return;
2855
2856         for (i = 0; i < nr_vq; i++) {
2857                 vq = hw->vqs[i];
2858                 if (!vq)
2859                         continue;
2860
2861                 queue_type = virtio_get_queue_type(hw, i);
2862                 if (queue_type == VTNET_RQ)
2863                         type = "rxq";
2864                 else if (queue_type == VTNET_TQ)
2865                         type = "txq";
2866                 else
2867                         continue;
2868
2869                 PMD_INIT_LOG(DEBUG,
2870                         "Before freeing %s[%d] used and unused bufs",
2871                         type, i);
2872                 VIRTQUEUE_DUMP(vq);
2873
2874                 while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2875                         rte_pktmbuf_free(buf);
2876                         mbuf_num++;
2877                 }
2878
2879                 PMD_INIT_LOG(DEBUG,
2880                         "After freeing %s[%d] used and unused bufs",
2881                         type, i);
2882                 VIRTQUEUE_DUMP(vq);
2883         }
2884
2885         PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2886 }
2887
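/*
 * Drain completed Tx descriptors on every Tx virtqueue, using the cleanup
 * variant that matches the negotiated ring layout.
 */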
2888 static void
2889 virtio_tx_completed_cleanup(struct rte_eth_dev *dev)
2890 {
2891         struct virtio_hw *hw = dev->data->dev_private;
2892         struct virtqueue *vq;
2893         int qidx;
2894         void (*xmit_cleanup)(struct virtqueue *vq, uint16_t nb_used);
2895
2896         if (virtio_with_packed_queue(hw)) {
2897                 if (hw->use_vec_tx)
2898                         xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2899                 else if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER))
2900                         xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2901                 else
2902                         xmit_cleanup = &virtio_xmit_cleanup_normal_packed;
2903         } else {
2904                 if (hw->use_inorder_tx)
2905                         xmit_cleanup = &virtio_xmit_cleanup_inorder;
2906                 else
2907                         xmit_cleanup = &virtio_xmit_cleanup;
2908         }
2909
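        /* Tx virtqueues are at 2 * qidx + VTNET_SQ_TQ_QUEUE_IDX. */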
2910         for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
2911                 vq = hw->vqs[2 * qidx + VTNET_SQ_TQ_QUEUE_IDX];
2912                 if (vq != NULL)
2913                         xmit_cleanup(vq, virtqueue_nused(vq));
2914         }
2915 }
2916
2917 /*
2918  * Stop device: disable interrupts and mark the link down
2919  */
2920 int
2921 virtio_dev_stop(struct rte_eth_dev *dev)
2922 {
2923         struct virtio_hw *hw = dev->data->dev_private;
2924         struct rte_eth_link link;
2925         struct rte_eth_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2926
2927         PMD_INIT_LOG(DEBUG, "stop");
2928         dev->data->dev_started = 0;
2929
2930         rte_spinlock_lock(&hw->state_lock);
2931         if (!hw->started)
2932                 goto out_unlock;
2933         hw->started = 0;
2934
2935         virtio_tx_completed_cleanup(dev);
2936
2937         if (intr_conf->lsc || intr_conf->rxq) {
2938                 virtio_intr_disable(dev);
2939
2940                 /* Unregister the interrupt callback */
2941                 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2942                         rte_intr_callback_unregister(dev->intr_handle,
2943                                                      virtio_interrupt_handler,
2944                                                      dev);
2945                 }
2946         }
2947
2948         memset(&link, 0, sizeof(link));
2949         rte_eth_linkstatus_set(dev, &link);
2950 out_unlock:
2951         rte_spinlock_unlock(&hw->state_lock);
2952
2953         return 0;
2954 }
2955
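/*
 * Update link status: read it from the device when VIRTIO_NET_F_STATUS was
 * negotiated, otherwise assume the link is up while the port is started.
 */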
2956 static int
2957 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2958 {
2959         struct rte_eth_link link;
2960         uint16_t status;
2961         struct virtio_hw *hw = dev->data->dev_private;
2962
2963         memset(&link, 0, sizeof(link));
2964         link.link_duplex = hw->duplex;
2965         link.link_speed  = hw->speed;
2966         link.link_autoneg = RTE_ETH_LINK_AUTONEG;
2967
2968         if (!hw->started) {
2969                 link.link_status = RTE_ETH_LINK_DOWN;
2970                 link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2971         } else if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2972                 PMD_INIT_LOG(DEBUG, "Get link status from hw");
2973                 virtio_read_dev_config(hw,
2974                                 offsetof(struct virtio_net_config, status),
2975                                 &status, sizeof(status));
2976                 if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2977                         link.link_status = RTE_ETH_LINK_DOWN;
2978                         link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2979                         PMD_INIT_LOG(DEBUG, "Port %d is down",
2980                                      dev->data->port_id);
2981                 } else {
2982                         link.link_status = RTE_ETH_LINK_UP;
2983                         if (hw->get_speed_via_feat)
2984                                 virtio_get_speed_duplex(dev, &link);
2985                         PMD_INIT_LOG(DEBUG, "Port %d is up",
2986                                      dev->data->port_id);
2987                 }
2988         } else {
2989                 link.link_status = RTE_ETH_LINK_UP;
2990                 if (hw->get_speed_via_feat)
2991                         virtio_get_speed_duplex(dev, &link);
2992         }
2993
2994         return rte_eth_linkstatus_set(dev, &link);
2995 }
2996
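/*
 * Apply VLAN offload settings; filtering requires VIRTIO_NET_F_CTRL_VLAN
 * support from the host.
 */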
2997 static int
2998 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2999 {
3000         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
3001         struct virtio_hw *hw = dev->data->dev_private;
3002         uint64_t offloads = rxmode->offloads;
3003
3004         if (mask & RTE_ETH_VLAN_FILTER_MASK) {
3005                 if ((offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER) &&
3006                                 !virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
3007
3008                         PMD_DRV_LOG(NOTICE,
3009                                 "vlan filtering not available on this host");
3010
3011                         return -ENOTSUP;
3012                 }
3013         }
3014
3015         if (mask & RTE_ETH_VLAN_STRIP_MASK)
3016                 hw->vlan_strip = !!(offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
3017
3018         return 0;
3019 }
3020
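/*
 * Report device capabilities, derived from the feature bits offered by
 * the host.
 */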
3021 static int
3022 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
3023 {
3024         uint64_t tso_mask, host_features;
3025         uint32_t rss_hash_types = 0;
3026         struct virtio_hw *hw = dev->data->dev_private;
3027         dev_info->speed_capa = virtio_dev_speed_capa_get(hw->speed);
3028
3029         dev_info->max_rx_queues =
3030                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
3031         dev_info->max_tx_queues =
3032                 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
3033         dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
3034         dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
3035         dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
3036         dev_info->max_mtu = hw->max_mtu;
3037
3038         host_features = VIRTIO_OPS(hw)->get_features(hw);
3039         dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
3040         if (host_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
3041                 dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_SCATTER;
3042         if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
3043                 dev_info->rx_offload_capa |=
3044                         RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
3045                         RTE_ETH_RX_OFFLOAD_UDP_CKSUM;
3046         }
3047         if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
3048                 dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
3049         tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
3050                 (1ULL << VIRTIO_NET_F_GUEST_TSO6);
3051         if ((host_features & tso_mask) == tso_mask)
3052                 dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
3053
3054         dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
3055                                     RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
3056         if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
3057                 dev_info->tx_offload_capa |=
3058                         RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
3059                         RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
3060         }
3061         tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
3062                 (1ULL << VIRTIO_NET_F_HOST_TSO6);
3063         if ((host_features & tso_mask) == tso_mask)
3064                 dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
3065
3066         if (host_features & (1ULL << VIRTIO_NET_F_RSS)) {
3067                 virtio_dev_get_rss_config(hw, &rss_hash_types);
3068                 dev_info->hash_key_size = VIRTIO_NET_RSS_KEY_SIZE;
3069                 dev_info->reta_size = VIRTIO_NET_RSS_RETA_SIZE;
3070                 dev_info->flow_type_rss_offloads =
3071                         virtio_to_ethdev_rss_offloads(rss_hash_types);
3072         } else {
3073                 dev_info->hash_key_size = 0;
3074                 dev_info->reta_size = 0;
3075                 dev_info->flow_type_rss_offloads = 0;
3076         }
3077
3078         if (host_features & (1ULL << VIRTIO_F_RING_PACKED)) {
3079                 /*
3080                  * According to 2.7 Packed Virtqueues,
3081                  * 2.7.10.1 Structure Size and Alignment:
3082                  * The Queue Size value does not have to be a power of 2.
3083                  */
3084                 dev_info->rx_desc_lim.nb_max = UINT16_MAX;
3085                 dev_info->tx_desc_lim.nb_max = UINT16_MAX;
3086         } else {
3087                 /*
3088                  * According to 2.6 Split Virtqueues:
3089                  * Queue Size value is always a power of 2. The maximum Queue
3090                  * Size value is 32768.
3091                  */
3092                 dev_info->rx_desc_lim.nb_max = 32768;
3093                 dev_info->tx_desc_lim.nb_max = 32768;
3094         }
3095         /*
3096          * The actual minimum differs between virtqueue kinds, but to avoid
3097          * tangling the code with separate branches, rely on the default
3098          * thresholds: the descriptor count must be at least their size.
3099          */
3100         dev_info->rx_desc_lim.nb_min = RTE_MAX(DEFAULT_RX_FREE_THRESH,
3101                                                RTE_VIRTIO_VPMD_RX_REARM_THRESH);
3102         dev_info->tx_desc_lim.nb_min = DEFAULT_TX_FREE_THRESH;
3103         dev_info->rx_desc_lim.nb_align = 1;
3104         dev_info->tx_desc_lim.nb_align = 1;
3105
3106         return 0;
3107 }
3108
3109 /*
3110  * This stub enables testpmd to collect per-queue stats.
3111  */
3112 static int
3113 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
3114                 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
3115                 __rte_unused uint8_t is_rx)
3116 {
3117         return 0;
3118 }
3119
3120 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_init, init, NOTICE);
3121 RTE_LOG_REGISTER_SUFFIX(virtio_logtype_driver, driver, NOTICE);