virtio: simplify queue allocation
[dpdk.git] / drivers/net/virtio/virtio_ethdev.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>

#include <rte_ethdev.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_pci.h>
#include <rte_ether.h>
#include <rte_common.h>
#include <rte_errno.h>

#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_dev.h>

#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtio_logs.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"

static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
static int  virtio_dev_configure(struct rte_eth_dev *dev);
static int  virtio_dev_start(struct rte_eth_dev *dev);
static void virtio_dev_stop(struct rte_eth_dev *dev);
static void virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
static void virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
static void virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
static void virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
static void virtio_dev_info_get(struct rte_eth_dev *dev,
                                struct rte_eth_dev_info *dev_info);
static int virtio_dev_link_update(struct rte_eth_dev *dev,
        __rte_unused int wait_to_complete);

static void virtio_set_hwaddr(struct virtio_hw *hw);
static void virtio_get_hwaddr(struct virtio_hw *hw);

static void virtio_dev_stats_get(struct rte_eth_dev *dev,
                                 struct rte_eth_stats *stats);
static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
                                 struct rte_eth_xstats *xstats, unsigned n);
static void virtio_dev_stats_reset(struct rte_eth_dev *dev);
static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
                                uint16_t vlan_id, int on);
static void virtio_mac_addr_add(struct rte_eth_dev *dev,
                                struct ether_addr *mac_addr,
                                uint32_t index, uint32_t vmdq __rte_unused);
static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
static void virtio_mac_addr_set(struct rte_eth_dev *dev,
                                struct ether_addr *mac_addr);

static int virtio_dev_queue_stats_mapping_set(
        __rte_unused struct rte_eth_dev *eth_dev,
        __rte_unused uint16_t queue_id,
        __rte_unused uint8_t stat_idx,
        __rte_unused uint8_t is_rx);

/*
 * The set of PCI devices this driver supports
 */
static const struct rte_pci_id pci_id_virtio_map[] = {

#define RTE_PCI_DEV_ID_DECL_VIRTIO(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
#include "rte_pci_dev_ids.h"

{ .vendor_id = 0, /* sentinel */ },
};

struct rte_virtio_xstats_name_off {
        char name[RTE_ETH_XSTATS_NAME_SIZE];
        unsigned offset;
};

/* [rt]x_qX_ is prepended to the name string here */
static const struct rte_virtio_xstats_name_off rte_virtio_q_stat_strings[] = {
        {"good_packets",           offsetof(struct virtqueue, packets)},
        {"good_bytes",             offsetof(struct virtqueue, bytes)},
        {"errors",                 offsetof(struct virtqueue, errors)},
        {"multicast_packets",      offsetof(struct virtqueue, multicast)},
        {"broadcast_packets",      offsetof(struct virtqueue, broadcast)},
        {"undersize_packets",      offsetof(struct virtqueue, size_bins[0])},
        {"size_64_packets",        offsetof(struct virtqueue, size_bins[1])},
        {"size_65_127_packets",    offsetof(struct virtqueue, size_bins[2])},
        {"size_128_255_packets",   offsetof(struct virtqueue, size_bins[3])},
        {"size_256_511_packets",   offsetof(struct virtqueue, size_bins[4])},
        {"size_512_1023_packets",  offsetof(struct virtqueue, size_bins[5])},
        {"size_1024_1517_packets", offsetof(struct virtqueue, size_bins[6])},
        {"size_1518_max_packets",  offsetof(struct virtqueue, size_bins[7])},
};

#define VIRTIO_NB_Q_XSTATS (sizeof(rte_virtio_q_stat_strings) / \
                            sizeof(rte_virtio_q_stat_strings[0]))
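/*
 * Note: each per-queue xstat above is stored as a byte offset into
 * struct virtqueue; virtio_dev_xstats_get() below applies that offset
 * to a queue pointer to read the matching counter.
 */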

static int
virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
                int *dlen, int pkt_num)
{
        uint32_t head, i;
        int k, sum = 0;
        virtio_net_ctrl_ack status = ~0;
        struct virtio_pmd_ctrl result;

        ctrl->status = status;

        if (!(vq && vq->hw->cvq)) {
                PMD_INIT_LOG(ERR, "Control queue is not supported.");
                return -1;
        }
        head = vq->vq_desc_head_idx;

        PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
                "vq->hw->cvq = %p vq = %p",
                vq->vq_desc_head_idx, status, vq->hw->cvq, vq);

        if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
                return -1;

        memcpy(vq->virtio_net_hdr_mz->addr, ctrl,
                sizeof(struct virtio_pmd_ctrl));

        /*
         * Format is enforced in qemu code:
         * One TX packet for header;
         * At least one TX packet per argument;
         * One RX packet for ACK.
         */
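        /*
         * The resulting chain in the control queue looks like this
         * (W = device-writable):
         *
         *   [ctrl header] -> [data 0] -> ... -> [data pkt_num-1] -> [status (W)]
         *
         * All pieces live in the virtio_net_hdr memzone copied into above;
         * the status byte is read back once the device retires the chain.
         */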
        vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT;
        vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr;
        vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
        vq->vq_free_cnt--;
        i = vq->vq_ring.desc[head].next;

        for (k = 0; k < pkt_num; k++) {
                vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT;
                vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
                        + sizeof(struct virtio_net_ctrl_hdr)
                        + sizeof(ctrl->status) + sizeof(uint8_t)*sum;
                vq->vq_ring.desc[i].len = dlen[k];
                sum += dlen[k];
                vq->vq_free_cnt--;
                i = vq->vq_ring.desc[i].next;
        }

        vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
        vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
                        + sizeof(struct virtio_net_ctrl_hdr);
        vq->vq_ring.desc[i].len = sizeof(ctrl->status);
        vq->vq_free_cnt--;

        vq->vq_desc_head_idx = vq->vq_ring.desc[i].next;

        vq_update_avail_ring(vq, head);
        vq_update_avail_idx(vq);

        PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);

        virtqueue_notify(vq);

        rte_rmb();
        while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) {
                rte_rmb();
                usleep(100);
        }

        while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
                uint32_t idx, desc_idx, used_idx;
                struct vring_used_elem *uep;

                used_idx = (uint32_t)(vq->vq_used_cons_idx
                                & (vq->vq_nentries - 1));
                uep = &vq->vq_ring.used->ring[used_idx];
                idx = (uint32_t) uep->id;
                desc_idx = idx;

                while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) {
                        desc_idx = vq->vq_ring.desc[desc_idx].next;
                        vq->vq_free_cnt++;
                }

                vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx;
                vq->vq_desc_head_idx = idx;

                vq->vq_used_cons_idx++;
                vq->vq_free_cnt++;
        }

        PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
                        vq->vq_free_cnt, vq->vq_desc_head_idx);

        memcpy(&result, vq->virtio_net_hdr_mz->addr,
                        sizeof(struct virtio_pmd_ctrl));

        return result.status;
}

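/*
 * Send the VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET control command, which tells a
 * multiqueue-capable device how many rx/tx queue pairs to enable. Per the
 * virtio spec, the value must lie between 1 and the max_virtqueue_pairs
 * advertised in the device config space.
 */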
static int
virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
        memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));

        dlen[0] = sizeof(uint16_t);

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret) {
                PMD_INIT_LOG(ERR,
                        "Multiqueue configured but send command failed");
                return -EINVAL;
        }

        return 0;
}

void
virtio_dev_queue_release(struct virtqueue *vq)
{
        struct virtio_hw *hw;

        if (vq) {
                hw = vq->hw;
                hw->vtpci_ops->del_queue(hw, vq);

                rte_free(vq->sw_ring);
                rte_free(vq);
        }
}

int virtio_dev_queue_setup(struct rte_eth_dev *dev,
                        int queue_type,
                        uint16_t queue_idx,
                        uint16_t vtpci_queue_idx,
                        uint16_t nb_desc,
                        unsigned int socket_id,
                        struct virtqueue **pvq)
{
        char vq_name[VIRTQUEUE_MAX_NAME_SZ];
        const struct rte_memzone *mz;
        unsigned int vq_size, size;
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtqueue *vq = NULL;
        const char *queue_names[] = {"rvq", "txq", "cvq"};

        PMD_INIT_LOG(DEBUG, "setting up queue: %u", vtpci_queue_idx);

        /*
         * Read the virtqueue size from the Queue Size field.
         * It is always a power of 2; if it reads 0, the virtqueue
         * does not exist.
         */
        vq_size = hw->vtpci_ops->get_queue_num(hw, vtpci_queue_idx);
        PMD_INIT_LOG(DEBUG, "vq_size: %u nb_desc:%u", vq_size, nb_desc);
        if (vq_size == 0) {
                PMD_INIT_LOG(ERR, "virtqueue does not exist");
                return -EINVAL;
        }

        if (!rte_is_power_of_2(vq_size)) {
                PMD_INIT_LOG(ERR, "virtqueue size is not a power of 2");
                return -EINVAL;
        }
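
        /*
         * A power-of-2 ring size lets indices wrap with a cheap mask,
         * e.g. used_idx & (vq_nentries - 1) in virtio_send_command(),
         * instead of a modulo.
         */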

        snprintf(vq_name, sizeof(vq_name), "port%d_%s%d",
                 dev->data->port_id, queue_names[queue_type], queue_idx);
        vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
                         vq_size * sizeof(struct vq_desc_extra),
                         RTE_CACHE_LINE_SIZE);
        if (vq == NULL) {
                PMD_INIT_LOG(ERR, "Can not allocate virtqueue");
                return -ENOMEM;
        }

        if (queue_type == VTNET_RQ) {
                size_t sz_sw;

                sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
                        sizeof(vq->sw_ring[0]);
                vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring", sz_sw,
                                                 RTE_CACHE_LINE_SIZE,
                                                 socket_id);
                if (!vq->sw_ring) {
                        PMD_INIT_LOG(ERR, "Can not allocate RX soft ring");
                        rte_free(vq);
                        return -ENOMEM;
                }
        }

        vq->hw = hw;
        vq->port_id = dev->data->port_id;
        vq->queue_id = queue_idx;
        vq->vq_queue_index = vtpci_queue_idx;
        vq->vq_nentries = vq_size;

        if (nb_desc == 0 || nb_desc > vq_size)
                nb_desc = vq_size;
        vq->vq_free_cnt = nb_desc;

        /*
         * Reserve a memzone for vring elements
         */
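        /*
         * vring_size() covers the descriptor table, the avail ring and the
         * used ring, with the used ring aligned to VIRTIO_PCI_VRING_ALIGN
         * as the legacy virtio layout requires.
         */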
        size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
        vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
        PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
                     size, vq->vq_ring_size);

        mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
                socket_id, 0, VIRTIO_PCI_VRING_ALIGN);
        if (mz == NULL) {
                if (rte_errno == EEXIST)
                        mz = rte_memzone_lookup(vq_name);
                if (mz == NULL) {
                        rte_free(vq);
                        return -ENOMEM;
                }
        }

        /*
         * The virtio PCI device VIRTIO_PCI_QUEUE_PFN register is 32 bit,
         * and only accepts a 32 bit page frame number. With
         * VIRTIO_PCI_QUEUE_ADDR_SHIFT of 12 (4 KiB pages) that covers
         * 2^(32+12) bytes, so check that the ring does not sit above
         * 16TB of physical memory.
         */
        if ((mz->phys_addr + vq->vq_ring_size - 1) >>
            (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
                PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
                rte_free(vq);
                return -ENOMEM;
        }

        memset(mz->addr, 0, mz->len);
        vq->mz = mz;
        vq->vq_ring_mem = mz->phys_addr;
        vq->vq_ring_virt_mem = mz->addr;
        PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem:      0x%"PRIx64,
                     (uint64_t)mz->phys_addr);
        PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%"PRIx64,
                     (uint64_t)(uintptr_t)mz->addr);
        vq->virtio_net_hdr_mz  = NULL;
        vq->virtio_net_hdr_mem = 0;

        if (queue_type == VTNET_TQ) {
                const struct rte_memzone *hdr_mz;
                struct virtio_tx_region *txr;
                unsigned int i;

                /*
                 * For each xmit packet, allocate a virtio_net_hdr
                 * and indirect ring elements
                 */
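                /*
                 * Layout of each slot: the virtio_net_hdr comes first,
                 * followed by a private indirect descriptor table whose
                 * first entry permanently points back at that header, so
                 * the transmit path only chains packet buffers behind it.
                 */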
                snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone",
                         dev->data->port_id, queue_idx);
                hdr_mz = rte_memzone_reserve_aligned(vq_name,
                                                     vq_size * sizeof(*txr),
                                                     socket_id, 0,
                                                     RTE_CACHE_LINE_SIZE);
                if (hdr_mz == NULL) {
                        if (rte_errno == EEXIST)
                                hdr_mz = rte_memzone_lookup(vq_name);
                        if (hdr_mz == NULL) {
                                rte_free(vq);
                                return -ENOMEM;
                        }
                }
                vq->virtio_net_hdr_mz = hdr_mz;
                vq->virtio_net_hdr_mem = hdr_mz->phys_addr;

                txr = hdr_mz->addr;
                memset(txr, 0, vq_size * sizeof(*txr));
                for (i = 0; i < vq_size; i++) {
                        struct vring_desc *start_dp = txr[i].tx_indir;

                        vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir));

                        /* first indirect descriptor is always the tx header */
                        start_dp->addr = vq->virtio_net_hdr_mem
                                + i * sizeof(*txr)
                                + offsetof(struct virtio_tx_region, tx_hdr);

                        start_dp->len = vq->hw->vtnet_hdr_size;
                        start_dp->flags = VRING_DESC_F_NEXT;
                }

        } else if (queue_type == VTNET_CQ) {
                /* Allocate a page for control vq command, data and status */
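                /*
                 * virtio_send_command() lays this page out with the control
                 * header at offset 0, the status byte immediately after it,
                 * and the command data following both.
                 */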
                snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone",
                        dev->data->port_id);
                vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
                        PAGE_SIZE, socket_id, 0, RTE_CACHE_LINE_SIZE);
                if (vq->virtio_net_hdr_mz == NULL) {
                        if (rte_errno == EEXIST)
                                vq->virtio_net_hdr_mz =
                                        rte_memzone_lookup(vq_name);
                        if (vq->virtio_net_hdr_mz == NULL) {
                                rte_free(vq);
                                return -ENOMEM;
                        }
                }
                vq->virtio_net_hdr_mem =
                        vq->virtio_net_hdr_mz->phys_addr;
                memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
        }

        hw->vtpci_ops->setup_queue(hw, vq);

        *pvq = vq;
        return 0;
}

static int
virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx,
                uint32_t socket_id)
{
        struct virtqueue *vq;
        int ret;
        struct virtio_hw *hw = dev->data->dev_private;

        PMD_INIT_FUNC_TRACE();
        ret = virtio_dev_queue_setup(dev, VTNET_CQ, VTNET_SQ_CQ_QUEUE_IDX,
                        vtpci_queue_idx, 0, socket_id, &vq);
        if (ret < 0) {
                PMD_INIT_LOG(ERR, "control vq initialization failed");
                return ret;
        }

        hw->cvq = vq;
        return 0;
}

static void
virtio_free_queues(struct rte_eth_dev *dev)
{
        unsigned int i;

        for (i = 0; i < dev->data->nb_rx_queues; i++)
                virtio_dev_rx_queue_release(dev->data->rx_queues[i]);

        dev->data->nb_rx_queues = 0;

        for (i = 0; i < dev->data->nb_tx_queues; i++)
                virtio_dev_tx_queue_release(dev->data->tx_queues[i]);

        dev->data->nb_tx_queues = 0;
}

static void
virtio_dev_close(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;

        PMD_INIT_LOG(DEBUG, "virtio_dev_close");

        if (hw->started == 1)
                virtio_dev_stop(dev);

        /* reset the NIC */
        if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
                vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
        vtpci_reset(hw);
        virtio_dev_free_mbufs(dev);
        virtio_free_queues(dev);
}

static void
virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
        ctrl.data[0] = 1;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret)
                PMD_INIT_LOG(ERR, "Failed to enable promisc");
}

static void
virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
        ctrl.data[0] = 0;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret)
                PMD_INIT_LOG(ERR, "Failed to disable promisc");
}

static void
virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
        ctrl.data[0] = 1;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret)
                PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
}

static void
virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int dlen[1];
        int ret;

        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
                PMD_INIT_LOG(INFO, "host does not support rx control");
                return;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
        ctrl.data[0] = 0;
        dlen[0] = 1;

        ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
        if (ret)
                PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
}

/*
 * dev_ops for virtio, bare necessities for basic operation
 */
static const struct eth_dev_ops virtio_eth_dev_ops = {
        .dev_configure           = virtio_dev_configure,
        .dev_start               = virtio_dev_start,
        .dev_stop                = virtio_dev_stop,
        .dev_close               = virtio_dev_close,
        .promiscuous_enable      = virtio_dev_promiscuous_enable,
        .promiscuous_disable     = virtio_dev_promiscuous_disable,
        .allmulticast_enable     = virtio_dev_allmulticast_enable,
        .allmulticast_disable    = virtio_dev_allmulticast_disable,

        .dev_infos_get           = virtio_dev_info_get,
        .stats_get               = virtio_dev_stats_get,
        .xstats_get              = virtio_dev_xstats_get,
        .stats_reset             = virtio_dev_stats_reset,
        .xstats_reset            = virtio_dev_stats_reset,
        .link_update             = virtio_dev_link_update,
        .rx_queue_setup          = virtio_dev_rx_queue_setup,
        .rx_queue_release        = virtio_dev_rx_queue_release,
        .tx_queue_setup          = virtio_dev_tx_queue_setup,
        .tx_queue_release        = virtio_dev_tx_queue_release,
        /* collect stats per queue */
        .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
        .vlan_filter_set         = virtio_vlan_filter_set,
        .mac_addr_add            = virtio_mac_addr_add,
        .mac_addr_remove         = virtio_mac_addr_remove,
        .mac_addr_set            = virtio_mac_addr_set,
};

static inline int
virtio_dev_atomic_read_link_status(struct rte_eth_dev *dev,
                                struct rte_eth_link *link)
{
        struct rte_eth_link *dst = link;
        struct rte_eth_link *src = &(dev->data->dev_link);

        if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
                        *(uint64_t *)src) == 0)
                return -1;

        return 0;
}
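
/*
 * Note: this helper and virtio_dev_atomic_write_link_status() below rely
 * on struct rte_eth_link packing into a single 64-bit word, so one
 * rte_atomic64_cmpset() reads or writes speed, duplex and status as a
 * consistent snapshot.
 */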

/**
 * Atomically writes the link status information into the
 * rte_eth_dev structure.
 *
 * @param dev
 *   Pointer to the structure rte_eth_dev to write to.
 * @param link
 *   Pointer to the buffer holding the new link status.
 *
 * @return
 *   - On success, zero.
 *   - On failure, negative value.
 */
static inline int
virtio_dev_atomic_write_link_status(struct rte_eth_dev *dev,
                struct rte_eth_link *link)
{
        struct rte_eth_link *dst = &(dev->data->dev_link);
        struct rte_eth_link *src = link;

        if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
                                        *(uint64_t *)src) == 0)
                return -1;

        return 0;
}

static void
virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        unsigned i;

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                const struct virtqueue *txvq = dev->data->tx_queues[i];
                if (txvq == NULL)
                        continue;

                stats->opackets += txvq->packets;
                stats->obytes += txvq->bytes;
                stats->oerrors += txvq->errors;

                if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
                        stats->q_opackets[i] = txvq->packets;
                        stats->q_obytes[i] = txvq->bytes;
                }
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                const struct virtqueue *rxvq = dev->data->rx_queues[i];
                if (rxvq == NULL)
                        continue;

                stats->ipackets += rxvq->packets;
                stats->ibytes += rxvq->bytes;
                stats->ierrors += rxvq->errors;

                if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
                        stats->q_ipackets[i] = rxvq->packets;
                        stats->q_ibytes[i] = rxvq->bytes;
                }
        }

        stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
}

static int
virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstats *xstats,
                      unsigned n)
{
        unsigned i;
        unsigned count = 0;

        unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_Q_XSTATS +
                dev->data->nb_rx_queues * VIRTIO_NB_Q_XSTATS;

        if (n < nstats)
                return nstats;

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct virtqueue *rxvq = dev->data->rx_queues[i];

                if (rxvq == NULL)
                        continue;

                unsigned t;

                for (t = 0; t < VIRTIO_NB_Q_XSTATS; t++) {
                        snprintf(xstats[count].name, sizeof(xstats[count].name),
                                 "rx_q%u_%s", i,
                                 rte_virtio_q_stat_strings[t].name);
                        xstats[count].value = *(uint64_t *)(((char *)rxvq) +
                                rte_virtio_q_stat_strings[t].offset);
                        count++;
                }
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct virtqueue *txvq = dev->data->tx_queues[i];

                if (txvq == NULL)
                        continue;

                unsigned t;

                for (t = 0; t < VIRTIO_NB_Q_XSTATS; t++) {
                        snprintf(xstats[count].name, sizeof(xstats[count].name),
                                 "tx_q%u_%s", i,
                                 rte_virtio_q_stat_strings[t].name);
                        xstats[count].value = *(uint64_t *)(((char *)txvq) +
                                rte_virtio_q_stat_strings[t].offset);
                        count++;
                }
        }

        return count;
}

static void
virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        virtio_update_stats(dev, stats);
}

static void
virtio_dev_stats_reset(struct rte_eth_dev *dev)
{
        unsigned int i;

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct virtqueue *txvq = dev->data->tx_queues[i];
                if (txvq == NULL)
                        continue;

                txvq->packets = 0;
                txvq->bytes = 0;
                txvq->errors = 0;
                txvq->multicast = 0;
                txvq->broadcast = 0;
                memset(txvq->size_bins, 0, sizeof(txvq->size_bins[0]) * 8);
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct virtqueue *rxvq = dev->data->rx_queues[i];
                if (rxvq == NULL)
                        continue;

                rxvq->packets = 0;
                rxvq->bytes = 0;
                rxvq->errors = 0;
                rxvq->multicast = 0;
                rxvq->broadcast = 0;
                memset(rxvq->size_bins, 0, sizeof(rxvq->size_bins[0]) * 8);
        }
}

static void
virtio_set_hwaddr(struct virtio_hw *hw)
{
        vtpci_write_dev_config(hw,
                        offsetof(struct virtio_net_config, mac),
                        &hw->mac_addr, ETHER_ADDR_LEN);
}

static void
virtio_get_hwaddr(struct virtio_hw *hw)
{
        if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) {
                vtpci_read_dev_config(hw,
                        offsetof(struct virtio_net_config, mac),
                        &hw->mac_addr, ETHER_ADDR_LEN);
        } else {
                eth_random_addr(&hw->mac_addr[0]);
                virtio_set_hwaddr(hw);
        }
}

static void
virtio_mac_table_set(struct virtio_hw *hw,
                     const struct virtio_net_ctrl_mac *uc,
                     const struct virtio_net_ctrl_mac *mc)
{
        struct virtio_pmd_ctrl ctrl;
        int err, len[2];

        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
                PMD_DRV_LOG(INFO, "host does not support mac table");
                return;
        }

        ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
        ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;

        len[0] = uc->entries * ETHER_ADDR_LEN + sizeof(uc->entries);
        memcpy(ctrl.data, uc, len[0]);

        len[1] = mc->entries * ETHER_ADDR_LEN + sizeof(mc->entries);
        memcpy(ctrl.data + len[0], mc, len[1]);

        err = virtio_send_command(hw->cvq, &ctrl, len, 2);
        if (err != 0)
                PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
}

static void
virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
                    uint32_t index, uint32_t vmdq __rte_unused)
{
        struct virtio_hw *hw = dev->data->dev_private;
        const struct ether_addr *addrs = dev->data->mac_addrs;
        unsigned int i;
        struct virtio_net_ctrl_mac *uc, *mc;

        if (index >= VIRTIO_MAX_MAC_ADDRS) {
                PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
                return;
        }

        uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
        uc->entries = 0;
        mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
        mc->entries = 0;

        for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
                const struct ether_addr *addr
                        = (i == index) ? mac_addr : addrs + i;
                struct virtio_net_ctrl_mac *tbl
                        = is_multicast_ether_addr(addr) ? mc : uc;

                memcpy(&tbl->macs[tbl->entries++], addr, ETHER_ADDR_LEN);
        }

        virtio_mac_table_set(hw, uc, mc);
}

static void
virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct ether_addr *addrs = dev->data->mac_addrs;
        struct virtio_net_ctrl_mac *uc, *mc;
        unsigned int i;

        if (index >= VIRTIO_MAX_MAC_ADDRS) {
                PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
                return;
        }

        uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries));
        uc->entries = 0;
        mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries));
        mc->entries = 0;

        for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
                struct virtio_net_ctrl_mac *tbl;

                if (i == index || is_zero_ether_addr(addrs + i))
                        continue;

                tbl = is_multicast_ether_addr(addrs + i) ? mc : uc;
                memcpy(&tbl->macs[tbl->entries++], addrs + i, ETHER_ADDR_LEN);
        }

        virtio_mac_table_set(hw, uc, mc);
}

static void
virtio_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
{
        struct virtio_hw *hw = dev->data->dev_private;

        memcpy(hw->mac_addr, mac_addr, ETHER_ADDR_LEN);

        /* Use atomic update if available */
        if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
                struct virtio_pmd_ctrl ctrl;
                int len = ETHER_ADDR_LEN;

                ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
                ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;

                memcpy(ctrl.data, mac_addr, ETHER_ADDR_LEN);
                virtio_send_command(hw->cvq, &ctrl, &len, 1);
        } else if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC))
                virtio_set_hwaddr(hw);
}

static int
virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtio_pmd_ctrl ctrl;
        int len;

        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
                return -ENOTSUP;

        ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
        ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
        memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
        len = sizeof(vlan_id);

        return virtio_send_command(hw->cvq, &ctrl, &len, 1);
}

static int
virtio_negotiate_features(struct virtio_hw *hw)
{
        uint64_t host_features;

        /* Prepare guest_features: the features the driver wants to support */
        hw->guest_features = VIRTIO_PMD_GUEST_FEATURES;
        PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
                hw->guest_features);

        /* Read device(host) feature bits */
        host_features = hw->vtpci_ops->get_features(hw);
        PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
                host_features);

        /*
         * Negotiate features: the subset of device feature bits that the
         * driver also supports is written back as the guest feature bits.
         */
        hw->guest_features = vtpci_negotiate_features(hw, host_features);
        PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
                hw->guest_features);

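        /*
         * For modern (virtio 1.0) devices, VIRTIO_F_VERSION_1 must be part
         * of the negotiated set, and the driver confirms negotiation by
         * setting FEATURES_OK and reading it back, as the 1.0 spec requires.
         */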
        if (hw->modern) {
                if (!vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) {
                        PMD_INIT_LOG(ERR,
                                "VIRTIO_F_VERSION_1 feature is not enabled.");
                        return -1;
                }
                vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
                if (!(vtpci_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
                        PMD_INIT_LOG(ERR,
                                "failed to set FEATURES_OK status!");
                        return -1;
                }
        }

        return 0;
}

/*
 * Process Virtio Config changed interrupt and call the callback
 * if link state changed.
 */
static void
virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
                         void *param)
{
        struct rte_eth_dev *dev = param;
        struct virtio_hw *hw = dev->data->dev_private;
        uint8_t isr;

        /* Read interrupt status which clears interrupt */
        isr = vtpci_isr(hw);
        PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);

        if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
                PMD_DRV_LOG(ERR, "interrupt enable failed");

        if (isr & VIRTIO_PCI_ISR_CONFIG) {
                if (virtio_dev_link_update(dev, 0) == 0)
                        _rte_eth_dev_callback_process(dev,
                                                      RTE_ETH_EVENT_INTR_LSC);
        }
}

static void
rx_func_get(struct rte_eth_dev *eth_dev)
{
        struct virtio_hw *hw = eth_dev->data->dev_private;
        if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
                eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
        else
                eth_dev->rx_pkt_burst = &virtio_recv_pkts;
}

/*
 * This function is based on the probe() function in virtio_pci.c.
 * It returns 0 on success.
 */
static int
eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
{
        struct virtio_hw *hw = eth_dev->data->dev_private;
        struct virtio_net_config *config;
        struct virtio_net_config local_config;
        struct rte_pci_device *pci_dev;
        uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE;
        int ret;

        RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM <
                         sizeof(struct virtio_net_hdr_mrg_rxbuf));

        eth_dev->dev_ops = &virtio_eth_dev_ops;
        eth_dev->tx_pkt_burst = &virtio_xmit_pkts;

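        /*
         * Secondary processes only (re)resolve the rx/tx burst handlers:
         * function addresses are process-local, so they cannot be reused
         * from the primary's device data.
         */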
        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                rx_func_get(eth_dev);
                return 0;
        }

        /* Allocate memory for storing MAC addresses */
        eth_dev->data->mac_addrs = rte_zmalloc("virtio",
                        VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0);
        if (eth_dev->data->mac_addrs == NULL) {
                PMD_INIT_LOG(ERR,
                        "Failed to allocate %d bytes needed to store MAC addresses",
                        VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN);
                return -ENOMEM;
        }

        pci_dev = eth_dev->pci_dev;

        ret = vtpci_init(pci_dev, hw, &dev_flags);
        if (ret)
                return ret;

        /* Reset the device, although not strictly necessary at startup */
        vtpci_reset(hw);

        /* Tell the host we've noticed this device. */
        vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);

        /* Tell the host we know how to drive the device. */
        vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
        if (virtio_negotiate_features(hw) < 0)
                return -1;

        /* If host does not support status then disable LSC */
        if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
                dev_flags &= ~RTE_ETH_DEV_INTR_LSC;

        rte_eth_copy_pci_info(eth_dev, pci_dev);
        eth_dev->data->dev_flags = dev_flags;

        rx_func_get(eth_dev);

        /* Set the rx header size for the device */
        if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
            vtpci_with_feature(hw, VIRTIO_F_VERSION_1))
                hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        else
                hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);

        /* Copy the permanent MAC address to virtio_hw */
        virtio_get_hwaddr(hw);
        ether_addr_copy((struct ether_addr *) hw->mac_addr,
                        &eth_dev->data->mac_addrs[0]);
        PMD_INIT_LOG(DEBUG,
                     "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
                     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
                     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);

        if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
                config = &local_config;

                vtpci_read_dev_config(hw,
                        offsetof(struct virtio_net_config, mac),
                        &config->mac, sizeof(config->mac));

                if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
                        vtpci_read_dev_config(hw,
                                offsetof(struct virtio_net_config, status),
                                &config->status, sizeof(config->status));
                } else {
                        PMD_INIT_LOG(DEBUG,
                                     "VIRTIO_NET_F_STATUS is not supported");
                        config->status = 0;
                }

                if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
                        vtpci_read_dev_config(hw,
                                offsetof(struct virtio_net_config, max_virtqueue_pairs),
                                &config->max_virtqueue_pairs,
                                sizeof(config->max_virtqueue_pairs));
                } else {
                        PMD_INIT_LOG(DEBUG,
                                     "VIRTIO_NET_F_MQ is not supported");
                        config->max_virtqueue_pairs = 1;
                }

                hw->max_rx_queues =
                        (VIRTIO_MAX_RX_QUEUES < config->max_virtqueue_pairs) ?
                        VIRTIO_MAX_RX_QUEUES : config->max_virtqueue_pairs;
                hw->max_tx_queues =
                        (VIRTIO_MAX_TX_QUEUES < config->max_virtqueue_pairs) ?
                        VIRTIO_MAX_TX_QUEUES : config->max_virtqueue_pairs;

                virtio_dev_cq_queue_setup(eth_dev,
                                        config->max_virtqueue_pairs * 2,
                                        SOCKET_ID_ANY);

                PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
                                config->max_virtqueue_pairs);
                PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
                PMD_INIT_LOG(DEBUG,
                                "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
                                config->mac[0], config->mac[1],
                                config->mac[2], config->mac[3],
                                config->mac[4], config->mac[5]);
        } else {
                hw->max_rx_queues = 1;
                hw->max_tx_queues = 1;
        }

        PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d   hw->max_tx_queues=%d",
                        hw->max_rx_queues, hw->max_tx_queues);
        PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
                        eth_dev->data->port_id, pci_dev->id.vendor_id,
                        pci_dev->id.device_id);

        /* Setup interrupt callback */
        if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
                rte_intr_callback_register(&pci_dev->intr_handle,
                                   virtio_interrupt_handler, eth_dev);

        virtio_dev_cq_start(eth_dev);

        return 0;
}

static int
eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
{
        struct rte_pci_device *pci_dev;
        struct virtio_hw *hw = eth_dev->data->dev_private;

        PMD_INIT_FUNC_TRACE();

        if (rte_eal_process_type() == RTE_PROC_SECONDARY)
                return -EPERM;

        /* Close it anyway, since we cannot know whether it is already closed */
        virtio_dev_close(eth_dev);

        pci_dev = eth_dev->pci_dev;

        eth_dev->dev_ops = NULL;
        eth_dev->tx_pkt_burst = NULL;
        eth_dev->rx_pkt_burst = NULL;

        virtio_dev_queue_release(hw->cvq);

        rte_free(eth_dev->data->mac_addrs);
        eth_dev->data->mac_addrs = NULL;

        /* reset interrupt callback */
        if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
                rte_intr_callback_unregister(&pci_dev->intr_handle,
                                                virtio_interrupt_handler,
                                                eth_dev);
        rte_eal_pci_unmap_device(pci_dev);

        PMD_INIT_LOG(DEBUG, "dev_uninit completed");

        return 0;
}

static struct eth_driver rte_virtio_pmd = {
        .pci_drv = {
                .name = "rte_virtio_pmd",
                .id_table = pci_id_virtio_map,
                .drv_flags = RTE_PCI_DRV_DETACHABLE,
        },
        .eth_dev_init = eth_virtio_dev_init,
        .eth_dev_uninit = eth_virtio_dev_uninit,
        .dev_private_size = sizeof(struct virtio_hw),
};

/*
 * Driver initialization routine.
 * Invoked once at EAL init time.
 * Register itself as the [Poll Mode] Driver of PCI virtio devices.
 * Returns 0 on success.
 */
static int
rte_virtio_pmd_init(const char *name __rte_unused,
                    const char *param __rte_unused)
{
        if (rte_eal_iopl_init() != 0) {
                PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD");
                return -1;
        }

        rte_eth_driver_register(&rte_virtio_pmd);
        return 0;
}

/*
 * Configure virtio device
 * It returns 0 on success.
 */
static int
virtio_dev_configure(struct rte_eth_dev *dev)
{
        const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
        struct virtio_hw *hw = dev->data->dev_private;

        PMD_INIT_LOG(DEBUG, "configure");

        if (rxmode->hw_ip_checksum) {
                PMD_DRV_LOG(ERR, "HW IP checksum not supported");
                return -EINVAL;
        }

        hw->vlan_strip = rxmode->hw_vlan_strip;

        if (rxmode->hw_vlan_filter
            && !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
                PMD_DRV_LOG(NOTICE,
                            "vlan filtering not available on this host");
                return -ENOTSUP;
        }

        if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
                if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
                        PMD_DRV_LOG(ERR, "failed to set config vector");
                        return -EBUSY;
                }

        return 0;
}

static int
virtio_dev_start(struct rte_eth_dev *dev)
{
        uint16_t nb_queues, i;
        struct virtio_hw *hw = dev->data->dev_private;

        /* check if lsc interrupt feature is enabled */
        if (dev->data->dev_conf.intr_conf.lsc) {
                if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
                        PMD_DRV_LOG(ERR, "link status not supported by host");
                        return -ENOTSUP;
                }

                if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
                        PMD_DRV_LOG(ERR, "interrupt enable failed");
                        return -EIO;
                }
        }

        /* Initialize Link state */
        virtio_dev_link_update(dev, 0);

        /* On restart after stop do not touch queues */
        if (hw->started)
                return 0;

        /* Do final configuration before rx/tx engine starts */
        virtio_dev_rxtx_start(dev);
        vtpci_reinit_complete(hw);

        hw->started = 1;

        /*
         * Notify the backend.
         * Otherwise the tap backend might already stop its queue due to
         * fullness, and the vhost backend would have no chance to be
         * woken up.
         */
        nb_queues = dev->data->nb_rx_queues;
        if (nb_queues > 1) {
                if (virtio_set_multiple_queues(dev, nb_queues) != 0)
                        return -EINVAL;
        }

        PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);

        for (i = 0; i < nb_queues; i++)
                virtqueue_notify(dev->data->rx_queues[i]);

        PMD_INIT_LOG(DEBUG, "Notified backend at initialization");

        for (i = 0; i < dev->data->nb_rx_queues; i++)
                VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);

        for (i = 0; i < dev->data->nb_tx_queues; i++)
                VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);

        return 0;
}

static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
{
        struct rte_mbuf *buf;
        int i, mbuf_num = 0;

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                PMD_INIT_LOG(DEBUG,
                             "Before freeing rxq[%d] used and unused buf", i);
                VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);

                PMD_INIT_LOG(DEBUG, "rx_queues[%d]=%p",
                                i, dev->data->rx_queues[i]);
                while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
                                        dev->data->rx_queues[i])) != NULL) {
                        rte_pktmbuf_free(buf);
                        mbuf_num++;
                }

                PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
                PMD_INIT_LOG(DEBUG,
                             "After freeing rxq[%d] used and unused buf", i);
                VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                PMD_INIT_LOG(DEBUG,
                             "Before freeing txq[%d] used and unused bufs",
                             i);
                VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);

                mbuf_num = 0;
                while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
                                        dev->data->tx_queues[i])) != NULL) {
                        rte_pktmbuf_free(buf);

                        mbuf_num++;
                }

                PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
                PMD_INIT_LOG(DEBUG,
                             "After freeing txq[%d] used and unused buf", i);
                VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
        }
}

/*
 * Stop device: disable interrupt and mark link down
 */
static void
virtio_dev_stop(struct rte_eth_dev *dev)
{
        struct rte_eth_link link;
        struct virtio_hw *hw = dev->data->dev_private;

        PMD_INIT_LOG(DEBUG, "stop");

        hw->started = 0;

        if (dev->data->dev_conf.intr_conf.lsc)
                rte_intr_disable(&dev->pci_dev->intr_handle);

        memset(&link, 0, sizeof(link));
        virtio_dev_atomic_write_link_status(dev, &link);
}

static int
virtio_dev_link_update(struct rte_eth_dev *dev,
                       __rte_unused int wait_to_complete)
{
        struct rte_eth_link link, old;
        uint16_t status;
        struct virtio_hw *hw = dev->data->dev_private;

        memset(&link, 0, sizeof(link));
        virtio_dev_atomic_read_link_status(dev, &link);
        old = link;
        link.link_duplex = ETH_LINK_FULL_DUPLEX;
        link.link_speed  = SPEED_10G;

        if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
                PMD_INIT_LOG(DEBUG, "Get link status from hw");
                vtpci_read_dev_config(hw,
                                offsetof(struct virtio_net_config, status),
                                &status, sizeof(status));
                if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
                        link.link_status = ETH_LINK_DOWN;
                        PMD_INIT_LOG(DEBUG, "Port %d is down",
                                     dev->data->port_id);
                } else {
                        link.link_status = ETH_LINK_UP;
                        PMD_INIT_LOG(DEBUG, "Port %d is up",
                                     dev->data->port_id);
                }
        } else {
                link.link_status = ETH_LINK_UP;
        }
        virtio_dev_atomic_write_link_status(dev, &link);

        return (old.link_status == link.link_status) ? -1 : 0;
}

static void
virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
        struct virtio_hw *hw = dev->data->dev_private;

        dev_info->driver_name = dev->driver->pci_drv.name;
        dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
        dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
        dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
        dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
        dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
        dev_info->default_txconf = (struct rte_eth_txconf) {
                .txq_flags = ETH_TXQ_FLAGS_NOOFFLOADS
        };
}

/*
 * It enables testpmd to collect per queue stats.
 */
static int
virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
__rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
__rte_unused uint8_t is_rx)
{
        return 0;
}

static struct rte_driver rte_virtio_driver = {
        .type = PMD_PDEV,
        .init = rte_virtio_pmd_init,
};

PMD_REGISTER_DRIVER(rte_virtio_driver);