net/virtio: add callback for device closing
[dpdk.git] / drivers / net / virtio / virtio_pci.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 #include <stdint.h>
5
6 #ifdef RTE_EXEC_ENV_LINUX
7  #include <dirent.h>
8  #include <fcntl.h>
9 #endif
10
11 #include <rte_io.h>
12 #include <rte_bus.h>
13
14 #include "virtio_pci.h"
15 #include "virtio_logs.h"
16 #include "virtqueue.h"
17
/*
 * PCI configuration-space constants mirrored from linux/pci_regs.h.
 * That header cannot simply be included here because it does not exist
 * on non-Linux platforms.
 */
/* Config-space offset read in this file to get the first capability position. */
#define PCI_CAPABILITY_LIST	0x34
/* Capability ID this file matches for vendor-specific (virtio) entries. */
#define PCI_CAP_ID_VNDR		0x09
/* Capability ID this file matches for the MSI-X entry. */
#define PCI_CAP_ID_MSIX		0x11
26
/*
 * Offset at which the per-driver (device-specific) configuration space
 * begins: 24 when MSI-X is enabled on the device, 20 otherwise.
 */
#define VIRTIO_PCI_CONFIG(hw) \
	((hw)->use_msix == VIRTIO_MSIX_ENABLED ? 24 : 20)
33
34 static inline int
35 check_vq_phys_addr_ok(struct virtqueue *vq)
36 {
37         /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
38          * and only accepts 32 bit page frame number.
39          * Check if the allocated physical memory exceeds 16TB.
40          */
41         if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >>
42                         (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
43                 PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
44                 return 0;
45         }
46
47         return 1;
48 }
49
50 #define PCI_MSIX_ENABLE 0x8000
51
52 static enum virtio_msix_status
53 vtpci_msix_detect(struct rte_pci_device *dev)
54 {
55         uint8_t pos;
56         int ret;
57
58         ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
59         if (ret != 1) {
60                 PMD_INIT_LOG(DEBUG,
61                              "failed to read pci capability list, ret %d", ret);
62                 return VIRTIO_MSIX_NONE;
63         }
64
65         while (pos) {
66                 uint8_t cap[2];
67
68                 ret = rte_pci_read_config(dev, cap, sizeof(cap), pos);
69                 if (ret != sizeof(cap)) {
70                         PMD_INIT_LOG(DEBUG,
71                                      "failed to read pci cap at pos: %x ret %d",
72                                      pos, ret);
73                         break;
74                 }
75
76                 if (cap[0] == PCI_CAP_ID_MSIX) {
77                         uint16_t flags;
78
79                         ret = rte_pci_read_config(dev, &flags, sizeof(flags),
80                                         pos + sizeof(cap));
81                         if (ret != sizeof(flags)) {
82                                 PMD_INIT_LOG(DEBUG,
83                                              "failed to read pci cap at pos:"
84                                              " %x ret %d", pos + 2, ret);
85                                 break;
86                         }
87
88                         if (flags & PCI_MSIX_ENABLE)
89                                 return VIRTIO_MSIX_ENABLED;
90                         else
91                                 return VIRTIO_MSIX_DISABLED;
92                 }
93
94                 pos = cap[1];
95         }
96
97         return VIRTIO_MSIX_NONE;
98 }
99
/*
 * Read @length bytes of device-specific configuration, starting at
 * @offset past VIRTIO_PCI_CONFIG(hw), into @dst through the legacy I/O port.
 *
 * Legacy mode, see http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf:
 *
 * "Note that this is possible because while the virtio header is PCI (i.e.
 * little) endian, the device-specific region is encoded in the native endian of
 * the guest (where such distinction is applicable)."
 *
 * For powerpc which supports both, qemu supposes that cpu is big endian and
 * enforces this for the virtio-net stuff. On RTE_ARCH_PPC_64 the region is
 * therefore read in 4-, 2- and 1-byte pieces, and the 4- and 2-byte pieces
 * are converted from big endian to CPU order.
 */
static void
legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
		       void *dst, int length)
{
#ifdef RTE_ARCH_PPC_64
	while (length > 0) {
		/* Largest access width that still fits in what is left. */
		int size = (length >= 4) ? 4 : (length >= 2) ? 2 : 1;

		rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
			VIRTIO_PCI_CONFIG(hw) + offset);

		if (size == 4)
			*(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst);
		else if (size == 2)
			*(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst);

		dst = (char *)dst + size;
		offset += size;
		length -= size;
	}
#else
	rte_pci_ioport_read(VTPCI_IO(hw), dst, length,
		VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}
144
/*
 * Write @length bytes from @src into the device-specific configuration,
 * starting at @offset past VIRTIO_PCI_CONFIG(hw), through the legacy I/O port.
 *
 * On RTE_ARCH_PPC_64 the data is written in 4-, 2- and 1-byte pieces, and
 * the 4- and 2-byte pieces are converted from CPU order to big endian first.
 */
static void
legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
			const void *src, int length)
{
#ifdef RTE_ARCH_PPC_64
	union {
		uint32_t u32;
		uint16_t u16;
	} swapped;

	while (length > 0) {
		/* Single bytes go out straight from the caller's buffer. */
		const void *chunk = src;
		int size;

		if (length >= 4) {
			size = 4;
			swapped.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
			chunk = &swapped.u32;
		} else if (length >= 2) {
			size = 2;
			swapped.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
			chunk = &swapped.u16;
		} else {
			size = 1;
		}

		rte_pci_ioport_write(VTPCI_IO(hw), chunk, size,
			VIRTIO_PCI_CONFIG(hw) + offset);

		src = (const char *)src + size;
		offset += size;
		length -= size;
	}
#else
	rte_pci_ioport_write(VTPCI_IO(hw), src, length,
		VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}
182
183 static uint64_t
184 legacy_get_features(struct virtio_hw *hw)
185 {
186         uint32_t dst;
187
188         rte_pci_ioport_read(VTPCI_IO(hw), &dst, 4, VIRTIO_PCI_HOST_FEATURES);
189         return dst;
190 }
191
192 static void
193 legacy_set_features(struct virtio_hw *hw, uint64_t features)
194 {
195         if ((features >> 32) != 0) {
196                 PMD_DRV_LOG(ERR,
197                         "only 32 bit features are allowed for legacy virtio!");
198                 return;
199         }
200         rte_pci_ioport_write(VTPCI_IO(hw), &features, 4,
201                 VIRTIO_PCI_GUEST_FEATURES);
202 }
203
204 static uint8_t
205 legacy_get_status(struct virtio_hw *hw)
206 {
207         uint8_t dst;
208
209         rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS);
210         return dst;
211 }
212
213 static void
214 legacy_set_status(struct virtio_hw *hw, uint8_t status)
215 {
216         rte_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS);
217 }
218
219 static uint8_t
220 legacy_get_isr(struct virtio_hw *hw)
221 {
222         uint8_t dst;
223
224         rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR);
225         return dst;
226 }
227
228 /* Enable one vector (0) for Link State Intrerrupt */
229 static uint16_t
230 legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec)
231 {
232         uint16_t dst;
233
234         rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_CONFIG_VECTOR);
235         rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_CONFIG_VECTOR);
236         return dst;
237 }
238
239 static uint16_t
240 legacy_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
241 {
242         uint16_t dst;
243
244         rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
245                 VIRTIO_PCI_QUEUE_SEL);
246         rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_QUEUE_VECTOR);
247         rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_QUEUE_VECTOR);
248         return dst;
249 }
250
251 static uint16_t
252 legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
253 {
254         uint16_t dst;
255
256         rte_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2, VIRTIO_PCI_QUEUE_SEL);
257         rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM);
258         return dst;
259 }
260
261 static int
262 legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
263 {
264         uint32_t src;
265
266         if (!check_vq_phys_addr_ok(vq))
267                 return -1;
268
269         rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
270                 VIRTIO_PCI_QUEUE_SEL);
271         src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
272         rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
273
274         return 0;
275 }
276
277 static void
278 legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
279 {
280         uint32_t src = 0;
281
282         rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
283                 VIRTIO_PCI_QUEUE_SEL);
284         rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
285 }
286
287 static void
288 legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
289 {
290         rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
291                 VIRTIO_PCI_QUEUE_NOTIFY);
292 }
293
294 static void
295 legacy_intr_detect(struct virtio_hw *hw)
296 {
297         hw->use_msix = vtpci_msix_detect(VTPCI_DEV(hw));
298 }
299
300 static int
301 legacy_dev_close(struct virtio_hw *hw)
302 {
303         struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
304
305         rte_pci_unmap_device(dev->pci_dev);
306         rte_pci_ioport_unmap(VTPCI_IO(hw));
307
308         return 0;
309 }
310
311 const struct virtio_pci_ops legacy_ops = {
312         .read_dev_cfg   = legacy_read_dev_config,
313         .write_dev_cfg  = legacy_write_dev_config,
314         .get_status     = legacy_get_status,
315         .set_status     = legacy_set_status,
316         .get_features   = legacy_get_features,
317         .set_features   = legacy_set_features,
318         .get_isr        = legacy_get_isr,
319         .set_config_irq = legacy_set_config_irq,
320         .set_queue_irq  = legacy_set_queue_irq,
321         .get_queue_num  = legacy_get_queue_num,
322         .setup_queue    = legacy_setup_queue,
323         .del_queue      = legacy_del_queue,
324         .notify_queue   = legacy_notify_queue,
325         .intr_detect    = legacy_intr_detect,
326         .dev_close      = legacy_dev_close,
327 };
328
/*
 * Write a 64-bit value as two 32-bit register writes: the low half to
 * @lo first, then the high half to @hi.
 */
static inline void
io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
{
	const uint32_t low_half = (uint32_t)val;
	const uint32_t high_half = (uint32_t)(val >> 32);

	rte_write32(low_half, lo);
	rte_write32(high_half, hi);
}
335
336 static void
337 modern_read_dev_config(struct virtio_hw *hw, size_t offset,
338                        void *dst, int length)
339 {
340         int i;
341         uint8_t *p;
342         uint8_t old_gen, new_gen;
343
344         do {
345                 old_gen = rte_read8(&hw->common_cfg->config_generation);
346
347                 p = dst;
348                 for (i = 0;  i < length; i++)
349                         *p++ = rte_read8((uint8_t *)hw->dev_cfg + offset + i);
350
351                 new_gen = rte_read8(&hw->common_cfg->config_generation);
352         } while (old_gen != new_gen);
353 }
354
355 static void
356 modern_write_dev_config(struct virtio_hw *hw, size_t offset,
357                         const void *src, int length)
358 {
359         int i;
360         const uint8_t *p = src;
361
362         for (i = 0;  i < length; i++)
363                 rte_write8((*p++), (((uint8_t *)hw->dev_cfg) + offset + i));
364 }
365
366 static uint64_t
367 modern_get_features(struct virtio_hw *hw)
368 {
369         uint32_t features_lo, features_hi;
370
371         rte_write32(0, &hw->common_cfg->device_feature_select);
372         features_lo = rte_read32(&hw->common_cfg->device_feature);
373
374         rte_write32(1, &hw->common_cfg->device_feature_select);
375         features_hi = rte_read32(&hw->common_cfg->device_feature);
376
377         return ((uint64_t)features_hi << 32) | features_lo;
378 }
379
380 static void
381 modern_set_features(struct virtio_hw *hw, uint64_t features)
382 {
383         rte_write32(0, &hw->common_cfg->guest_feature_select);
384         rte_write32(features & ((1ULL << 32) - 1),
385                     &hw->common_cfg->guest_feature);
386
387         rte_write32(1, &hw->common_cfg->guest_feature_select);
388         rte_write32(features >> 32,
389                     &hw->common_cfg->guest_feature);
390 }
391
392 static uint8_t
393 modern_get_status(struct virtio_hw *hw)
394 {
395         return rte_read8(&hw->common_cfg->device_status);
396 }
397
398 static void
399 modern_set_status(struct virtio_hw *hw, uint8_t status)
400 {
401         rte_write8(status, &hw->common_cfg->device_status);
402 }
403
404 static uint8_t
405 modern_get_isr(struct virtio_hw *hw)
406 {
407         return rte_read8(hw->isr);
408 }
409
410 static uint16_t
411 modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
412 {
413         rte_write16(vec, &hw->common_cfg->msix_config);
414         return rte_read16(&hw->common_cfg->msix_config);
415 }
416
417 static uint16_t
418 modern_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
419 {
420         rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
421         rte_write16(vec, &hw->common_cfg->queue_msix_vector);
422         return rte_read16(&hw->common_cfg->queue_msix_vector);
423 }
424
425 static uint16_t
426 modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
427 {
428         rte_write16(queue_id, &hw->common_cfg->queue_select);
429         return rte_read16(&hw->common_cfg->queue_size);
430 }
431
432 static int
433 modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
434 {
435         uint64_t desc_addr, avail_addr, used_addr;
436         uint16_t notify_off;
437
438         if (!check_vq_phys_addr_ok(vq))
439                 return -1;
440
441         desc_addr = vq->vq_ring_mem;
442         avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
443         used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
444                                                          ring[vq->vq_nentries]),
445                                    VIRTIO_PCI_VRING_ALIGN);
446
447         rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
448
449         io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo,
450                                       &hw->common_cfg->queue_desc_hi);
451         io_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo,
452                                        &hw->common_cfg->queue_avail_hi);
453         io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo,
454                                       &hw->common_cfg->queue_used_hi);
455
456         notify_off = rte_read16(&hw->common_cfg->queue_notify_off);
457         vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
458                                 notify_off * hw->notify_off_multiplier);
459
460         rte_write16(1, &hw->common_cfg->queue_enable);
461
462         PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
463         PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr);
464         PMD_INIT_LOG(DEBUG, "\t aval_addr: %" PRIx64, avail_addr);
465         PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr);
466         PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
467                 vq->notify_addr, notify_off);
468
469         return 0;
470 }
471
472 static void
473 modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
474 {
475         rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
476
477         io_write64_twopart(0, &hw->common_cfg->queue_desc_lo,
478                                   &hw->common_cfg->queue_desc_hi);
479         io_write64_twopart(0, &hw->common_cfg->queue_avail_lo,
480                                   &hw->common_cfg->queue_avail_hi);
481         io_write64_twopart(0, &hw->common_cfg->queue_used_lo,
482                                   &hw->common_cfg->queue_used_hi);
483
484         rte_write16(0, &hw->common_cfg->queue_enable);
485 }
486
487 static void
488 modern_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
489 {
490         uint32_t notify_data;
491
492         if (!vtpci_with_feature(hw, VIRTIO_F_NOTIFICATION_DATA)) {
493                 rte_write16(vq->vq_queue_index, vq->notify_addr);
494                 return;
495         }
496
497         if (vtpci_with_feature(hw, VIRTIO_F_RING_PACKED)) {
498                 /*
499                  * Bit[0:15]: vq queue index
500                  * Bit[16:30]: avail index
501                  * Bit[31]: avail wrap counter
502                  */
503                 notify_data = ((uint32_t)(!!(vq->vq_packed.cached_flags &
504                                 VRING_PACKED_DESC_F_AVAIL)) << 31) |
505                                 ((uint32_t)vq->vq_avail_idx << 16) |
506                                 vq->vq_queue_index;
507         } else {
508                 /*
509                  * Bit[0:15]: vq queue index
510                  * Bit[16:31]: avail index
511                  */
512                 notify_data = ((uint32_t)vq->vq_avail_idx << 16) |
513                                 vq->vq_queue_index;
514         }
515         rte_write32(notify_data, vq->notify_addr);
516 }
517
518
519
520 static void
521 modern_intr_detect(struct virtio_hw *hw)
522 {
523         hw->use_msix = vtpci_msix_detect(VTPCI_DEV(hw));
524 }
525
526 static int
527 modern_dev_close(struct virtio_hw *hw)
528 {
529         struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
530
531         rte_pci_unmap_device(dev->pci_dev);
532
533         return 0;
534 }
535
536 const struct virtio_pci_ops modern_ops = {
537         .read_dev_cfg   = modern_read_dev_config,
538         .write_dev_cfg  = modern_write_dev_config,
539         .get_status     = modern_get_status,
540         .set_status     = modern_set_status,
541         .get_features   = modern_get_features,
542         .set_features   = modern_set_features,
543         .get_isr        = modern_get_isr,
544         .set_config_irq = modern_set_config_irq,
545         .set_queue_irq  = modern_set_queue_irq,
546         .get_queue_num  = modern_get_queue_num,
547         .setup_queue    = modern_setup_queue,
548         .del_queue      = modern_del_queue,
549         .notify_queue   = modern_notify_queue,
550         .intr_detect    = modern_intr_detect,
551         .dev_close      = modern_dev_close,
552 };
553
554
555 void
556 vtpci_read_dev_config(struct virtio_hw *hw, size_t offset,
557                       void *dst, int length)
558 {
559         VTPCI_OPS(hw)->read_dev_cfg(hw, offset, dst, length);
560 }
561
562 void
563 vtpci_write_dev_config(struct virtio_hw *hw, size_t offset,
564                        const void *src, int length)
565 {
566         VTPCI_OPS(hw)->write_dev_cfg(hw, offset, src, length);
567 }
568
569 uint64_t
570 vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features)
571 {
572         uint64_t features;
573
574         /*
575          * Limit negotiated features to what the driver, virtqueue, and
576          * host all support.
577          */
578         features = host_features & hw->guest_features;
579         VTPCI_OPS(hw)->set_features(hw, features);
580
581         return features;
582 }
583
584 void
585 vtpci_reset(struct virtio_hw *hw)
586 {
587         VTPCI_OPS(hw)->set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
588         /* flush status write */
589         VTPCI_OPS(hw)->get_status(hw);
590 }
591
592 void
593 vtpci_reinit_complete(struct virtio_hw *hw)
594 {
595         vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
596 }
597
598 void
599 vtpci_set_status(struct virtio_hw *hw, uint8_t status)
600 {
601         if (status != VIRTIO_CONFIG_STATUS_RESET)
602                 status |= VTPCI_OPS(hw)->get_status(hw);
603
604         VTPCI_OPS(hw)->set_status(hw, status);
605 }
606
607 uint8_t
608 vtpci_get_status(struct virtio_hw *hw)
609 {
610         return VTPCI_OPS(hw)->get_status(hw);
611 }
612
613 uint8_t
614 vtpci_isr(struct virtio_hw *hw)
615 {
616         return VTPCI_OPS(hw)->get_isr(hw);
617 }
618
619 static void *
620 get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
621 {
622         uint8_t  bar    = cap->bar;
623         uint32_t length = cap->length;
624         uint32_t offset = cap->offset;
625         uint8_t *base;
626
627         if (bar >= PCI_MAX_RESOURCE) {
628                 PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
629                 return NULL;
630         }
631
632         if (offset + length < offset) {
633                 PMD_INIT_LOG(ERR, "offset(%u) + length(%u) overflows",
634                         offset, length);
635                 return NULL;
636         }
637
638         if (offset + length > dev->mem_resource[bar].len) {
639                 PMD_INIT_LOG(ERR,
640                         "invalid cap: overflows bar space: %u > %" PRIu64,
641                         offset + length, dev->mem_resource[bar].len);
642                 return NULL;
643         }
644
645         base = dev->mem_resource[bar].addr;
646         if (base == NULL) {
647                 PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
648                 return NULL;
649         }
650
651         return base + offset;
652 }
653
654 static int
655 virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
656 {
657         uint8_t pos;
658         struct virtio_pci_cap cap;
659         int ret;
660
661         if (rte_pci_map_device(dev)) {
662                 PMD_INIT_LOG(DEBUG, "failed to map pci device!");
663                 return -1;
664         }
665
666         ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
667         if (ret != 1) {
668                 PMD_INIT_LOG(DEBUG,
669                              "failed to read pci capability list, ret %d", ret);
670                 return -1;
671         }
672
673         while (pos) {
674                 ret = rte_pci_read_config(dev, &cap, 2, pos);
675                 if (ret != 2) {
676                         PMD_INIT_LOG(DEBUG,
677                                      "failed to read pci cap at pos: %x ret %d",
678                                      pos, ret);
679                         break;
680                 }
681
682                 if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
683                         /* Transitional devices would also have this capability,
684                          * that's why we also check if msix is enabled.
685                          * 1st byte is cap ID; 2nd byte is the position of next
686                          * cap; next two bytes are the flags.
687                          */
688                         uint16_t flags;
689
690                         ret = rte_pci_read_config(dev, &flags, sizeof(flags),
691                                         pos + 2);
692                         if (ret != sizeof(flags)) {
693                                 PMD_INIT_LOG(DEBUG,
694                                              "failed to read pci cap at pos:"
695                                              " %x ret %d", pos + 2, ret);
696                                 break;
697                         }
698
699                         if (flags & PCI_MSIX_ENABLE)
700                                 hw->use_msix = VIRTIO_MSIX_ENABLED;
701                         else
702                                 hw->use_msix = VIRTIO_MSIX_DISABLED;
703                 }
704
705                 if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
706                         PMD_INIT_LOG(DEBUG,
707                                 "[%2x] skipping non VNDR cap id: %02x",
708                                 pos, cap.cap_vndr);
709                         goto next;
710                 }
711
712                 ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
713                 if (ret != sizeof(cap)) {
714                         PMD_INIT_LOG(DEBUG,
715                                      "failed to read pci cap at pos: %x ret %d",
716                                      pos, ret);
717                         break;
718                 }
719
720                 PMD_INIT_LOG(DEBUG,
721                         "[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
722                         pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
723
724                 switch (cap.cfg_type) {
725                 case VIRTIO_PCI_CAP_COMMON_CFG:
726                         hw->common_cfg = get_cfg_addr(dev, &cap);
727                         break;
728                 case VIRTIO_PCI_CAP_NOTIFY_CFG:
729                         ret = rte_pci_read_config(dev,
730                                         &hw->notify_off_multiplier,
731                                         4, pos + sizeof(cap));
732                         if (ret != 4)
733                                 PMD_INIT_LOG(DEBUG,
734                                         "failed to read notify_off_multiplier, ret %d",
735                                         ret);
736                         else
737                                 hw->notify_base = get_cfg_addr(dev, &cap);
738                         break;
739                 case VIRTIO_PCI_CAP_DEVICE_CFG:
740                         hw->dev_cfg = get_cfg_addr(dev, &cap);
741                         break;
742                 case VIRTIO_PCI_CAP_ISR_CFG:
743                         hw->isr = get_cfg_addr(dev, &cap);
744                         break;
745                 }
746
747 next:
748                 pos = cap.cap_next;
749         }
750
751         if (hw->common_cfg == NULL || hw->notify_base == NULL ||
752             hw->dev_cfg == NULL    || hw->isr == NULL) {
753                 PMD_INIT_LOG(INFO, "no modern virtio pci device found.");
754                 return -1;
755         }
756
757         PMD_INIT_LOG(INFO, "found modern virtio pci device.");
758
759         PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", hw->common_cfg);
760         PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", hw->dev_cfg);
761         PMD_INIT_LOG(DEBUG, "isr cfg mapped at: %p", hw->isr);
762         PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u",
763                 hw->notify_base, hw->notify_off_multiplier);
764
765         return 0;
766 }
767
768 /*
769  * Return -1:
770  *   if there is error mapping with VFIO/UIO.
771  *   if port map error when driver type is KDRV_NONE.
772  *   if marked as allowed but driver type is KDRV_UNKNOWN.
773  * Return 1 if kernel driver is managing the device.
774  * Return 0 on success.
775  */
776 int
777 vtpci_init(struct rte_pci_device *pci_dev, struct virtio_pci_dev *dev)
778 {
779         struct virtio_hw *hw = &dev->hw;
780
781         RTE_BUILD_BUG_ON(offsetof(struct virtio_pci_dev, hw) != 0);
782
783         dev->pci_dev = pci_dev;
784
785         /*
786          * Try if we can succeed reading virtio pci caps, which exists
787          * only on modern pci device. If failed, we fallback to legacy
788          * virtio handling.
789          */
790         if (virtio_read_caps(pci_dev, hw) == 0) {
791                 PMD_INIT_LOG(INFO, "modern virtio pci detected.");
792                 virtio_hw_internal[hw->port_id].vtpci_ops = &modern_ops;
793                 hw->bus_type = VIRTIO_BUS_PCI_MODERN;
794                 dev->modern = true;
795                 goto msix_detect;
796         }
797
798         PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
799         if (rte_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0) {
800                 rte_pci_unmap_device(pci_dev);
801                 if (pci_dev->kdrv == RTE_PCI_KDRV_UNKNOWN &&
802                     (!pci_dev->device.devargs ||
803                      pci_dev->device.devargs->bus !=
804                      rte_bus_find_by_name("pci"))) {
805                         PMD_INIT_LOG(INFO,
806                                 "skip kernel managed virtio device.");
807                         return 1;
808                 }
809                 return -1;
810         }
811
812         virtio_hw_internal[hw->port_id].vtpci_ops = &legacy_ops;
813         hw->bus_type = VIRTIO_BUS_PCI_LEGACY;
814         dev->modern = false;
815
816 msix_detect:
817         VTPCI_OPS(hw)->intr_detect(hw);
818
819         return 0;
820 }
821