net/virtio: move MSI-X detection to PCI ethdev
[dpdk.git] drivers/net/virtio/virtio_pci.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <stdint.h>

#ifdef RTE_EXEC_ENV_LINUX
 #include <dirent.h>
 #include <fcntl.h>
#endif

#include <rte_io.h>
#include <rte_bus.h>

#include "virtio_pci.h"
#include "virtio_logs.h"
#include "virtqueue.h"

/*
 * The following macros are derived from linux/pci_regs.h; we can't
 * simply include that header here, as it does not exist on non-Linux
 * platforms.
 */
#define PCI_CAPABILITY_LIST     0x34
#define PCI_CAP_ID_VNDR         0x09
#define PCI_CAP_ID_MSIX         0x11

/*
 * The remaining space is defined by each driver as the per-driver
 * configuration space. With MSI-X enabled, the legacy header grows by
 * two 16-bit vector registers, which moves the device-specific region
 * from offset 20 to offset 24.
 */
#define VIRTIO_PCI_CONFIG(hw) \
                (((hw)->use_msix == VIRTIO_MSIX_ENABLED) ? 24 : 20)

static inline int
check_vq_phys_addr_ok(struct virtqueue *vq)
{
        /* The virtio PCI VIRTIO_PCI_QUEUE_PFN register is 32 bits wide
         * and only accepts a 32-bit page frame number.
         * Check whether the allocated physical memory exceeds 16TB.
         */
        if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >>
                        (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
                PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
                return 0;
        }

        return 1;
}

#define PCI_MSIX_ENABLE 0x8000

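/*
 * Walk the PCI capability list (starting at config-space offset
 * PCI_CAPABILITY_LIST) looking for the MSI-X capability, and report
 * whether it is present and, if so, whether its message control
 * enable bit (PCI_MSIX_ENABLE) is set.
 */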
static enum virtio_msix_status
vtpci_msix_detect(struct rte_pci_device *dev)
{
        uint8_t pos;
        int ret;

        ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
        if (ret != 1) {
                PMD_INIT_LOG(DEBUG,
                             "failed to read pci capability list, ret %d", ret);
                return VIRTIO_MSIX_NONE;
        }

        while (pos) {
                uint8_t cap[2];

                ret = rte_pci_read_config(dev, cap, sizeof(cap), pos);
                if (ret != sizeof(cap)) {
                        PMD_INIT_LOG(DEBUG,
                                     "failed to read pci cap at pos: %x ret %d",
                                     pos, ret);
                        break;
                }

                if (cap[0] == PCI_CAP_ID_MSIX) {
                        uint16_t flags;

                        ret = rte_pci_read_config(dev, &flags, sizeof(flags),
                                        pos + sizeof(cap));
                        if (ret != sizeof(flags)) {
                                PMD_INIT_LOG(DEBUG,
                                             "failed to read pci cap at pos:"
                                             " %x ret %d", pos + 2, ret);
                                break;
                        }

                        if (flags & PCI_MSIX_ENABLE)
                                return VIRTIO_MSIX_ENABLED;
                        else
                                return VIRTIO_MSIX_DISABLED;
                }

                pos = cap[1];
        }

        return VIRTIO_MSIX_NONE;
}

/*
 * Since we are in legacy mode:
 * http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf
 *
 * "Note that this is possible because while the virtio header is PCI (i.e.
 * little) endian, the device-specific region is encoded in the native endian of
 * the guest (where such distinction is applicable)."
 *
 * For powerpc, which supports both endiannesses, QEMU assumes the CPU is
 * big endian and enforces this for the virtio-net device.
 */
static void
legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
                       void *dst, int length)
{
#ifdef RTE_ARCH_PPC_64
        int size;

        while (length > 0) {
                if (length >= 4) {
                        size = 4;
                        rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
                                VIRTIO_PCI_CONFIG(hw) + offset);
                        *(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst);
                } else if (length >= 2) {
                        size = 2;
                        rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
                                VIRTIO_PCI_CONFIG(hw) + offset);
                        *(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst);
                } else {
                        size = 1;
                        rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
                                VIRTIO_PCI_CONFIG(hw) + offset);
                }

                dst = (char *)dst + size;
                offset += size;
                length -= size;
        }
#else
        rte_pci_ioport_read(VTPCI_IO(hw), dst, length,
                VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}

static void
legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
                        const void *src, int length)
{
#ifdef RTE_ARCH_PPC_64
        union {
                uint32_t u32;
                uint16_t u16;
        } tmp;
        int size;

        while (length > 0) {
                if (length >= 4) {
                        size = 4;
                        tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
                        rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size,
                                VIRTIO_PCI_CONFIG(hw) + offset);
                } else if (length >= 2) {
                        size = 2;
                        tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
                        rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size,
                                VIRTIO_PCI_CONFIG(hw) + offset);
                } else {
                        size = 1;
                        rte_pci_ioport_write(VTPCI_IO(hw), src, size,
                                VIRTIO_PCI_CONFIG(hw) + offset);
                }

                src = (const char *)src + size;
                offset += size;
                length -= size;
        }
#else
        rte_pci_ioport_write(VTPCI_IO(hw), src, length,
                VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}

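/*
 * Legacy virtio exposes only 32 feature bits via the
 * VIRTIO_PCI_HOST_FEATURES/VIRTIO_PCI_GUEST_FEATURES I/O registers,
 * hence the 64-bit guard in legacy_set_features() below.
 */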
static uint64_t
legacy_get_features(struct virtio_hw *hw)
{
        uint32_t dst;

        rte_pci_ioport_read(VTPCI_IO(hw), &dst, 4, VIRTIO_PCI_HOST_FEATURES);
        return dst;
}

static void
legacy_set_features(struct virtio_hw *hw, uint64_t features)
{
        if ((features >> 32) != 0) {
                PMD_DRV_LOG(ERR,
                        "only 32 bit features are allowed for legacy virtio!");
                return;
        }
        rte_pci_ioport_write(VTPCI_IO(hw), &features, 4,
                VIRTIO_PCI_GUEST_FEATURES);
}

static uint8_t
legacy_get_status(struct virtio_hw *hw)
{
        uint8_t dst;

        rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS);
        return dst;
}

static void
legacy_set_status(struct virtio_hw *hw, uint8_t status)
{
        rte_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS);
}

static uint8_t
legacy_get_isr(struct virtio_hw *hw)
{
        uint8_t dst;

        rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR);
        return dst;
}

/* Enable one vector (0) for Link State Interrupt */
static uint16_t
legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec)
{
        uint16_t dst;

        rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_CONFIG_VECTOR);
        rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_CONFIG_VECTOR);
        return dst;
}

static uint16_t
legacy_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
{
        uint16_t dst;

        rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
                VIRTIO_PCI_QUEUE_SEL);
        rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_QUEUE_VECTOR);
        rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_QUEUE_VECTOR);
        return dst;
}

static uint16_t
legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
{
        uint16_t dst;

        rte_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2, VIRTIO_PCI_QUEUE_SEL);
        rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM);
        return dst;
}

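/*
 * Program the selected queue's ring physical address into the 32-bit
 * VIRTIO_PCI_QUEUE_PFN register as a page frame number (the address
 * shifted right by VIRTIO_PCI_QUEUE_ADDR_SHIFT), after checking that
 * it fits.
 */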
static int
legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
        uint32_t src;

        if (!check_vq_phys_addr_ok(vq))
                return -1;

        rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
                VIRTIO_PCI_QUEUE_SEL);
        src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
        rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);

        return 0;
}

static void
legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
        uint32_t src = 0;

        rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
                VIRTIO_PCI_QUEUE_SEL);
        rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
}

static void
legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
        rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
                VIRTIO_PCI_QUEUE_NOTIFY);
}

static void
legacy_intr_detect(struct virtio_hw *hw)
{
        hw->use_msix = vtpci_msix_detect(VTPCI_DEV(hw));
}

const struct virtio_pci_ops legacy_ops = {
        .read_dev_cfg   = legacy_read_dev_config,
        .write_dev_cfg  = legacy_write_dev_config,
        .get_status     = legacy_get_status,
        .set_status     = legacy_set_status,
        .get_features   = legacy_get_features,
        .set_features   = legacy_set_features,
        .get_isr        = legacy_get_isr,
        .set_config_irq = legacy_set_config_irq,
        .set_queue_irq  = legacy_set_queue_irq,
        .get_queue_num  = legacy_get_queue_num,
        .setup_queue    = legacy_setup_queue,
        .del_queue      = legacy_del_queue,
        .notify_queue   = legacy_notify_queue,
        .intr_detect    = legacy_intr_detect,
};

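/*
 * The modern common config exposes 64-bit fields as lo/hi register
 * pairs; write the value as two 32-bit MMIO accesses.
 */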
static inline void
io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
{
        rte_write32(val & ((1ULL << 32) - 1), lo);
        rte_write32(val >> 32,               hi);
}

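/*
 * Read the device-specific config space byte by byte, retrying until
 * the config generation counter is stable, so the snapshot is
 * consistent even if the device updates the config concurrently.
 */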
static void
modern_read_dev_config(struct virtio_hw *hw, size_t offset,
                       void *dst, int length)
{
        int i;
        uint8_t *p;
        uint8_t old_gen, new_gen;

        do {
                old_gen = rte_read8(&hw->common_cfg->config_generation);

                p = dst;
                for (i = 0; i < length; i++)
                        *p++ = rte_read8((uint8_t *)hw->dev_cfg + offset + i);

                new_gen = rte_read8(&hw->common_cfg->config_generation);
        } while (old_gen != new_gen);
}

static void
modern_write_dev_config(struct virtio_hw *hw, size_t offset,
                        const void *src, int length)
{
        int i;
        const uint8_t *p = src;

        for (i = 0; i < length; i++)
                rte_write8((*p++), (((uint8_t *)hw->dev_cfg) + offset + i));
}

static uint64_t
modern_get_features(struct virtio_hw *hw)
{
        uint32_t features_lo, features_hi;

        rte_write32(0, &hw->common_cfg->device_feature_select);
        features_lo = rte_read32(&hw->common_cfg->device_feature);

        rte_write32(1, &hw->common_cfg->device_feature_select);
        features_hi = rte_read32(&hw->common_cfg->device_feature);

        return ((uint64_t)features_hi << 32) | features_lo;
}

static void
modern_set_features(struct virtio_hw *hw, uint64_t features)
{
        rte_write32(0, &hw->common_cfg->guest_feature_select);
        rte_write32(features & ((1ULL << 32) - 1),
                    &hw->common_cfg->guest_feature);

        rte_write32(1, &hw->common_cfg->guest_feature_select);
        rte_write32(features >> 32,
                    &hw->common_cfg->guest_feature);
}

static uint8_t
modern_get_status(struct virtio_hw *hw)
{
        return rte_read8(&hw->common_cfg->device_status);
}

static void
modern_set_status(struct virtio_hw *hw, uint8_t status)
{
        rte_write8(status, &hw->common_cfg->device_status);
}

static uint8_t
modern_get_isr(struct virtio_hw *hw)
{
        return rte_read8(hw->isr);
}

static uint16_t
modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
{
        rte_write16(vec, &hw->common_cfg->msix_config);
        return rte_read16(&hw->common_cfg->msix_config);
}

static uint16_t
modern_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
{
        rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
        rte_write16(vec, &hw->common_cfg->queue_msix_vector);
        return rte_read16(&hw->common_cfg->queue_msix_vector);
}

static uint16_t
modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
{
        rte_write16(queue_id, &hw->common_cfg->queue_select);
        return rte_read16(&hw->common_cfg->queue_size);
}

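/*
 * Lay out the split ring in the queue's ring memory: the descriptor
 * table first, the avail ring right after it, and the used ring
 * aligned up to VIRTIO_PCI_VRING_ALIGN. The queue's notify address is
 * derived from queue_notify_off scaled by notify_off_multiplier.
 */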
static int
modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
        uint64_t desc_addr, avail_addr, used_addr;
        uint16_t notify_off;

        if (!check_vq_phys_addr_ok(vq))
                return -1;

        desc_addr = vq->vq_ring_mem;
        avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
        used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
                                                         ring[vq->vq_nentries]),
                                   VIRTIO_PCI_VRING_ALIGN);

        rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);

        io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo,
                                      &hw->common_cfg->queue_desc_hi);
        io_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo,
                                       &hw->common_cfg->queue_avail_hi);
        io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo,
                                      &hw->common_cfg->queue_used_hi);

        notify_off = rte_read16(&hw->common_cfg->queue_notify_off);
        vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
                                notify_off * hw->notify_off_multiplier);

        rte_write16(1, &hw->common_cfg->queue_enable);

        PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
        PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr);
        PMD_INIT_LOG(DEBUG, "\t avail_addr: %" PRIx64, avail_addr);
        PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr);
        PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
                vq->notify_addr, notify_off);

        return 0;
}

static void
modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
        rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);

        io_write64_twopart(0, &hw->common_cfg->queue_desc_lo,
                                  &hw->common_cfg->queue_desc_hi);
        io_write64_twopart(0, &hw->common_cfg->queue_avail_lo,
                                  &hw->common_cfg->queue_avail_hi);
        io_write64_twopart(0, &hw->common_cfg->queue_used_lo,
                                  &hw->common_cfg->queue_used_hi);

        rte_write16(0, &hw->common_cfg->queue_enable);
}

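/*
 * Kick the device. Without VIRTIO_F_NOTIFICATION_DATA only the queue
 * index is written; with it, the avail index (and, for packed rings,
 * the avail wrap counter) is encoded into the notification as well.
 */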
static void
modern_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
        uint32_t notify_data;

        if (!vtpci_with_feature(hw, VIRTIO_F_NOTIFICATION_DATA)) {
                rte_write16(vq->vq_queue_index, vq->notify_addr);
                return;
        }

        if (vtpci_with_feature(hw, VIRTIO_F_RING_PACKED)) {
                /*
                 * Bit[0:15]: vq queue index
                 * Bit[16:30]: avail index
                 * Bit[31]: avail wrap counter
                 */
                notify_data = ((uint32_t)(!!(vq->vq_packed.cached_flags &
                                VRING_PACKED_DESC_F_AVAIL)) << 31) |
                                ((uint32_t)vq->vq_avail_idx << 16) |
                                vq->vq_queue_index;
        } else {
                /*
                 * Bit[0:15]: vq queue index
                 * Bit[16:31]: avail index
                 */
                notify_data = ((uint32_t)vq->vq_avail_idx << 16) |
                                vq->vq_queue_index;
        }
        rte_write32(notify_data, vq->notify_addr);
}

static void
modern_intr_detect(struct virtio_hw *hw)
{
        hw->use_msix = vtpci_msix_detect(VTPCI_DEV(hw));
}

const struct virtio_pci_ops modern_ops = {
        .read_dev_cfg   = modern_read_dev_config,
        .write_dev_cfg  = modern_write_dev_config,
        .get_status     = modern_get_status,
        .set_status     = modern_set_status,
        .get_features   = modern_get_features,
        .set_features   = modern_set_features,
        .get_isr        = modern_get_isr,
        .set_config_irq = modern_set_config_irq,
        .set_queue_irq  = modern_set_queue_irq,
        .get_queue_num  = modern_get_queue_num,
        .setup_queue    = modern_setup_queue,
        .del_queue      = modern_del_queue,
        .notify_queue   = modern_notify_queue,
        .intr_detect    = modern_intr_detect,
};

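/*
 * The vtpci_* helpers below dispatch through the per-port
 * virtio_pci_ops (VTPCI_OPS), so callers stay agnostic of whether the
 * device uses the legacy or modern transport.
 */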
void
vtpci_read_dev_config(struct virtio_hw *hw, size_t offset,
                      void *dst, int length)
{
        VTPCI_OPS(hw)->read_dev_cfg(hw, offset, dst, length);
}

void
vtpci_write_dev_config(struct virtio_hw *hw, size_t offset,
                       const void *src, int length)
{
        VTPCI_OPS(hw)->write_dev_cfg(hw, offset, src, length);
}

uint64_t
vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features)
{
        uint64_t features;

        /*
         * Limit negotiated features to what the driver, virtqueue, and
         * host all support.
         */
        features = host_features & hw->guest_features;
        VTPCI_OPS(hw)->set_features(hw, features);

        return features;
}

void
vtpci_reset(struct virtio_hw *hw)
{
        VTPCI_OPS(hw)->set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
        /* flush status write */
        VTPCI_OPS(hw)->get_status(hw);
}

void
vtpci_reinit_complete(struct virtio_hw *hw)
{
        vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
}

void
vtpci_set_status(struct virtio_hw *hw, uint8_t status)
{
        if (status != VIRTIO_CONFIG_STATUS_RESET)
                status |= VTPCI_OPS(hw)->get_status(hw);

        VTPCI_OPS(hw)->set_status(hw, status);
}

uint8_t
vtpci_get_status(struct virtio_hw *hw)
{
        return VTPCI_OPS(hw)->get_status(hw);
}

uint8_t
vtpci_isr(struct virtio_hw *hw)
{
        return VTPCI_OPS(hw)->get_isr(hw);
}

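/*
 * Translate a virtio PCI capability into a usable pointer: validate
 * the BAR index, check offset + length for integer overflow and
 * against the BAR size, then return the mapped base plus offset.
 */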
static void *
get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
{
        uint8_t  bar    = cap->bar;
        uint32_t length = cap->length;
        uint32_t offset = cap->offset;
        uint8_t *base;

        if (bar >= PCI_MAX_RESOURCE) {
                PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
                return NULL;
        }

        if (offset + length < offset) {
                PMD_INIT_LOG(ERR, "offset(%u) + length(%u) overflows",
                        offset, length);
                return NULL;
        }

        if (offset + length > dev->mem_resource[bar].len) {
                PMD_INIT_LOG(ERR,
                        "invalid cap: overflows bar space: %u > %" PRIu64,
                        offset + length, dev->mem_resource[bar].len);
                return NULL;
        }

        base = dev->mem_resource[bar].addr;
        if (base == NULL) {
                PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
                return NULL;
        }

        return base + offset;
}

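/*
 * Map the device and scan its capability list for the vendor-specific
 * virtio capabilities that locate the modern config structures
 * (common, notify, device-specific and ISR). Returns 0 only if all
 * four were found, i.e. the device is a modern virtio PCI device.
 */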
static int
virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
{
        uint8_t pos;
        struct virtio_pci_cap cap;
        int ret;

        if (rte_pci_map_device(dev)) {
                PMD_INIT_LOG(DEBUG, "failed to map pci device!");
                return -1;
        }

        ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
        if (ret != 1) {
                PMD_INIT_LOG(DEBUG,
                             "failed to read pci capability list, ret %d", ret);
                return -1;
        }

        while (pos) {
                ret = rte_pci_read_config(dev, &cap, 2, pos);
                if (ret != 2) {
                        PMD_INIT_LOG(DEBUG,
                                     "failed to read pci cap at pos: %x ret %d",
                                     pos, ret);
                        break;
                }

                if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
                        /* Transitional devices also expose this capability,
                         * which is why we additionally check whether MSI-X
                         * is enabled. The 1st byte is the cap ID, the 2nd
                         * byte is the position of the next cap, and the next
                         * two bytes are the message control flags.
                         */
                        uint16_t flags;

                        ret = rte_pci_read_config(dev, &flags, sizeof(flags),
                                        pos + 2);
                        if (ret != sizeof(flags)) {
                                PMD_INIT_LOG(DEBUG,
                                             "failed to read pci cap at pos:"
                                             " %x ret %d", pos + 2, ret);
                                break;
                        }

                        if (flags & PCI_MSIX_ENABLE)
                                hw->use_msix = VIRTIO_MSIX_ENABLED;
                        else
                                hw->use_msix = VIRTIO_MSIX_DISABLED;
                }

                if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
                        PMD_INIT_LOG(DEBUG,
                                "[%2x] skipping non VNDR cap id: %02x",
                                pos, cap.cap_vndr);
                        goto next;
                }

                ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
                if (ret != sizeof(cap)) {
                        PMD_INIT_LOG(DEBUG,
                                     "failed to read pci cap at pos: %x ret %d",
                                     pos, ret);
                        break;
                }

                PMD_INIT_LOG(DEBUG,
                        "[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
                        pos, cap.cfg_type, cap.bar, cap.offset, cap.length);

                switch (cap.cfg_type) {
                case VIRTIO_PCI_CAP_COMMON_CFG:
                        hw->common_cfg = get_cfg_addr(dev, &cap);
                        break;
                case VIRTIO_PCI_CAP_NOTIFY_CFG:
                        ret = rte_pci_read_config(dev,
                                        &hw->notify_off_multiplier,
                                        4, pos + sizeof(cap));
                        if (ret != 4)
                                PMD_INIT_LOG(DEBUG,
                                        "failed to read notify_off_multiplier, ret %d",
                                        ret);
                        else
                                hw->notify_base = get_cfg_addr(dev, &cap);
                        break;
                case VIRTIO_PCI_CAP_DEVICE_CFG:
                        hw->dev_cfg = get_cfg_addr(dev, &cap);
                        break;
                case VIRTIO_PCI_CAP_ISR_CFG:
                        hw->isr = get_cfg_addr(dev, &cap);
                        break;
                }

next:
                pos = cap.cap_next;
        }

        if (hw->common_cfg == NULL || hw->notify_base == NULL ||
            hw->dev_cfg == NULL    || hw->isr == NULL) {
                PMD_INIT_LOG(INFO, "no modern virtio pci device found.");
                return -1;
        }

        PMD_INIT_LOG(INFO, "found modern virtio pci device.");

        PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", hw->common_cfg);
        PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", hw->dev_cfg);
        PMD_INIT_LOG(DEBUG, "isr cfg mapped at: %p", hw->isr);
        PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u",
                hw->notify_base, hw->notify_off_multiplier);

        return 0;
}

/*
 * Return -1:
 *   if there is an error mapping with VFIO/UIO.
 *   if there is a port map error when the driver type is KDRV_NONE.
 *   if the device is marked as allowed but the driver type is KDRV_UNKNOWN.
 * Return 1 if a kernel driver is managing the device.
 * Return 0 on success.
 */
int
vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
{
        RTE_BUILD_BUG_ON(offsetof(struct virtio_pci_dev, hw) != 0);

        /*
         * Check whether we can read the virtio PCI capabilities, which
         * exist only on modern PCI devices. If that fails, fall back to
         * legacy virtio handling.
         */
        if (virtio_read_caps(dev, hw) == 0) {
                PMD_INIT_LOG(INFO, "modern virtio pci detected.");
                virtio_hw_internal[hw->port_id].vtpci_ops = &modern_ops;
                hw->bus_type = VIRTIO_BUS_PCI_MODERN;
                goto msix_detect;
        }

        PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
        if (rte_pci_ioport_map(dev, 0, VTPCI_IO(hw)) < 0) {
                rte_pci_unmap_device(dev);
                if (dev->kdrv == RTE_PCI_KDRV_UNKNOWN &&
                    (!dev->device.devargs ||
                     dev->device.devargs->bus !=
                     rte_bus_find_by_name("pci"))) {
                        PMD_INIT_LOG(INFO,
                                "skip kernel managed virtio device.");
                        return 1;
                }
                return -1;
        }

        virtio_hw_internal[hw->port_id].vtpci_ops = &legacy_ops;
        hw->bus_type = VIRTIO_BUS_PCI_LEGACY;

msix_detect:
        VTPCI_OPS(hw)->intr_detect(hw);

        return 0;
}