X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_eal%2Flinuxapp%2Feal%2Feal_pci.c;h=b4dbf953afcb723c8b9746a796d45362cec31379;hb=84cc318424d49372dd2a5fbf3cf84426bf95acce;hp=9f752529b9adfa1e5a832218bab42a47f3406f6b;hpb=b8eb345378bdefc4c976eaaf5abb221e0aca3460;p=dpdk.git diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index 9f752529b9..b4dbf953af 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -44,6 +45,7 @@ #include "eal_filesystem.h" #include "eal_private.h" #include "eal_pci_init.h" +#include "eal_vfio.h" /** * @file @@ -54,44 +56,7 @@ * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). */ -/* unbind kernel driver for this device */ -int -pci_unbind_kernel_driver(struct rte_pci_device *dev) -{ - int n; - FILE *f; - char filename[PATH_MAX]; - char buf[BUFSIZ]; - struct rte_pci_addr *loc = &dev->addr; - - /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */ - snprintf(filename, sizeof(filename), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver/unbind", - loc->domain, loc->bus, loc->devid, loc->function); - - f = fopen(filename, "w"); - if (f == NULL) /* device was not bound */ - return 0; - - n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n", - loc->domain, loc->bus, loc->devid, loc->function); - if ((n < 0) || (n >= (int)sizeof(buf))) { - RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__); - goto error; - } - if (fwrite(buf, n, 1, f) == 0) { - RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__, - filename); - goto error; - } - - fclose(f); - return 0; - -error: - fclose(f); - return -1; -} +extern struct rte_pci_bus rte_pci_bus; static int pci_get_kernel_driver_by_path(const char *filename, char *dri_name) @@ -124,7 +89,7 @@ pci_get_kernel_driver_by_path(const char *filename, char *dri_name) /* Map pci device */ int -rte_eal_pci_map_device(struct rte_pci_device *dev) +rte_pci_map_device(struct rte_pci_device *dev) { int ret = -1; @@ -138,8 +103,10 @@ rte_eal_pci_map_device(struct rte_pci_device *dev) break; case RTE_KDRV_IGB_UIO: case RTE_KDRV_UIO_GENERIC: - /* map resources for devices that use uio */ - ret = pci_uio_map_resource(dev); + if (rte_eal_using_phys_addrs()) { + /* map resources for devices that use uio */ + ret = pci_uio_map_resource(dev); + } break; default: RTE_LOG(DEBUG, EAL, @@ -153,12 +120,15 @@ rte_eal_pci_map_device(struct rte_pci_device *dev) /* Unmap pci device */ void -rte_eal_pci_unmap_device(struct rte_pci_device *dev) +rte_pci_unmap_device(struct rte_pci_device *dev) { /* try unmapping the NIC resources using VFIO if it exists */ switch (dev->kdrv) { case RTE_KDRV_VFIO: - RTE_LOG(ERR, EAL, "Hotplug doesn't support vfio yet\n"); +#ifdef VFIO_PRESENT + if (pci_vfio_is_enabled()) + pci_vfio_unmap_resource(dev); +#endif break; case RTE_KDRV_IGB_UIO: case RTE_KDRV_UIO_GENERIC: @@ -190,12 +160,13 @@ pci_find_max_end_va(void) return RTE_PTR_ADD(last->addr, last->len); } -/* parse the "resource" sysfs file */ -static int -pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) +/* parse one line of the "resource" sysfs file (note that the 'line' + * string is modified) + */ +int +pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr, + uint64_t *end_addr, uint64_t *flags) { - FILE *f; - char buf[BUFSIZ]; union pci_resource_info { struct { char *phys_addr; @@ -204,6 +175,31 @@ pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) }; char *ptrs[PCI_RESOURCE_FMT_NVAL]; } res_info; + + if (rte_strsplit(line, len, res_info.ptrs, 3, ' ') != 3) { + RTE_LOG(ERR, EAL, + "%s(): bad resource format\n", __func__); + return -1; + } + errno = 0; + *phys_addr = strtoull(res_info.phys_addr, NULL, 16); + *end_addr = strtoull(res_info.end_addr, NULL, 16); + *flags = strtoull(res_info.flags, NULL, 16); + if (errno != 0) { + RTE_LOG(ERR, EAL, + "%s(): bad resource format\n", __func__); + return -1; + } + + return 0; +} + +/* parse the "resource" sysfs file */ +static int +pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) +{ + FILE *f; + char buf[BUFSIZ]; int i; uint64_t phys_addr, end_addr, flags; @@ -220,21 +216,9 @@ pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) "%s(): cannot read resource\n", __func__); goto error; } - - if (rte_strsplit(buf, sizeof(buf), res_info.ptrs, 3, ' ') != 3) { - RTE_LOG(ERR, EAL, - "%s(): bad resource format\n", __func__); + if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr, + &end_addr, &flags) < 0) goto error; - } - errno = 0; - phys_addr = strtoull(res_info.phys_addr, NULL, 16); - end_addr = strtoull(res_info.end_addr, NULL, 16); - flags = strtoull(res_info.flags, NULL, 16); - if (errno != 0) { - RTE_LOG(ERR, EAL, - "%s(): bad resource format\n", __func__); - goto error; - } if (flags & IORESOURCE_MEM) { dev->mem_resource[i].phys_addr = phys_addr; @@ -253,8 +237,7 @@ error: /* Scan one pci sysfs entry, and fill the devices list from it. */ static int -pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, - uint8_t devid, uint8_t function) +pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) { char filename[PATH_MAX]; unsigned long tmp; @@ -267,10 +250,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, return -1; memset(dev, 0, sizeof(*dev)); - dev->addr.domain = domain; - dev->addr.bus = bus; - dev->addr.devid = devid; - dev->addr.function = function; + dev->addr = *addr; /* get vendor id */ snprintf(filename, sizeof(filename), "%s/vendor", dirname); @@ -306,6 +286,16 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, } dev->id.subsystem_device_id = (uint16_t)tmp; + /* get class_id */ + snprintf(filename, sizeof(filename), "%s/class", + dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + /* the least 24 bits are valid: class, subclass, program interface */ + dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID; + /* get max_vfs */ dev->max_vfs = 0; snprintf(filename, sizeof(filename), "%s/max_vfs", dirname); @@ -321,20 +311,21 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, dev->max_vfs = (uint16_t)tmp; } - /* get numa node */ + /* get numa node, default to 0 if not present */ snprintf(filename, sizeof(filename), "%s/numa_node", dirname); - if (access(filename, R_OK) != 0) { - /* if no NUMA support, set default to 0 */ - dev->numa_node = 0; + + if (access(filename, F_OK) != -1) { + if (eal_parse_sysfs_value(filename, &tmp) == 0) + dev->device.numa_node = tmp; + else + dev->device.numa_node = -1; } else { - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->numa_node = tmp; + dev->device.numa_node = 0; } + pci_name_set(dev); + /* parse resources */ snprintf(filename, sizeof(filename), "%s/resource", dirname); if (pci_parse_sysfs_resource(filename, dev) < 0) { @@ -365,40 +356,53 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, dev->kdrv = RTE_KDRV_NONE; /* device is valid, add in list (sorted) */ - if (TAILQ_EMPTY(&pci_device_list)) { - TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { + rte_pci_add_device(dev); } else { struct rte_pci_device *dev2; int ret; - TAILQ_FOREACH(dev2, &pci_device_list, next) { + TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); if (ret > 0) continue; if (ret < 0) { - TAILQ_INSERT_BEFORE(dev2, dev, next); + rte_pci_insert_device(dev2, dev); } else { /* already registered */ dev2->kdrv = dev->kdrv; dev2->max_vfs = dev->max_vfs; + pci_name_set(dev2); memmove(dev2->mem_resource, dev->mem_resource, sizeof(dev->mem_resource)); free(dev); } return 0; } - TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + + rte_pci_add_device(dev); } return 0; } +int +pci_update_device(const struct rte_pci_addr *addr) +{ + char filename[PATH_MAX]; + + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT, + pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, + addr->function); + + return pci_scan_one(filename, addr); +} + /* * split up a pci address into its constituent parts. */ static int -parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, - uint8_t *bus, uint8_t *devid, uint8_t *function) +parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr) { /* first split on ':' */ union splitaddr { @@ -426,10 +430,10 @@ parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, /* now convert to int values */ errno = 0; - *domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); - *bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); - *devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); - *function = (uint8_t)strtoul(splitaddr.function, NULL, 10); + addr->domain = strtoul(splitaddr.domain, NULL, 16); + addr->bus = strtoul(splitaddr.bus, NULL, 16); + addr->devid = strtoul(splitaddr.devid, NULL, 16); + addr->function = strtoul(splitaddr.function, NULL, 10); if (errno != 0) goto error; @@ -445,15 +449,18 @@ error: * list */ int -rte_eal_pci_scan(void) +rte_pci_scan(void) { struct dirent *e; DIR *dir; char dirname[PATH_MAX]; - uint16_t domain; - uint8_t bus, devid, function; + struct rte_pci_addr addr; + + /* for debug purposes, PCI can be disabled */ + if (internal_config.no_pci) + return 0; - dir = opendir(SYSFS_PCI_DEVICES); + dir = opendir(pci_get_sysfs_path()); if (dir == NULL) { RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", __func__, strerror(errno)); @@ -464,13 +471,13 @@ rte_eal_pci_scan(void) if (e->d_name[0] == '.') continue; - if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &domain, - &bus, &devid, &function) != 0) + if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0) continue; - snprintf(dirname, sizeof(dirname), "%s/%s", SYSFS_PCI_DEVICES, - e->d_name); - if (pci_scan_one(dirname, domain, bus, devid, function) < 0) + snprintf(dirname, sizeof(dirname), "%s/%s", + pci_get_sysfs_path(), e->d_name); + + if (pci_scan_one(dirname, &addr) < 0) goto error; } closedir(dir); @@ -481,21 +488,104 @@ error: return -1; } -#ifdef RTE_PCI_CONFIG /* - * It is deprecated, all its configurations have been moved into - * each PMD respectively. + * Is pci device bound to any kdrv */ -void -pci_config_space_set(__rte_unused struct rte_pci_device *dev) +static inline int +pci_one_device_is_bound(void) +{ + struct rte_pci_device *dev = NULL; + int ret = 0; + + FOREACH_DEVICE_ON_PCIBUS(dev) { + if (dev->kdrv == RTE_KDRV_UNKNOWN || + dev->kdrv == RTE_KDRV_NONE) { + continue; + } else { + ret = 1; + break; + } + } + return ret; +} + +/* + * Any one of the device bound to uio + */ +static inline int +pci_one_device_bound_uio(void) { - RTE_LOG(DEBUG, EAL, "Nothing here, as it is deprecated\n"); + struct rte_pci_device *dev = NULL; + + FOREACH_DEVICE_ON_PCIBUS(dev) { + if (dev->kdrv == RTE_KDRV_IGB_UIO || + dev->kdrv == RTE_KDRV_UIO_GENERIC) { + return 1; + } + } + return 0; } + +/* + * Any one of the device has iova as va + */ +static inline int +pci_one_device_has_iova_va(void) +{ + struct rte_pci_device *dev = NULL; + struct rte_pci_driver *drv = NULL; + + FOREACH_DRIVER_ON_PCIBUS(drv) { + if (drv && drv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) { + FOREACH_DEVICE_ON_PCIBUS(dev) { + if (dev->kdrv == RTE_KDRV_VFIO && + rte_pci_match(drv, dev)) + return 1; + } + } + } + return 0; +} + +/* + * Get iommu class of PCI devices on the bus. + */ +enum rte_iova_mode +rte_pci_get_iommu_class(void) +{ + bool is_bound; + bool is_vfio_noiommu_enabled = true; + bool has_iova_va; + bool is_bound_uio; + + is_bound = pci_one_device_is_bound(); + if (!is_bound) + return RTE_IOVA_DC; + + has_iova_va = pci_one_device_has_iova_va(); + is_bound_uio = pci_one_device_bound_uio(); +#ifdef VFIO_PRESENT + is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == true ? + true : false; #endif + if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled) + return RTE_IOVA_VA; + + if (has_iova_va) { + RTE_LOG(WARNING, EAL, "Some devices want iova as va but pa will be used because.. "); + if (is_vfio_noiommu_enabled) + RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n"); + if (is_bound_uio) + RTE_LOG(WARNING, EAL, "few device bound to UIO\n"); + } + + return RTE_IOVA_PA; +} + /* Read PCI config space. */ -int rte_eal_pci_read_config(const struct rte_pci_device *device, - void *buf, size_t len, off_t offset) +int rte_pci_read_config(const struct rte_pci_device *device, + void *buf, size_t len, off_t offset) { const struct rte_intr_handle *intr_handle = &device->intr_handle; @@ -519,8 +609,8 @@ int rte_eal_pci_read_config(const struct rte_pci_device *device, } /* Write PCI config space. */ -int rte_eal_pci_write_config(const struct rte_pci_device *device, - const void *buf, size_t len, off_t offset) +int rte_pci_write_config(const struct rte_pci_device *device, + const void *buf, size_t len, off_t offset) { const struct rte_intr_handle *intr_handle = &device->intr_handle; @@ -546,7 +636,7 @@ int rte_eal_pci_write_config(const struct rte_pci_device *device, #if defined(RTE_ARCH_X86) static int pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, - struct rte_pci_ioport *p) + struct rte_pci_ioport *p) { uint16_t start, end; FILE *fp; @@ -604,8 +694,8 @@ pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, #endif int -rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) +rte_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) { int ret = -1; @@ -617,8 +707,14 @@ rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, break; #endif case RTE_KDRV_IGB_UIO: + ret = pci_uio_ioport_map(dev, bar, p); + break; case RTE_KDRV_UIO_GENERIC: +#if defined(RTE_ARCH_X86) + ret = pci_ioport_map(dev, bar, p); +#else ret = pci_uio_ioport_map(dev, bar, p); +#endif break; case RTE_KDRV_NONE: #if defined(RTE_ARCH_X86) @@ -636,8 +732,8 @@ rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, } void -rte_eal_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) +rte_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) { switch (p->dev->kdrv) { #ifdef VFIO_PRESENT @@ -646,21 +742,24 @@ rte_eal_pci_ioport_read(struct rte_pci_ioport *p, break; #endif case RTE_KDRV_IGB_UIO: + pci_uio_ioport_read(p, data, len, offset); + break; case RTE_KDRV_UIO_GENERIC: pci_uio_ioport_read(p, data, len, offset); break; - default: + case RTE_KDRV_NONE: #if defined(RTE_ARCH_X86) - /* special case for x86 ... */ pci_uio_ioport_read(p, data, len, offset); #endif break; + default: + break; } } void -rte_eal_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) +rte_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) { switch (p->dev->kdrv) { #ifdef VFIO_PRESENT @@ -669,78 +768,51 @@ rte_eal_pci_ioport_write(struct rte_pci_ioport *p, break; #endif case RTE_KDRV_IGB_UIO: + pci_uio_ioport_write(p, data, len, offset); + break; case RTE_KDRV_UIO_GENERIC: pci_uio_ioport_write(p, data, len, offset); break; - default: + case RTE_KDRV_NONE: #if defined(RTE_ARCH_X86) - /* special case for x86 ... */ pci_uio_ioport_write(p, data, len, offset); #endif break; + default: + break; } } int -rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) +rte_pci_ioport_unmap(struct rte_pci_ioport *p) { - int ret; + int ret = -1; switch (p->dev->kdrv) { #ifdef VFIO_PRESENT case RTE_KDRV_VFIO: - ret = -1; if (pci_vfio_is_enabled()) ret = pci_vfio_ioport_unmap(p); break; #endif case RTE_KDRV_IGB_UIO: - case RTE_KDRV_UIO_GENERIC: ret = pci_uio_ioport_unmap(p); break; - default: + case RTE_KDRV_UIO_GENERIC: #if defined(RTE_ARCH_X86) - /* special case for x86 ... nothing to do */ ret = 0; #else - ret = -1; + ret = pci_uio_ioport_unmap(p); #endif break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + ret = 0; +#endif + break; + default: + break; } return ret; } - -/* Init the PCI EAL subsystem */ -int -rte_eal_pci_init(void) -{ - TAILQ_INIT(&pci_driver_list); - TAILQ_INIT(&pci_device_list); - - /* for debug purposes, PCI can be disabled */ - if (internal_config.no_pci) - return 0; - - if (rte_eal_pci_scan() < 0) { - RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__); - return -1; - } -#ifdef VFIO_PRESENT - pci_vfio_enable(); - - if (pci_vfio_is_enabled()) { - - /* if we are primary process, create a thread to communicate with - * secondary processes. the thread will use a socket to wait for - * requests from secondary process to send open file descriptors, - * because VFIO does not allow multiple open descriptors on a group or - * VFIO container. - */ - if (internal_config.process_type == RTE_PROC_PRIMARY && - pci_vfio_mp_sync_setup() < 0) - return -1; - } -#endif - return 0; -}