X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;ds=sidebyside;f=lib%2Flibrte_eal%2Flinuxapp%2Feal%2Feal_pci.c;h=3b94b6fb54fd487cb728741cbe515db0bdd46d55;hb=629395b063e8278a05ea41908d1152fa68df098c;hp=29f17287472be3b6217b910032b4dba5617b030a;hpb=46a6fa87931a46153e4ed987e55e750607e03246;p=dpdk.git diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index 29f1728747..3b94b6fb54 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -32,8 +32,6 @@ */ #include -#include -#include #include #include @@ -47,6 +45,7 @@ #include "rte_pci_dev_ids.h" #include "eal_filesystem.h" #include "eal_private.h" +#include "eal_pci_init.h" /** * @file @@ -57,30 +56,7 @@ * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). */ -struct pci_map { - void *addr; - uint64_t offset; - uint64_t size; - uint64_t phaddr; -}; - -/* - * For multi-process we need to reproduce all PCI mappings in secondary - * processes, so save them in a tailq. - */ -struct mapped_pci_resource { - TAILQ_ENTRY(mapped_pci_resource) next; - - struct rte_pci_addr pci_addr; - char path[PATH_MAX]; - int nb_maps; - struct pci_map maps[PCI_MAX_RESOURCE]; -}; - -TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource); -static struct mapped_pci_res_list *pci_res_list; - -static int pci_parse_sysfs_value(const char *filename, uint64_t *val); +struct mapped_pci_res_list *pci_res_list = NULL; /* unbind kernel driver for this device */ static int @@ -122,7 +98,7 @@ error: } /* map a particular resource from a file */ -static void * +void * pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size) { void *mapaddr; @@ -147,342 +123,6 @@ fail: return NULL; } -#define OFF_MAX ((uint64_t)(off_t)-1) -static int -pci_uio_get_mappings(const char *devname, struct pci_map maps[], int nb_maps) -{ - int i; - char dirname[PATH_MAX]; - char filename[PATH_MAX]; - uint64_t offset, size; - - for (i = 0; i != nb_maps; i++) { - - /* check if map directory exists */ - rte_snprintf(dirname, sizeof(dirname), - "%s/maps/map%u", devname, i); - - if (access(dirname, F_OK) != 0) - break; - - /* get mapping offset */ - rte_snprintf(filename, sizeof(filename), - "%s/offset", dirname); - if (pci_parse_sysfs_value(filename, &offset) < 0) { - RTE_LOG(ERR, EAL, - "%s(): cannot parse offset of %s\n", - __func__, dirname); - return (-1); - } - - /* get mapping size */ - rte_snprintf(filename, sizeof(filename), - "%s/size", dirname); - if (pci_parse_sysfs_value(filename, &size) < 0) { - RTE_LOG(ERR, EAL, - "%s(): cannot parse size of %s\n", - __func__, dirname); - return (-1); - } - - /* get mapping physical address */ - rte_snprintf(filename, sizeof(filename), - "%s/addr", dirname); - if (pci_parse_sysfs_value(filename, &maps[i].phaddr) < 0) { - RTE_LOG(ERR, EAL, - "%s(): cannot parse addr of %s\n", - __func__, dirname); - return (-1); - } - - if ((offset > OFF_MAX) || (size > SIZE_MAX)) { - RTE_LOG(ERR, EAL, - "%s(): offset/size exceed system max value\n", - __func__); - return (-1); - } - - maps[i].offset = offset; - maps[i].size = size; - } - return (i); -} - -static int -pci_uio_map_secondary(struct rte_pci_device *dev) -{ - int fd, i; - struct mapped_pci_resource *uio_res; - - TAILQ_FOREACH(uio_res, pci_res_list, next) { - - /* skip this element if it doesn't match our PCI address */ - if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr))) - continue; - - for (i = 0; i != uio_res->nb_maps; i++) { - /* - * open devname, to mmap it - */ - fd = open(uio_res->path, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - uio_res->path, strerror(errno)); - return -1; - } - - if (pci_map_resource(uio_res->maps[i].addr, fd, - (off_t)uio_res->maps[i].offset, - (size_t)uio_res->maps[i].size) - != uio_res->maps[i].addr) { - RTE_LOG(ERR, EAL, - "Cannot mmap device resource\n"); - close(fd); - return (-1); - } - /* fd is not needed in slave process, close it */ - close(fd); - } - return (0); - } - - RTE_LOG(ERR, EAL, "Cannot find resource for device\n"); - return -1; -} - -static int -pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num) -{ - FILE *f; - char filename[PATH_MAX]; - int ret; - unsigned major, minor; - dev_t dev; - - /* get the name of the sysfs file that contains the major and minor - * of the uio device and read its content */ - rte_snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path); - - f = fopen(filename, "r"); - if (f == NULL) { - RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n", - __func__); - return -1; - } - - ret = fscanf(f, "%d:%d", &major, &minor); - if (ret != 2) { - RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n", - __func__); - fclose(f); - return -1; - } - fclose(f); - - /* create the char device "mknod /dev/uioX c major minor" */ - rte_snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); - dev = makedev(major, minor); - ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev); - if (f == NULL) { - RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n", - __func__, strerror(errno)); - return -1; - } - - return ret; -} - -/* - * Return the uioX char device used for a pci device. On success, return - * the UIO number and fill dstbuf string with the path of the device in - * sysfs. On error, return a negative value. In this case dstbuf is - * invalid. - */ -static int -pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf, - unsigned int buflen) -{ - struct rte_pci_addr *loc = &dev->addr; - unsigned int uio_num; - struct dirent *e; - DIR *dir; - char dirname[PATH_MAX]; - - /* depending on kernel version, uio can be located in uio/uioX - * or uio:uioX */ - - rte_snprintf(dirname, sizeof(dirname), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio", - loc->domain, loc->bus, loc->devid, loc->function); - - dir = opendir(dirname); - if (dir == NULL) { - /* retry with the parent directory */ - rte_snprintf(dirname, sizeof(dirname), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT, - loc->domain, loc->bus, loc->devid, loc->function); - dir = opendir(dirname); - - if (dir == NULL) { - RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname); - return -1; - } - } - - /* take the first file starting with "uio" */ - while ((e = readdir(dir)) != NULL) { - /* format could be uio%d ...*/ - int shortprefix_len = sizeof("uio") - 1; - /* ... or uio:uio%d */ - int longprefix_len = sizeof("uio:uio") - 1; - char *endptr; - - if (strncmp(e->d_name, "uio", 3) != 0) - continue; - - /* first try uio%d */ - errno = 0; - uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10); - if (errno == 0 && endptr != (e->d_name + shortprefix_len)) { - rte_snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num); - break; - } - - /* then try uio:uio%d */ - errno = 0; - uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10); - if (errno == 0 && endptr != (e->d_name + longprefix_len)) { - rte_snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num); - break; - } - } - closedir(dir); - - /* No uio resource found */ - if (e == NULL) - return -1; - - /* create uio device if we've been asked to */ - if (internal_config.create_uio_dev && pci_mknod_uio_dev(dstbuf, uio_num) < 0) - RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num); - - return uio_num; -} - -/* map the PCI resource of a PCI device in virtual memory */ -static int -pci_uio_map_resource(struct rte_pci_device *dev) -{ - int i, j; - char dirname[PATH_MAX]; - char devname[PATH_MAX]; /* contains the /dev/uioX */ - void *mapaddr; - int uio_num; - uint64_t phaddr; - uint64_t offset; - uint64_t pagesz; - int nb_maps; - struct rte_pci_addr *loc = &dev->addr; - struct mapped_pci_resource *uio_res; - struct pci_map *maps; - - dev->intr_handle.fd = -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - - /* secondary processes - use already recorded details */ - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return (pci_uio_map_secondary(dev)); - - /* find uio resource */ - uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname)); - if (uio_num < 0) { - RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " - "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); - return -1; - } - rte_snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num); - - /* save fd if in primary process */ - dev->intr_handle.fd = open(devname, O_RDWR); - if (dev->intr_handle.fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - devname, strerror(errno)); - return -1; - } - dev->intr_handle.type = RTE_INTR_HANDLE_UIO; - - /* allocate the mapping details for secondary processes*/ - if ((uio_res = rte_zmalloc("UIO_RES", sizeof (*uio_res), 0)) == NULL) { - RTE_LOG(ERR, EAL, - "%s(): cannot store uio mmap details\n", __func__); - return (-1); - } - - rte_snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname); - memcpy(&uio_res->pci_addr, &dev->addr, sizeof(uio_res->pci_addr)); - - /* collect info about device mappings */ - nb_maps = pci_uio_get_mappings(dirname, uio_res->maps, - RTE_DIM(uio_res->maps)); - if (nb_maps < 0) { - rte_free(uio_res); - return (nb_maps); - } - - uio_res->nb_maps = nb_maps; - - /* Map all BARs */ - pagesz = sysconf(_SC_PAGESIZE); - - maps = uio_res->maps; - for (i = 0; i != PCI_MAX_RESOURCE; i++) { - int fd; - - /* skip empty BAR */ - if ((phaddr = dev->mem_resource[i].phys_addr) == 0) - continue; - - for (j = 0; j != nb_maps && (phaddr != maps[j].phaddr || - dev->mem_resource[i].len != maps[j].size); - j++) - ; - - /* if matching map is found, then use it */ - if (j != nb_maps) { - offset = j * pagesz; - - /* - * open devname, to mmap it - */ - fd = open(devname, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - devname, strerror(errno)); - return -1; - } - - if (maps[j].addr != NULL || - (mapaddr = pci_map_resource(NULL, fd, - (off_t)offset, - (size_t)maps[j].size) - ) == NULL) { - rte_free(uio_res); - close(fd); - return (-1); - } - close(fd); - - maps[j].addr = mapaddr; - maps[j].offset = offset; - dev->mem_resource[i].addr = mapaddr; - } - } - - TAILQ_INSERT_TAIL(pci_res_list, uio_res, next); - - return (0); -} - /* parse the "resource" sysfs file */ #define IORESOURCE_MEM 0x00000200 @@ -546,41 +186,6 @@ error: return -1; } -/* - * parse a sysfs file containing one integer value - * different to the eal version, as it needs to work with 64-bit values - */ -static int -pci_parse_sysfs_value(const char *filename, uint64_t *val) -{ - FILE *f; - char buf[BUFSIZ]; - char *end = NULL; - - f = fopen(filename, "r"); - if (f == NULL) { - RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n", - __func__, filename); - return -1; - } - - if (fgets(buf, sizeof(buf), f) == NULL) { - RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n", - __func__, filename); - fclose(f); - return -1; - } - *val = strtoull(buf, &end, 0); - if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) { - RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n", - __func__, filename); - fclose(f); - return -1; - } - fclose(f); - return 0; -} - /* Compare two PCI device addresses. */ static int pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr *addr2) @@ -788,6 +393,30 @@ error: return -1; } +static int +pci_map_device(struct rte_pci_device *dev) +{ + int ret, mapped = 0; + + /* try mapping the NIC resources using VFIO if it exists */ +#ifdef VFIO_PRESENT + if (pci_vfio_is_enabled()) { + ret = pci_vfio_map_resource(dev); + if (ret == 0) + mapped = 1; + else if (ret < 0) + return ret; + } +#endif + /* map resources for devices that use igb_uio */ + if (!mapped) { + ret = pci_uio_map_resource(dev); + if (ret != 0) + return ret; + } + return 0; +} + /* * If vendor/device ID match, call the devinit() function of the * driver. @@ -795,6 +424,7 @@ error: int rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *dev) { + int ret; struct rte_pci_id *id_table; for (id_table = dr->id_table ; id_table->vendor_id != 0; id_table++) { @@ -826,13 +456,14 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d if (dev->devargs != NULL && dev->devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) { RTE_LOG(DEBUG, EAL, " Device is blacklisted, not initializing\n"); - return 0; + return 1; } - if (dr->drv_flags & RTE_PCI_DRV_NEED_IGB_UIO) { + if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { /* map resources for devices that use igb_uio */ - if (pci_uio_map_resource(dev) < 0) - return -1; + ret = pci_map_device(dev); + if (ret != 0) + return ret; } else if (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND && rte_eal_process_type() == RTE_PROC_PRIMARY) { /* unbind current driver */ @@ -867,5 +498,21 @@ rte_eal_pci_init(void) RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__); return -1; } +#ifdef VFIO_PRESENT + pci_vfio_enable(); + + if (pci_vfio_is_enabled()) { + + /* if we are primary process, create a thread to communicate with + * secondary processes. the thread will use a socket to wait for + * requests from secondary process to send open file descriptors, + * because VFIO does not allow multiple open descriptors on a group or + * VFIO container. + */ + if (internal_config.process_type == RTE_PROC_PRIMARY && + pci_vfio_mp_sync_setup() < 0) + return -1; + } +#endif return 0; }