raw/ifpga: scan PCIe BDF device tree
authorRosen Xu <rosen.xu@intel.com>
Thu, 14 Nov 2019 09:02:59 +0000 (17:02 +0800)
committerFerruh Yigit <ferruh.yigit@intel.com>
Wed, 20 Nov 2019 16:36:05 +0000 (17:36 +0100)
Add PCIe BDF devices tree scan for ipn3ke.

Signed-off-by: Rosen Xu <rosen.xu@intel.com>
Signed-off-by: Andy Pei <andy.pei@intel.com>
drivers/raw/ifpga/ifpga_rawdev.c
drivers/raw/ifpga/ifpga_rawdev.h

index 972a2e2..63696a7 100644 (file)
@@ -8,6 +8,8 @@
 #include <unistd.h>
 #include <sys/types.h>
 #include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/epoll.h>
 #include <rte_log.h>
 #include <rte_bus.h>
 #include <rte_malloc.h>
@@ -17,7 +19,7 @@
 #include <rte_bus_pci.h>
 #include <rte_kvargs.h>
 #include <rte_alarm.h>
-
+#include <rte_interrupts.h>
 #include <rte_errno.h>
 #include <rte_per_lcore.h>
 #include <rte_memory.h>
@@ -25,6 +27,7 @@
 #include <rte_eal.h>
 #include <rte_common.h>
 #include <rte_bus_vdev.h>
+#include <rte_string_fns.h>
 
 #include "base/opae_hw_api.h"
 #include "base/opae_ifpga_hw_api.h"
 #include "ifpga_rawdev.h"
 #include "ipn3ke_rawdev_api.h"
 
+#define RTE_PCI_EXT_CAP_ID_ERR           0x01  /* Advanced Error Reporting */
+#define RTE_PCI_CFG_SPACE_SIZE           256
+#define RTE_PCI_CFG_SPACE_EXP_SIZE       4096
+#define RTE_PCI_EXT_CAP_ID(header)       (int)(header & 0x0000ffff)
+#define RTE_PCI_EXT_CAP_NEXT(header)     ((header >> 20) & 0xffc)
+
 int ifpga_rawdev_logtype;
 
 #define PCI_VENDOR_ID_INTEL          0x8086
@@ -64,6 +73,494 @@ static const struct rte_pci_id pci_ifpga_map[] = {
        { .vendor_id = 0, /* sentinel */ },
 };
 
+static struct ifpga_rawdev ifpga_rawdevices[IFPGA_RAWDEV_NUM];
+
+static int ifpga_monitor_start;
+static pthread_t ifpga_monitor_start_thread;
+
+static struct ifpga_rawdev *
+ifpga_rawdev_allocate(struct rte_rawdev *rawdev);
+static int set_surprise_link_check_aer(
+               struct ifpga_rawdev *ifpga_rdev, int force_disable);
+static int ifpga_pci_find_next_ext_capability(unsigned int fd,
+               int start, int cap);
+static int ifpga_pci_find_ext_capability(unsigned int fd, int cap);
+
+struct ifpga_rawdev *
+ifpga_rawdev_get(const struct rte_rawdev *rawdev)
+{
+       struct ifpga_rawdev *dev;
+       unsigned int i;
+
+       if (rawdev == NULL)
+               return NULL;
+
+       for (i = 0; i < IFPGA_RAWDEV_NUM; i++) {
+               dev = &ifpga_rawdevices[i];
+               if (dev->rawdev == rawdev)
+                       return dev;
+       }
+
+       return NULL;
+}
+
+static inline uint8_t
+ifpga_rawdev_find_free_device_index(void)
+{
+       uint16_t dev_id;
+
+       for (dev_id = 0; dev_id < IFPGA_RAWDEV_NUM; dev_id++) {
+               if (ifpga_rawdevices[dev_id].rawdev == NULL)
+                       return dev_id;
+       }
+
+       return IFPGA_RAWDEV_NUM;
+}
+static struct ifpga_rawdev *
+ifpga_rawdev_allocate(struct rte_rawdev *rawdev)
+{
+       struct ifpga_rawdev *dev;
+       uint16_t dev_id;
+
+       dev = ifpga_rawdev_get(rawdev);
+       if (dev != NULL) {
+               IFPGA_RAWDEV_PMD_ERR("Event device already allocated!");
+               return NULL;
+       }
+
+       dev_id = ifpga_rawdev_find_free_device_index();
+       if (dev_id == IFPGA_RAWDEV_NUM) {
+               IFPGA_RAWDEV_PMD_ERR("Reached maximum number of raw devices");
+               return NULL;
+       }
+
+       dev = &ifpga_rawdevices[dev_id];
+       dev->rawdev = rawdev;
+       dev->dev_id = dev_id;
+
+       return dev;
+}
+
+static int ifpga_pci_find_next_ext_capability(unsigned int fd,
+int start, int cap)
+{
+       uint32_t header;
+       int ttl;
+       int pos = RTE_PCI_CFG_SPACE_SIZE;
+       int ret;
+
+       /* minimum 8 bytes per capability */
+       ttl = (RTE_PCI_CFG_SPACE_EXP_SIZE - RTE_PCI_CFG_SPACE_SIZE) / 8;
+
+       if (start)
+               pos = start;
+       ret = pread(fd, &header, sizeof(header), pos);
+       if (ret == -1)
+               return -1;
+
+       /*
+        * If we have no capabilities, this is indicated by cap ID,
+        * cap version and next pointer all being 0.
+        */
+       if (header == 0)
+               return 0;
+
+       while (ttl-- > 0) {
+               if (RTE_PCI_EXT_CAP_ID(header) == cap && pos != start)
+                       return pos;
+
+               pos = RTE_PCI_EXT_CAP_NEXT(header);
+               if (pos < RTE_PCI_CFG_SPACE_SIZE)
+                       break;
+               ret = pread(fd, &header, sizeof(header), pos);
+               if (ret == -1)
+                       return -1;
+       }
+
+       return 0;
+}
+
+static int ifpga_pci_find_ext_capability(unsigned int fd, int cap)
+{
+       return ifpga_pci_find_next_ext_capability(fd, 0, cap);
+}
+
+static int ifpga_get_dev_vendor_id(const char *bdf,
+       uint32_t *dev_id, uint32_t *vendor_id)
+{
+       int fd;
+       char path[1024];
+       int ret;
+       uint32_t header;
+
+       strlcpy(path, "/sys/bus/pci/devices/", sizeof(path));
+       strlcat(path, bdf, sizeof(path));
+       strlcat(path, "/config", sizeof(path));
+       fd = open(path, O_RDWR);
+       if (fd < 0)
+               return -1;
+       ret = pread(fd, &header, sizeof(header), 0);
+       if (ret == -1) {
+               close(fd);
+               return -1;
+       }
+       (*vendor_id) = header & 0xffff;
+       (*dev_id) = (header >> 16) & 0xffff;
+       close(fd);
+
+       return 0;
+}
+static int ifpga_rawdev_fill_info(struct ifpga_rawdev *ifpga_dev,
+       const char *bdf)
+{
+       char path[1024] = "/sys/bus/pci/devices/0000:";
+       char link[1024], link1[1024];
+       char dir[1024] = "/sys/devices/";
+       char *c;
+       int ret;
+       char sub_brg_bdf[4][16];
+       int point;
+       DIR *dp = NULL;
+       struct dirent *entry;
+       int i, j;
+
+       unsigned int dom, bus, dev;
+       int func;
+       uint32_t dev_id, vendor_id;
+
+       strlcat(path, bdf, sizeof(path));
+       memset(link, 0, sizeof(link));
+       memset(link1, 0, sizeof(link1));
+       ret = readlink(path, link, (sizeof(link)-1));
+       if (ret == -1)
+               return -1;
+       strlcpy(link1, link, sizeof(link1));
+       memset(ifpga_dev->parent_bdf, 0, 16);
+       point = strlen(link);
+       if (point < 39)
+               return -1;
+       point -= 39;
+       link[point] = 0;
+       if (point < 12)
+               return -1;
+       point -= 12;
+       rte_memcpy(ifpga_dev->parent_bdf, &link[point], 12);
+
+       point = strlen(link1);
+       if (point < 26)
+               return -1;
+       point -= 26;
+       link1[point] = 0;
+       if (point < 12)
+               return -1;
+       point -= 12;
+       c = strchr(link1, 'p');
+       if (!c)
+               return -1;
+       strlcat(dir, c, sizeof(dir));
+
+       /* scan folder */
+       dp = opendir(dir);
+       if (dp == NULL)
+               return -1;
+       i = 0;
+       while ((entry = readdir(dp)) != NULL) {
+               if (i >= 4)
+                       break;
+               if (entry->d_name[0] == '.')
+                       continue;
+               if (strlen(entry->d_name) > 12)
+                       continue;
+               if (sscanf(entry->d_name, "%x:%x:%x.%d",
+                       &dom, &bus, &dev, &func) < 4)
+                       continue;
+               else {
+                       strlcpy(sub_brg_bdf[i],
+                               entry->d_name,
+                               sizeof(sub_brg_bdf[i]));
+                       i++;
+               }
+       }
+       closedir(dp);
+
+       /* get fpga and fvl */
+       j = 0;
+       for (i = 0; i < 4; i++) {
+               strlcpy(link, dir, sizeof(link));
+               strlcat(link, "/", sizeof(link));
+               strlcat(link, sub_brg_bdf[i], sizeof(link));
+               dp = opendir(link);
+               if (dp == NULL)
+                       return -1;
+               while ((entry = readdir(dp)) != NULL) {
+                       if (j >= 8)
+                               break;
+                       if (entry->d_name[0] == '.')
+                               continue;
+
+                       if (strlen(entry->d_name) > 12)
+                               continue;
+                       if (sscanf(entry->d_name, "%x:%x:%x.%d",
+                               &dom, &bus, &dev, &func) < 4)
+                               continue;
+                       else {
+                               if (ifpga_get_dev_vendor_id(entry->d_name,
+                                       &dev_id, &vendor_id))
+                                       continue;
+                               if (vendor_id == 0x8086 &&
+                                       (dev_id == 0x0CF8 ||
+                                       dev_id == 0x0D58 ||
+                                       dev_id == 0x1580)) {
+                                       strlcpy(ifpga_dev->fvl_bdf[j],
+                                               entry->d_name,
+                                               sizeof(ifpga_dev->fvl_bdf[j]));
+                                       j++;
+                               }
+                       }
+               }
+               closedir(dp);
+       }
+
+       return 0;
+}
+
+#define HIGH_FATAL(_sens, value)\
+       (((_sens)->flags & OPAE_SENSOR_HIGH_FATAL_VALID) &&\
+        (value > (_sens)->high_fatal))
+
+#define HIGH_WARN(_sens, value)\
+       (((_sens)->flags & OPAE_SENSOR_HIGH_WARN_VALID) &&\
+        (value > (_sens)->high_warn))
+
+#define LOW_FATAL(_sens, value)\
+       (((_sens)->flags & OPAE_SENSOR_LOW_FATAL_VALID) &&\
+        (value > (_sens)->low_fatal))
+
+#define LOW_WARN(_sens, value)\
+       (((_sens)->flags & OPAE_SENSOR_LOW_WARN_VALID) &&\
+        (value > (_sens)->low_warn))
+
+#define AUX_VOLTAGE_WARN 11400
+
+static int
+ifpga_monitor_sensor(struct rte_rawdev *raw_dev,
+              bool *gsd_start)
+{
+       struct opae_adapter *adapter;
+       struct opae_manager *mgr;
+       struct opae_sensor_info *sensor;
+       unsigned int value;
+       int ret;
+
+       adapter = ifpga_rawdev_get_priv(raw_dev);
+       if (!adapter)
+               return -ENODEV;
+
+       mgr = opae_adapter_get_mgr(adapter);
+       if (!mgr)
+               return -ENODEV;
+
+       opae_mgr_for_each_sensor(sensor) {
+               if (!(sensor->flags & OPAE_SENSOR_VALID))
+                       goto fail;
+
+               ret = opae_mgr_get_sensor_value(mgr, sensor, &value);
+               if (ret)
+                       goto fail;
+
+               if (value == 0xdeadbeef) {
+                       IFPGA_RAWDEV_PMD_ERR("sensor %s is invalid value %x\n",
+                                       sensor->name, value);
+                       continue;
+               }
+
+               /* monitor temperature sensors */
+               if (!strcmp(sensor->name, "Board Temperature") ||
+                               !strcmp(sensor->name, "FPGA Die Temperature")) {
+                       IFPGA_RAWDEV_PMD_INFO("read sensor %s %d %d %d\n",
+                                       sensor->name, value, sensor->high_warn,
+                                       sensor->high_fatal);
+
+                       if (HIGH_WARN(sensor, value) ||
+                               LOW_WARN(sensor, value)) {
+                               IFPGA_RAWDEV_PMD_INFO("%s reach theshold %d\n",
+                                       sensor->name, value);
+                               *gsd_start = true;
+                               break;
+                       }
+               }
+
+               /* monitor 12V AUX sensor */
+               if (!strcmp(sensor->name, "12V AUX Voltage")) {
+                       if (value < AUX_VOLTAGE_WARN) {
+                               IFPGA_RAWDEV_PMD_INFO("%s reach theshold %d\n",
+                                               sensor->name, value);
+                               *gsd_start = true;
+                               break;
+                       }
+               }
+       }
+
+       return 0;
+fail:
+       return -EFAULT;
+}
+
+static int set_surprise_link_check_aer(
+       struct ifpga_rawdev *ifpga_rdev, int force_disable)
+{
+       struct rte_rawdev *rdev;
+       int fd = -1;
+       char path[1024];
+       int pos;
+       int ret;
+       uint32_t data;
+       bool enable = 0;
+       uint32_t aer_new0, aer_new1;
+
+       if (!ifpga_rdev) {
+               printf("\n device does not exist\n");
+               return -EFAULT;
+       }
+
+       rdev = ifpga_rdev->rawdev;
+       if (ifpga_rdev->aer_enable)
+               return -EFAULT;
+       if (ifpga_monitor_sensor(rdev, &enable))
+               return -EFAULT;
+       if (enable || force_disable) {
+               IFPGA_RAWDEV_PMD_ERR("Set AER, pls graceful shutdown\n");
+               ifpga_rdev->aer_enable = 1;
+               /* get bridge fd */
+               strlcpy(path, "/sys/bus/pci/devices/", sizeof(path));
+               strlcat(path, ifpga_rdev->parent_bdf, sizeof(path));
+               strlcat(path, "/config", sizeof(path));
+               fd = open(path, O_RDWR);
+               if (fd < 0)
+                       goto end;
+               pos = ifpga_pci_find_ext_capability(fd, RTE_PCI_EXT_CAP_ID_ERR);
+               if (!pos)
+                       goto end;
+               /* save previout ECAP_AER+0x08 */
+               ret = pread(fd, &data, sizeof(data), pos+0x08);
+               if (ret == -1)
+                       goto end;
+               ifpga_rdev->aer_old[0] = data;
+               /* save previout ECAP_AER+0x14 */
+               ret = pread(fd, &data, sizeof(data), pos+0x14);
+               if (ret == -1)
+                       goto end;
+               ifpga_rdev->aer_old[1] = data;
+
+               /* set ECAP_AER+0x08 to 0xFFFFFFFF */
+               data = 0xffffffff;
+               ret = pwrite(fd, &data, 4, pos+0x08);
+               if (ret == -1)
+                       goto end;
+               /* set ECAP_AER+0x14 to 0xFFFFFFFF */
+               ret = pwrite(fd, &data, 4, pos+0x14);
+               if (ret == -1)
+                       goto end;
+
+               /* read current ECAP_AER+0x08 */
+               ret = pread(fd, &data, sizeof(data), pos+0x08);
+               if (ret == -1)
+                       goto end;
+               aer_new0 = data;
+               /* read current ECAP_AER+0x14 */
+               ret = pread(fd, &data, sizeof(data), pos+0x14);
+               if (ret == -1)
+                       goto end;
+               aer_new1 = data;
+
+               if (fd != -1)
+                       close(fd);
+
+               printf(">>>>>>Set AER %x,%x %x,%x\n",
+                       ifpga_rdev->aer_old[0], ifpga_rdev->aer_old[1],
+                       aer_new0, aer_new1);
+
+               return 1;
+               }
+
+end:
+       if (fd != -1)
+               close(fd);
+       return -EFAULT;
+}
+
+static void *
+ifpga_rawdev_gsd_handle(__rte_unused void *param)
+{
+       struct ifpga_rawdev *ifpga_rdev;
+       int i;
+       int gsd_enable, ret;
+#define MS 1000
+
+       while (1) {
+               gsd_enable = 0;
+               for (i = 0; i < IFPGA_RAWDEV_NUM; i++) {
+                       ifpga_rdev = &ifpga_rawdevices[i];
+                       if (ifpga_rdev->rawdev) {
+                               ret = set_surprise_link_check_aer(ifpga_rdev,
+                                       gsd_enable);
+                               if (ret == 1 && !gsd_enable) {
+                                       gsd_enable = 1;
+                                       i = -1;
+                               }
+                       }
+               }
+
+               if (gsd_enable)
+                       printf(">>>>>>Pls Shutdown APP\n");
+
+               rte_delay_us(100 * MS);
+       }
+
+       return NULL;
+}
+
+static int
+ifpga_monitor_start_func(void)
+{
+       int ret;
+
+       if (ifpga_monitor_start == 0) {
+               ret = pthread_create(&ifpga_monitor_start_thread,
+                       NULL,
+                       ifpga_rawdev_gsd_handle, NULL);
+               if (ret) {
+                       IFPGA_RAWDEV_PMD_ERR(
+                               "Fail to create ifpga nonitor thread");
+                       return -1;
+               }
+               ifpga_monitor_start = 1;
+       }
+
+       return 0;
+}
+static int
+ifpga_monitor_stop_func(void)
+{
+       int ret;
+
+       if (ifpga_monitor_start == 1) {
+               ret = pthread_cancel(ifpga_monitor_start_thread);
+               if (ret)
+                       IFPGA_RAWDEV_PMD_ERR("Can't cancel the thread");
+
+               ret = pthread_join(ifpga_monitor_start_thread, NULL);
+               if (ret)
+                       IFPGA_RAWDEV_PMD_ERR("Can't join the thread");
+
+               ifpga_monitor_start = 0;
+
+               return ret;
+       }
+
+       return 0;
+}
+
 static int
 ifpga_fill_afu_dev(struct opae_accelerator *acc,
                struct rte_afu_device *afu_dev)
@@ -372,8 +869,9 @@ ifpga_rawdev_pr(struct rte_rawdev *dev,
        if (ret)
                return ret;
 
-       memcpy(&afu_pr_conf->afu_id.uuid.uuid_low, uuid.b, sizeof(u64));
-       memcpy(&afu_pr_conf->afu_id.uuid.uuid_high, uuid.b + 8, sizeof(u64));
+       rte_memcpy(&afu_pr_conf->afu_id.uuid.uuid_low, uuid.b, sizeof(u64));
+       rte_memcpy(&afu_pr_conf->afu_id.uuid.uuid_high,
+               uuid.b + 8, sizeof(u64));
 
        IFPGA_RAWDEV_PMD_INFO("%s: uuid_l=0x%lx, uuid_h=0x%lx\n", __func__,
                (unsigned long)afu_pr_conf->afu_id.uuid.uuid_low,
@@ -838,6 +1336,7 @@ ifpga_rawdev_create(struct rte_pci_device *pci_dev,
 {
        int ret = 0;
        struct rte_rawdev *rawdev = NULL;
+       struct ifpga_rawdev *dev = NULL;
        struct opae_adapter *adapter = NULL;
        struct opae_manager *mgr = NULL;
        struct opae_adapter_data_pci *data = NULL;
@@ -851,7 +1350,7 @@ ifpga_rawdev_create(struct rte_pci_device *pci_dev,
        }
 
        memset(name, 0, sizeof(name));
-       snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "IFPGA:%x:%02x.%x",
+       snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "IFPGA:%02x:%02x.%x",
                pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
 
        IFPGA_RAWDEV_PMD_INFO("Init %s on NUMA node %d", name, rte_socket_id());
@@ -865,6 +1364,14 @@ ifpga_rawdev_create(struct rte_pci_device *pci_dev,
                goto cleanup;
        }
 
+       dev = ifpga_rawdev_allocate(rawdev);
+       if (dev == NULL) {
+               IFPGA_RAWDEV_PMD_ERR("Unable to allocate ifpga_rawdevice");
+               ret = -EINVAL;
+               goto cleanup;
+       }
+       dev->aer_enable = 0;
+
        /* alloc OPAE_FPGA_PCI data to register to OPAE hardware level API */
        data = opae_adapter_data_alloc(OPAE_FPGA_PCI);
        if (!data) {
@@ -983,6 +1490,7 @@ ifpga_rawdev_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 static int
 ifpga_rawdev_pci_remove(struct rte_pci_device *pci_dev)
 {
+       ifpga_monitor_stop_func();
        return ifpga_rawdev_destroy(pci_dev);
 }
 
@@ -1014,13 +1522,32 @@ static const char * const valid_args[] = {
        NULL
 };
 
+static int ifpga_rawdev_get_string_arg(const char *key __rte_unused,
+       const char *value, void *extra_args)
+{
+       int size;
+       if (!value || !extra_args)
+               return -EINVAL;
+
+       size = strlen(value) + 1;
+       *(char **)extra_args = rte_malloc(NULL, size, RTE_CACHE_LINE_SIZE);
+       if (!*(char **)extra_args)
+               return -ENOMEM;
+
+       strlcpy(*(char **)extra_args, value, size);
+
+       return 0;
+}
 static int
 ifpga_cfg_probe(struct rte_vdev_device *dev)
 {
        struct rte_devargs *devargs;
        struct rte_kvargs *kvlist = NULL;
+       struct rte_rawdev *rawdev = NULL;
+       struct ifpga_rawdev *ifpga_dev;
        int port;
        char *name = NULL;
+       const char *bdf;
        char dev_name[RTE_RAWDEV_NAME_MAX_LEN];
        int ret = -1;
 
@@ -1034,7 +1561,8 @@ ifpga_cfg_probe(struct rte_vdev_device *dev)
 
        if (rte_kvargs_count(kvlist, IFPGA_ARG_NAME) == 1) {
                if (rte_kvargs_process(kvlist, IFPGA_ARG_NAME,
-                                      &rte_ifpga_get_string_arg, &name) < 0) {
+                                      &ifpga_rawdev_get_string_arg,
+                                      &name) < 0) {
                        IFPGA_RAWDEV_PMD_ERR("error to parse %s",
                                     IFPGA_ARG_NAME);
                        goto end;
@@ -1060,6 +1588,19 @@ ifpga_cfg_probe(struct rte_vdev_device *dev)
                goto end;
        }
 
+       memset(dev_name, 0, sizeof(dev_name));
+       snprintf(dev_name, RTE_RAWDEV_NAME_MAX_LEN, "IFPGA:%s", name);
+       rawdev = rte_rawdev_pmd_get_named_dev(dev_name);
+       if (!rawdev)
+               goto end;
+       ifpga_dev = ifpga_rawdev_get(rawdev);
+       if (!ifpga_dev)
+               goto end;
+       bdf = name;
+       ifpga_rawdev_fill_info(ifpga_dev, bdf);
+
+       ifpga_monitor_start_func();
+
        memset(dev_name, 0, sizeof(dev_name));
        snprintf(dev_name, RTE_RAWDEV_NAME_MAX_LEN, "%d|%s",
        port, name);
index e153dba..bd42083 100644 (file)
@@ -46,4 +46,20 @@ ifpga_rawdev_get_priv(const struct rte_rawdev *rawdev)
        return rawdev->dev_private;
 }
 
+#define IFPGA_RAWDEV_MSIX_IRQ_NUM 7
+#define IFPGA_RAWDEV_NUM 32
+
+struct ifpga_rawdev {
+       int dev_id;
+       struct rte_rawdev *rawdev;
+       int aer_enable;
+       int intr_fd[IFPGA_RAWDEV_MSIX_IRQ_NUM+1];
+       uint32_t aer_old[2];
+       char fvl_bdf[8][16];
+       char parent_bdf[16];
+};
+
+struct ifpga_rawdev *
+ifpga_rawdev_get(const struct rte_rawdev *rawdev);
+
 #endif /* _IFPGA_RAWDEV_H_ */