eal: remove deprecated function for mbuf pool ops
[dpdk.git] / lib / librte_eal / linuxapp / eal / eal.c
index 67e4c6f..511eb06 100644 (file)
@@ -1,35 +1,6 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
- *   Copyright(c) 2012-2014 6WIND S.A.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation.
+ * Copyright(c) 2012-2014 6WIND S.A.
  */
 
 #include <stdio.h>
@@ -46,7 +17,6 @@
 #include <stddef.h>
 #include <errno.h>
 #include <limits.h>
-#include <errno.h>
 #include <sys/mman.h>
 #include <sys/queue.h>
 #include <sys/stat.h>
 #include <sys/io.h>
 #endif
 
+#include <rte_compat.h>
 #include <rte_common.h>
 #include <rte_debug.h>
 #include <rte_memory.h>
-#include <rte_memzone.h>
 #include <rte_launch.h>
 #include <rte_eal.h>
 #include <rte_eal_memconfig.h>
 #include <rte_errno.h>
 #include <rte_per_lcore.h>
 #include <rte_lcore.h>
+#include <rte_service_component.h>
 #include <rte_log.h>
 #include <rte_random.h>
 #include <rte_cycles.h>
 #include <rte_cpuflags.h>
 #include <rte_interrupts.h>
 #include <rte_bus.h>
-#include <rte_pci.h>
 #include <rte_dev.h>
 #include <rte_devargs.h>
-#include <rte_common.h>
 #include <rte_version.h>
 #include <rte_atomic.h>
 #include <malloc_heap.h>
+#include <rte_vfio.h>
 
 #include "eal_private.h"
 #include "eal_thread.h"
@@ -104,8 +74,8 @@ static int mem_cfg_fd = -1;
 static struct flock wr_lock = {
                .l_type = F_WRLCK,
                .l_whence = SEEK_SET,
-               .l_start = offsetof(struct rte_mem_config, memseg),
-               .l_len = sizeof(early_mem_config.memseg),
+               .l_start = offsetof(struct rte_mem_config, memsegs),
+               .l_len = sizeof(early_mem_config.memsegs),
 };
 
 /* Address of global and public configuration */
@@ -122,6 +92,75 @@ struct internal_config internal_config;
 /* used by rte_rdtsc() */
 int rte_cycles_vmware_tsc_map;
 
+/* platform-specific runtime dir */
+static char runtime_dir[PATH_MAX];
+
+static const char *default_runtime_dir = "/var/run";
+
+/*
+ * Build and create the prefix-specific runtime directory.
+ *
+ * The base directory is default_runtime_dir when getuid() returns 0;
+ * otherwise it is $XDG_RUNTIME_DIR if that variable is set, or "/tmp".
+ * "<base>/dpdk" and then "<base>/dpdk/<hugefile_prefix>" are created one
+ * level at a time, and the final path is left in runtime_dir.
+ *
+ * Returns 0 on success (directories that already exist are accepted), or
+ * -1 if a path does not fit in its buffer or a mkdir() call fails.
+ */
+int
+eal_create_runtime_dir(void)
+{
+       const char *directory = default_runtime_dir;
+       const char *xdg_runtime_dir = getenv("XDG_RUNTIME_DIR");
+       const char *fallback = "/tmp";
+       char tmp[PATH_MAX];
+       int ret;
+
+       if (getuid() != 0) {
+               /* try XDG path first, fall back to /tmp */
+               if (xdg_runtime_dir != NULL)
+                       directory = xdg_runtime_dir;
+               else
+                       directory = fallback;
+       }
+       /* create DPDK subdirectory under runtime dir */
+       ret = snprintf(tmp, sizeof(tmp), "%s/dpdk", directory);
+       /* snprintf() returns the length the full string would have had, so
+        * any value >= the buffer size means the path was truncated.
+        */
+       if (ret < 0 || (size_t)ret >= sizeof(tmp)) {
+               RTE_LOG(ERR, EAL, "Error creating DPDK runtime path name\n");
+               return -1;
+       }
+
+       /* create prefix-specific subdirectory under DPDK runtime dir */
+       ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s",
+                       tmp, internal_config.hugefile_prefix);
+       if (ret < 0 || (size_t)ret >= sizeof(runtime_dir)) {
+               RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n");
+               return -1;
+       }
+
+       /* create the path if it doesn't exist. no "mkdir -p" here, so do it
+        * step by step.
+        */
+       ret = mkdir(tmp, 0700);
+       if (ret < 0 && errno != EEXIST) {
+               RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
+                       tmp, strerror(errno));
+               return -1;
+       }
+
+       ret = mkdir(runtime_dir, 0700);
+       if (ret < 0 && errno != EEXIST) {
+               RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
+                       runtime_dir, strerror(errno));
+               return -1;
+       }
+
+       return 0;
+}
+
+/* Return a pointer to the file-scope runtime_dir buffer, which
+ * eal_create_runtime_dir() fills with the prefix-specific runtime path.
+ * The buffer has static storage, so it is an empty string until then.
+ */
+const char *
+eal_get_runtime_dir(void)
+{
+       return runtime_dir;
+}
+
+/* Return the user-provided mbuf pool ops name, i.e. the value of
+ * internal_config.user_mbuf_pool_ops_name. eal_parse_args() stores a
+ * strdup() of the option argument there when the mbuf-pool-ops-name
+ * option is given.
+ * NOTE(review): the value returned when the option was not given depends
+ * on eal_reset_internal_config(), which is not visible here - confirm.
+ */
+const char * __rte_experimental
+rte_eal_mbuf_user_pool_ops(void)
+{
+       return internal_config.user_mbuf_pool_ops_name;
+}
+
 /* Return a pointer to the configuration structure */
 struct rte_config *
 rte_eal_get_configuration(void)
@@ -129,6 +168,12 @@ rte_eal_get_configuration(void)
        return &rte_config;
 }
 
+/* Return the iova_mode field of the global configuration structure
+ * obtained from rte_eal_get_configuration().
+ */
+enum rte_iova_mode
+rte_eal_iova_mode(void)
+{
+       return rte_eal_get_configuration()->iova_mode;
+}
+
 /* parse a sysfs (or other) file containing one integer value */
 int
 eal_parse_sysfs_value(const char *filename, unsigned long *val)
@@ -212,7 +257,7 @@ rte_eal_config_create(void)
                rte_panic("Cannot mmap memory for rte_config\n");
        }
        memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
-       rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
+       rte_config.mem_config = rte_mem_cfg_addr;
 
        /* store address of the config in the config itself so that secondary
         * processes could later map the config into this exact location */
@@ -289,12 +334,17 @@ eal_proc_type_detect(void)
        enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
        const char *pathname = eal_runtime_config_path();
 
-       /* if we can open the file but not get a write-lock we are a secondary
-        * process. NOTE: if we get a file handle back, we keep that open
-        * and don't close it to prevent a race condition between multiple opens */
-       if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
-                       (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
-               ptype = RTE_PROC_SECONDARY;
+       /* if there is no shared config, there can be no secondary processes */
+       if (!internal_config.no_shconf) {
+               /* if we can open the file but not get a write-lock we are a
+                * secondary process. NOTE: if we get a file handle back, we
+                * keep that open and don't close it to prevent a race condition
+                * between multiple opens.
+                */
+               if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+                               (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+                       ptype = RTE_PROC_SECONDARY;
+       }
 
        RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
                        ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
@@ -350,12 +400,14 @@ eal_usage(const char *prgname)
        eal_common_usage();
        printf("EAL Linux options:\n"
               "  --"OPT_SOCKET_MEM"        Memory to allocate on sockets (comma separated values)\n"
+              "  --"OPT_SOCKET_LIMIT"      Limit memory allocation on sockets (comma separated values)\n"
               "  --"OPT_HUGE_DIR"          Directory where hugetlbfs is mounted\n"
               "  --"OPT_FILE_PREFIX"       Prefix for hugepage filenames\n"
               "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
               "  --"OPT_CREATE_UIO_DEV"    Create /dev/uioX (usually done by hotplug)\n"
               "  --"OPT_VFIO_INTR"         Interrupt mode for VFIO (legacy|msi|msix)\n"
-              "  --"OPT_XEN_DOM0"          Support running on Xen dom0 without hugetlbfs\n"
+              "  --"OPT_LEGACY_MEM"        Legacy memory mode (no dynamic allocation, contiguous segments)\n"
+              "  --"OPT_SINGLE_FILE_SEGMENTS" Put all hugepage memory in single files\n"
               "\n");
        /* Allow the application to print its usage message too if hook is set */
        if ( rte_application_usage_hook ) {
@@ -378,46 +430,45 @@ rte_set_application_usage_hook( rte_usage_hook_t usage_func )
 }
 
 static int
-eal_parse_socket_mem(char *socket_mem)
+eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg)
 {
        char * arg[RTE_MAX_NUMA_NODES];
        char *end;
        int arg_num, i, len;
        uint64_t total_mem = 0;
 
-       len = strnlen(socket_mem, SOCKET_MEM_STRLEN);
+       len = strnlen(strval, SOCKET_MEM_STRLEN);
        if (len == SOCKET_MEM_STRLEN) {
                RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
                return -1;
        }
 
        /* all other error cases will be caught later */
-       if (!isdigit(socket_mem[len-1]))
+       if (!isdigit(strval[len-1]))
                return -1;
 
        /* split the optarg into separate socket values */
-       arg_num = rte_strsplit(socket_mem, len,
+       arg_num = rte_strsplit(strval, len,
                        arg, RTE_MAX_NUMA_NODES, ',');
 
        /* if split failed, or 0 arguments */
        if (arg_num <= 0)
                return -1;
 
-       internal_config.force_sockets = 1;
-
        /* parse each defined socket option */
        errno = 0;
        for (i = 0; i < arg_num; i++) {
+               uint64_t val;
                end = NULL;
-               internal_config.socket_mem[i] = strtoull(arg[i], &end, 10);
+               val = strtoull(arg[i], &end, 10);
 
                /* check for invalid input */
                if ((errno != 0)  ||
                                (arg[i][0] == '\0') || (end == NULL) || (*end != '\0'))
                        return -1;
-               internal_config.socket_mem[i] *= 1024ULL;
-               internal_config.socket_mem[i] *= 1024ULL;
-               total_mem += internal_config.socket_mem[i];
+               val <<= 20;
+               total_mem += val;
+               socket_arg[i] = val;
        }
 
        /* check if we have a positive amount of total memory */
@@ -492,8 +543,6 @@ eal_log_level_parse(int argc, char **argv)
        argvopt = argv;
        optind = 1;
 
-       eal_reset_internal_config(&internal_config);
-
        while ((opt = getopt_long(argc, argvopt, eal_short_options,
                                  eal_long_options, &option_index)) != EOF) {
 
@@ -558,35 +607,36 @@ eal_parse_args(int argc, char **argv)
                        eal_usage(prgname);
                        exit(EXIT_SUCCESS);
 
-               /* long options */
-               case OPT_XEN_DOM0_NUM:
-#ifdef RTE_LIBRTE_XEN_DOM0
-                       internal_config.xen_dom0_support = 1;
-#else
-                       RTE_LOG(ERR, EAL, "Can't support DPDK app "
-                               "running on Dom0, please configure"
-                               " RTE_LIBRTE_XEN_DOM0=y\n");
-                       ret = -1;
-                       goto out;
-#endif
-                       break;
-
                case OPT_HUGE_DIR_NUM:
-                       internal_config.hugepage_dir = optarg;
+                       internal_config.hugepage_dir = strdup(optarg);
                        break;
 
                case OPT_FILE_PREFIX_NUM:
-                       internal_config.hugefile_prefix = optarg;
+                       internal_config.hugefile_prefix = strdup(optarg);
                        break;
 
                case OPT_SOCKET_MEM_NUM:
-                       if (eal_parse_socket_mem(optarg) < 0) {
+                       if (eal_parse_socket_arg(optarg,
+                                       internal_config.socket_mem) < 0) {
                                RTE_LOG(ERR, EAL, "invalid parameters for --"
                                                OPT_SOCKET_MEM "\n");
                                eal_usage(prgname);
                                ret = -1;
                                goto out;
                        }
+                       internal_config.force_sockets = 1;
+                       break;
+
+               case OPT_SOCKET_LIMIT_NUM:
+                       if (eal_parse_socket_arg(optarg,
+                                       internal_config.socket_limit) < 0) {
+                               RTE_LOG(ERR, EAL, "invalid parameters for --"
+                                               OPT_SOCKET_LIMIT "\n");
+                               eal_usage(prgname);
+                               ret = -1;
+                               goto out;
+                       }
+                       internal_config.force_socket_limits = 1;
                        break;
 
                case OPT_BASE_VIRTADDR_NUM:
@@ -613,6 +663,11 @@ eal_parse_args(int argc, char **argv)
                        internal_config.create_uio_dev = 1;
                        break;
 
+               case OPT_MBUF_POOL_OPS_NAME_NUM:
+                       internal_config.user_mbuf_pool_ops_name =
+                           strdup(optarg);
+                       break;
+
                default:
                        if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
                                RTE_LOG(ERR, EAL, "Option %c is not supported "
@@ -632,22 +687,21 @@ eal_parse_args(int argc, char **argv)
                }
        }
 
-       if (eal_adjust_config(&internal_config) != 0) {
+       /* create runtime data directory */
+       if (internal_config.no_shconf == 0 &&
+                       eal_create_runtime_dir() < 0) {
+               RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
                ret = -1;
                goto out;
        }
 
-       /* sanity checks */
-       if (eal_check_common_options(&internal_config) != 0) {
-               eal_usage(prgname);
+       if (eal_adjust_config(&internal_config) != 0) {
                ret = -1;
                goto out;
        }
 
-       /* --xen-dom0 doesn't make sense with --socket-mem */
-       if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) {
-               RTE_LOG(ERR, EAL, "Options --"OPT_SOCKET_MEM" cannot be specified "
-                       "together with --"OPT_XEN_DOM0"\n");
+       /* sanity checks */
+       if (eal_check_common_options(&internal_config) != 0) {
                eal_usage(prgname);
                ret = -1;
                goto out;
@@ -666,23 +720,23 @@ out:
        return ret;
 }
 
+/* Callback passed to rte_memseg_list_walk() by
+ * eal_check_mem_on_local_socket(). arg points to an int holding a socket
+ * id; returns non-zero when the memseg list belongs to that socket and 0
+ * otherwise.
+ * NOTE(review): presumably a non-zero return stops the walk early -
+ * confirm against rte_memseg_list_walk().
+ */
+static int
+check_socket(const struct rte_memseg_list *msl, void *arg)
+{
+       int *socket_id = arg;
+
+       return *socket_id == msl->socket_id;
+}
+
+/* Log a warning when the walk over the memseg lists with check_socket()
+ * returns 0 for the master lcore's socket, i.e. no list matched it.
+ */
 static void
 eal_check_mem_on_local_socket(void)
 {
-       const struct rte_memseg *ms;
-       int i, socket_id;
+       int socket_id;
 
        socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
 
-       ms = rte_eal_get_physmem_layout();
-
-       for (i = 0; i < RTE_MAX_MEMSEG; i++)
-               if (ms[i].socket_id == socket_id &&
-                               ms[i].len > 0)
-                       return;
-
-       RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
-                       "memory on local socket!\n");
+       if (rte_memseg_list_walk(check_socket, &socket_id) == 0)
+               RTE_LOG(WARNING, EAL, "WARNING: Master core has no memory on local socket!\n");
 }
 
 static int
@@ -697,6 +751,8 @@ rte_eal_mcfg_complete(void)
        /* ALL shared mem_config related INIT DONE */
        if (rte_config.process_type == RTE_PROC_PRIMARY)
                rte_config.mem_config->magic = RTE_MAGIC;
+
+       internal_config.init_complete = 1;
 }
 
 /*
@@ -717,25 +773,8 @@ rte_eal_iopl_init(void)
 #ifdef VFIO_PRESENT
+/* Enable VFIO by calling rte_vfio_enable("vfio"). Returns -1 when that
+ * call returns non-zero, 0 otherwise.
+ */
 static int rte_eal_vfio_setup(void)
 {
-       int vfio_enabled = 0;
-
-       if (!internal_config.no_pci) {
-               pci_vfio_enable();
-               vfio_enabled |= pci_vfio_is_enabled();
-       }
-
-       if (vfio_enabled) {
-
-               /* if we are primary process, create a thread to communicate with
-                * secondary processes. the thread will use a socket to wait for
-                * requests from secondary process to send open file descriptors,
-                * because VFIO does not allow multiple open descriptors on a group or
-                * VFIO container.
-                */
-               if (internal_config.process_type == RTE_PROC_PRIMARY &&
-                               vfio_mp_sync_setup() < 0)
-                       return -1;
-       }
+       if (rte_vfio_enable("vfio"))
+               return -1;
 
        return 0;
 }
@@ -765,18 +804,21 @@ rte_eal_init(int argc, char **argv)
                return -1;
        }
 
-       if (!rte_atomic32_test_and_set(&run_once))
+       if (!rte_atomic32_test_and_set(&run_once)) {
+               rte_eal_init_alert("already called initialization.");
+               rte_errno = EALREADY;
                return -1;
+       }
 
        logid = strrchr(argv[0], '/');
        logid = strdup(logid ? logid + 1: argv[0]);
 
        thread_id = pthread_self();
 
-       eal_log_level_parse(argc, argv);
+       eal_reset_internal_config(&internal_config);
 
        /* set log level as early as possible */
-       rte_set_log_level(internal_config.log_level);
+       eal_log_level_parse(argc, argv);
 
        if (rte_eal_cpu_init() < 0) {
                rte_eal_init_alert("Cannot detect lcores.");
@@ -785,14 +827,75 @@ rte_eal_init(int argc, char **argv)
        }
 
        fctret = eal_parse_args(argc, argv);
-       if (fctret < 0)
-               exit(1);
+       if (fctret < 0) {
+               rte_eal_init_alert("Invalid 'command line' arguments.");
+               rte_errno = EINVAL;
+               rte_atomic32_clear(&run_once);
+               return -1;
+       }
 
-       if (internal_config.no_hugetlbfs == 0 &&
-                       internal_config.process_type != RTE_PROC_SECONDARY &&
-                       internal_config.xen_dom0_support == 0 &&
-                       eal_hugepage_info_init() < 0)
-               rte_panic("Cannot get hugepage information\n");
+       if (eal_plugins_init() < 0) {
+               rte_eal_init_alert("Cannot init plugins\n");
+               rte_errno = EINVAL;
+               rte_atomic32_clear(&run_once);
+               return -1;
+       }
+
+       if (eal_option_device_parse()) {
+               rte_errno = ENODEV;
+               rte_atomic32_clear(&run_once);
+               return -1;
+       }
+
+       rte_config_init();
+
+       if (rte_eal_intr_init() < 0) {
+               rte_eal_init_alert("Cannot init interrupt-handling thread\n");
+               return -1;
+       }
+
+       /* Put mp channel init before bus scan so that we can init the vdev
+        * bus through mp channel in the secondary process before the bus scan.
+        */
+       if (rte_mp_channel_init() < 0) {
+               rte_eal_init_alert("failed to init mp channel\n");
+               if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+                       rte_errno = EFAULT;
+                       return -1;
+               }
+       }
+
+       if (rte_bus_scan()) {
+               rte_eal_init_alert("Cannot scan the buses for devices\n");
+               rte_errno = ENODEV;
+               rte_atomic32_clear(&run_once);
+               return -1;
+       }
+
+       /* autodetect the iova mapping mode (default is iova_pa) */
+       rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class();
+
+       /* Workaround for KNI which requires physical address to work */
+       if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA &&
+                       rte_eal_check_module("rte_kni") == 1) {
+               rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA;
+               RTE_LOG(WARNING, EAL,
+                       "Some devices want IOVA as VA but PA will be used because.. "
+                       "KNI module inserted\n");
+       }
+
+       if (internal_config.no_hugetlbfs == 0) {
+               /* rte_config isn't initialized yet */
+               ret = internal_config.process_type == RTE_PROC_PRIMARY ?
+                               eal_hugepage_info_init() :
+                               eal_hugepage_info_read();
+               if (ret < 0) {
+                       rte_eal_init_alert("Cannot get hugepage information.");
+                       rte_errno = EACCES;
+                       rte_atomic32_clear(&run_once);
+                       return -1;
+               }
+       }
 
        if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
                if (internal_config.no_hugetlbfs)
@@ -812,56 +915,74 @@ rte_eal_init(int argc, char **argv)
 
        rte_srand(rte_rdtsc());
 
-       rte_config_init();
-
-       if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0)
-               rte_panic("Cannot init logs\n");
-
-       if (rte_eal_pci_init() < 0)
-               rte_panic("Cannot init PCI\n");
+       if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) {
+               rte_eal_init_alert("Cannot init logging.");
+               rte_errno = ENOMEM;
+               rte_atomic32_clear(&run_once);
+               return -1;
+       }
 
 #ifdef VFIO_PRESENT
-       if (rte_eal_vfio_setup() < 0)
-               rte_panic("Cannot init VFIO\n");
+       if (rte_eal_vfio_setup() < 0) {
+               rte_eal_init_alert("Cannot init VFIO\n");
+               rte_errno = EAGAIN;
+               rte_atomic32_clear(&run_once);
+               return -1;
+       }
 #endif
+       /* in secondary processes, memory init may allocate additional fbarrays
+        * not present in primary processes, so to avoid any potential issues,
+        * initialize memzones first.
+        */
+       if (rte_eal_memzone_init() < 0) {
+               rte_eal_init_alert("Cannot init memzone\n");
+               rte_errno = ENODEV;
+               return -1;
+       }
 
-       if (rte_eal_memory_init() < 0)
-               rte_panic("Cannot init memory\n");
+       if (rte_eal_memory_init() < 0) {
+               rte_eal_init_alert("Cannot init memory\n");
+               rte_errno = ENOMEM;
+               return -1;
+       }
 
        /* the directories are locked during eal_hugepage_info_init */
        eal_hugedirs_unlock();
 
-       if (rte_eal_memzone_init() < 0)
-               rte_panic("Cannot init memzone\n");
+       if (rte_eal_malloc_heap_init() < 0) {
+               rte_eal_init_alert("Cannot init malloc heap\n");
+               rte_errno = ENODEV;
+               return -1;
+       }
 
-       if (rte_eal_tailqs_init() < 0)
-               rte_panic("Cannot init tail queues for objects\n");
+       if (rte_eal_tailqs_init() < 0) {
+               rte_eal_init_alert("Cannot init tail queues for objects\n");
+               rte_errno = EFAULT;
+               return -1;
+       }
 
-       if (rte_eal_alarm_init() < 0)
-               rte_panic("Cannot init interrupt-handling thread\n");
+       if (rte_eal_alarm_init() < 0) {
+               rte_eal_init_alert("Cannot init interrupt-handling thread\n");
+               /* rte_eal_alarm_init sets rte_errno on failure. */
+               return -1;
+       }
 
-       if (rte_eal_timer_init() < 0)
-               rte_panic("Cannot init HPET or TSC timers\n");
+       if (rte_eal_timer_init() < 0) {
+               rte_eal_init_alert("Cannot init HPET or TSC timers\n");
+               rte_errno = ENOTSUP;
+               return -1;
+       }
 
        eal_check_mem_on_local_socket();
 
-       if (eal_plugins_init() < 0)
-               rte_panic("Cannot init plugins\n");
-
        eal_thread_init_master(rte_config.master_lcore);
 
-       ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+       ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
 
        RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
                rte_config.master_lcore, (int)thread_id, cpuset,
                ret == 0 ? "" : "...");
 
-       if (rte_eal_intr_init() < 0)
-               rte_panic("Cannot init interrupt-handling thread\n");
-
-       if (rte_bus_scan())
-               rte_panic("Cannot scan the buses for devices\n");
-
        RTE_LCORE_FOREACH_SLAVE(i) {
 
                /*
@@ -882,7 +1003,7 @@ rte_eal_init(int argc, char **argv)
                        rte_panic("Cannot create thread\n");
 
                /* Set thread_name for aid in debugging. */
-               snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+               snprintf(thread_name, sizeof(thread_name),
                        "lcore-slave-%d", i);
                ret = rte_thread_setname(lcore_config[i].thread_id,
                                                thread_name);
@@ -898,22 +1019,65 @@ rte_eal_init(int argc, char **argv)
        rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
        rte_eal_mp_wait_lcore();
 
+       /* initialize services so vdevs register service during bus_probe. */
+       ret = rte_service_init();
+       if (ret) {
+               rte_eal_init_alert("rte_service_init() failed\n");
+               rte_errno = ENOEXEC;
+               return -1;
+       }
+
        /* Probe all the buses and devices/drivers on them */
-       if (rte_bus_probe())
-               rte_panic("Cannot probe devices\n");
+       if (rte_bus_probe()) {
+               rte_eal_init_alert("Cannot probe devices\n");
+               rte_errno = ENOTSUP;
+               return -1;
+       }
 
-       /* Probe & Initialize PCI devices */
-       if (rte_eal_pci_probe())
-               rte_panic("Cannot probe PCI\n");
+#ifdef VFIO_PRESENT
+       /* Register mp action after probe() so that we got enough info */
+       if (rte_vfio_is_enabled("vfio") && vfio_mp_sync_setup() < 0)
+               return -1;
+#endif
 
-       if (rte_eal_dev_init() < 0)
-               rte_panic("Cannot init pmd devices\n");
+       /* initialize default service/lcore mappings and start running. Ignore
+        * -ENOTSUP, as it indicates no service coremask passed to EAL.
+        */
+       ret = rte_service_start_with_defaults();
+       if (ret < 0 && ret != -ENOTSUP) {
+               rte_errno = ENOEXEC;
+               return -1;
+       }
 
        rte_eal_mcfg_complete();
 
        return fctret;
 }
 
+/* Callback passed to rte_memseg_walk() by rte_eal_cleanup(): clears the
+ * RTE_MEMSEG_FLAG_DO_NOT_FREE bit on the visited memseg. arg is unused.
+ * Always returns 0.
+ */
+static int
+mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+               void *arg __rte_unused)
+{
+       /* ms is const, so find this memseg */
+       struct rte_memseg *found = rte_mem_virt2memseg(ms->addr, msl);
+
+       /* NOTE(review): found is dereferenced without a NULL check; confirm
+        * that the lookup cannot fail for a memseg handed out by the walk.
+        */
+       found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE;
+
+       return 0;
+}
+
+/* EAL cleanup entry point. In a primary process, walks all memsegs with
+ * mark_freeable(); in every process, then calls rte_service_finalize().
+ * The result of the memseg walk is not checked. Always returns 0.
+ */
+int __rte_experimental
+rte_eal_cleanup(void)
+{
+       /* if we're in a primary process, we need to mark hugepages as freeable
+        * so that finalization can release them back to the system.
+        */
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+               rte_memseg_walk(mark_freeable, NULL);
+       rte_service_finalize();
+       return 0;
+}
+
 /* get core role */
 enum rte_lcore_role_t
 rte_eal_lcore_role(unsigned lcore_id)
@@ -932,6 +1096,22 @@ int rte_eal_has_hugepages(void)
        return ! internal_config.no_hugetlbfs;
 }
 
+/* Return non-zero unless internal_config.no_pci is set. */
+int rte_eal_has_pci(void)
+{
+       return !internal_config.no_pci;
+}
+
+/* Return internal_config.create_uio_dev, which eal_parse_args() sets to 1
+ * when the create-uio-dev option is given.
+ */
+int rte_eal_create_uio_dev(void)
+{
+       return internal_config.create_uio_dev;
+}
+
+/* Return the VFIO interrupt mode stored in internal_config.vfio_intr_mode. */
+enum rte_intr_mode
+rte_eal_vfio_intr_mode(void)
+{
+       return internal_config.vfio_intr_mode;
+}
+
 int
 rte_eal_check_module(const char *module_name)
 {