From: Jianfeng Tan Date: Tue, 30 Jan 2018 06:58:08 +0000 (+0000) Subject: eal: add channel for multi-process communication X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=bacaa2754017e7d7baf27b8bcef8cdc909b30a26;p=dpdk.git eal: add channel for multi-process communication Previouly, there are three channels for multi-process (i.e., primary/secondary) communication. 1. Config-file based channel, in which, the primary process writes info into a pre-defined config file, and the secondary process reads the info out. 2. vfio submodule has its own channel based on unix socket for the secondary process to get container fd and group fd from the primary process. 3. pdump submodule also has its own channel based on unix socket for packet dump. It'd be good to have a generic communication channel for multi-process communication to accommodate the requirements including: a. Secondary wants to send info to primary, for example, secondary would like to send request (about some specific vdev to primary). b. Sending info at any time, instead of just initialization time. c. Share FDs with the other side, for vdev like vhost, related FDs (memory region, kick) should be shared. d. A send message request needs the other side to response immediately. This patch proposes to create a communication channel, based on datagram unix socket, for above requirements. Each process will block on a unix socket waiting for messages from the peers. Three new APIs are added: 1. rte_eal_mp_action_register() is used to register an action, indexed by a string, when a component at receiver side would like to response the messages from the peer processe. 2. rte_eal_mp_action_unregister() is used to unregister the action if the calling component does not want to response the messages. 3. rte_eal_mp_sendmsg() is used to send a message, and returns immediately. If there are n secondary processes, the primary process will send n messages. Suggested-by: Konstantin Ananyev Signed-off-by: Jianfeng Tan Reviewed-by: Anatoly Burakov Acked-by: Konstantin Ananyev --- diff --git a/doc/guides/rel_notes/release_18_02.rst b/doc/guides/rel_notes/release_18_02.rst index 144a8b0ca9..4126c7421e 100644 --- a/doc/guides/rel_notes/release_18_02.rst +++ b/doc/guides/rel_notes/release_18_02.rst @@ -166,6 +166,15 @@ New Features renamed the application from SW PMD specific ``eventdev_pipeline_sw_pmd`` to PMD agnostic ``eventdev_pipeline``. +* **Added new multi-process communication channel** + + Added a generic channel in EAL for multi-process (primary/secondary) communication. + Consumers of this channel need to register an action with an action name to response + a message received; the actions will be identified by the action name and executed + in the context of a new dedicated thread for this channel. The list of new APIs: + + * ``rte_mp_register`` and ``rte_mp_unregister`` are for action (un)registration. + * ``rte_mp_sendmsg`` is for sending a message without blocking for a response. API Changes ----------- diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 0bac6cf4d1..ba1811a000 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2018 Intel Corporation. All rights reserved. * Copyright(c) 2014 6WIND S.A. * All rights reserved. * @@ -604,6 +604,14 @@ rte_eal_init(int argc, char **argv) rte_config_init(); + if (rte_mp_channel_init() < 0) { + rte_eal_init_alert("failed to init mp channel\n"); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + rte_errno = EFAULT; + return -1; + } + } + if (rte_eal_memory_init() < 0) { rte_eal_init_alert("Cannot init memory\n"); rte_errno = ENOMEM; diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c index 40fa982cb9..f63c9c293c 100644 --- a/lib/librte_eal/common/eal_common_proc.c +++ b/lib/librte_eal/common/eal_common_proc.c @@ -1,15 +1,51 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2016 Intel Corporation + * Copyright(c) 2016-2018 Intel Corporation */ -#include +#include +#include #include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include + +#include +#include #include +#include +#include +#include +#include "eal_private.h" #include "eal_filesystem.h" #include "eal_internal_cfg.h" +static int mp_fd = -1; +static char mp_filter[PATH_MAX]; /* Filter for secondary process sockets */ +static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */ +static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER; + +struct action_entry { + TAILQ_ENTRY(action_entry) next; + char action_name[RTE_MP_MAX_NAME_LEN]; + rte_mp_t action; +}; + +/** Double linked list of actions. */ +TAILQ_HEAD(action_entry_list, action_entry); + +static struct action_entry_list action_entry_list = + TAILQ_HEAD_INITIALIZER(action_entry_list); + int rte_eal_primary_proc_alive(const char *config_file_path) { @@ -31,3 +67,402 @@ rte_eal_primary_proc_alive(const char *config_file_path) return !!ret; } + +static struct action_entry * +find_action_entry_by_name(const char *name) +{ + struct action_entry *entry; + + TAILQ_FOREACH(entry, &action_entry_list, next) { + if (strncmp(entry->action_name, name, RTE_MP_MAX_NAME_LEN) == 0) + break; + } + + return entry; +} + +static int +validate_action_name(const char *name) +{ + if (name == NULL) { + RTE_LOG(ERR, EAL, "Action name cannot be NULL\n"); + rte_errno = -EINVAL; + return -1; + } + if (strnlen(name, RTE_MP_MAX_NAME_LEN) == 0) { + RTE_LOG(ERR, EAL, "Length of action name is zero\n"); + rte_errno = -EINVAL; + return -1; + } + if (strnlen(name, RTE_MP_MAX_NAME_LEN) == RTE_MP_MAX_NAME_LEN) { + rte_errno = -E2BIG; + return -1; + } + return 0; +} + +int __rte_experimental +rte_mp_action_register(const char *name, rte_mp_t action) +{ + struct action_entry *entry; + + if (validate_action_name(name)) + return -1; + + entry = malloc(sizeof(struct action_entry)); + if (entry == NULL) { + rte_errno = -ENOMEM; + return -1; + } + strcpy(entry->action_name, name); + entry->action = action; + + pthread_mutex_lock(&mp_mutex_action); + if (find_action_entry_by_name(name) != NULL) { + pthread_mutex_unlock(&mp_mutex_action); + rte_errno = -EEXIST; + free(entry); + return -1; + } + TAILQ_INSERT_TAIL(&action_entry_list, entry, next); + pthread_mutex_unlock(&mp_mutex_action); + return 0; +} + +void __rte_experimental +rte_mp_action_unregister(const char *name) +{ + struct action_entry *entry; + + if (validate_action_name(name)) + return; + + pthread_mutex_lock(&mp_mutex_action); + entry = find_action_entry_by_name(name); + if (entry == NULL) { + pthread_mutex_unlock(&mp_mutex_action); + return; + } + TAILQ_REMOVE(&action_entry_list, entry, next); + pthread_mutex_unlock(&mp_mutex_action); + free(entry); +} + +static int +read_msg(struct rte_mp_msg *msg) +{ + int msglen; + struct iovec iov; + struct msghdr msgh; + char control[CMSG_SPACE(sizeof(msg->fds))]; + struct cmsghdr *cmsg; + int buflen = sizeof(*msg) - sizeof(msg->fds); + + memset(&msgh, 0, sizeof(msgh)); + iov.iov_base = msg; + iov.iov_len = buflen; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + msglen = recvmsg(mp_fd, &msgh, 0); + if (msglen < 0) { + RTE_LOG(ERR, EAL, "recvmsg failed, %s\n", strerror(errno)); + return -1; + } + + if (msglen != buflen || (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { + RTE_LOG(ERR, EAL, "truncted msg\n"); + return -1; + } + + /* read auxiliary FDs if any */ + for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msgh, cmsg)) { + if ((cmsg->cmsg_level == SOL_SOCKET) && + (cmsg->cmsg_type == SCM_RIGHTS)) { + memcpy(msg->fds, CMSG_DATA(cmsg), sizeof(msg->fds)); + break; + } + } + + return 0; +} + +static void +process_msg(struct rte_mp_msg *msg) +{ + struct action_entry *entry; + rte_mp_t action = NULL; + + RTE_LOG(DEBUG, EAL, "msg: %s\n", msg->name); + pthread_mutex_lock(&mp_mutex_action); + entry = find_action_entry_by_name(msg->name); + if (entry != NULL) + action = entry->action; + pthread_mutex_unlock(&mp_mutex_action); + + if (!action) + RTE_LOG(ERR, EAL, "Cannot find action: %s\n", msg->name); + else if (action(msg) < 0) + RTE_LOG(ERR, EAL, "Fail to handle message: %s\n", msg->name); +} + +static void * +mp_handle(void *arg __rte_unused) +{ + struct rte_mp_msg msg; + + while (1) { + if (read_msg(&msg) == 0) + process_msg(&msg); + } + + return NULL; +} + +static int +open_socket_fd(void) +{ + struct sockaddr_un un; + const char *prefix = eal_mp_socket_path(); + + mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0); + if (mp_fd < 0) { + RTE_LOG(ERR, EAL, "failed to create unix socket\n"); + return -1; + } + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + snprintf(un.sun_path, sizeof(un.sun_path), "%s", prefix); + else { + snprintf(un.sun_path, sizeof(un.sun_path), "%s_%d_%"PRIx64, + prefix, getpid(), rte_rdtsc()); + } + unlink(un.sun_path); /* May still exist since last run */ + if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) { + RTE_LOG(ERR, EAL, "failed to bind %s: %s\n", + un.sun_path, strerror(errno)); + close(mp_fd); + return -1; + } + + RTE_LOG(INFO, EAL, "Multi-process socket %s\n", un.sun_path); + return mp_fd; +} + +static int +unlink_sockets(const char *filter) +{ + int dir_fd; + DIR *mp_dir; + struct dirent *ent; + + mp_dir = opendir(mp_dir_path); + if (!mp_dir) { + RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path); + return -1; + } + dir_fd = dirfd(mp_dir); + + while ((ent = readdir(mp_dir))) { + if (fnmatch(filter, ent->d_name, 0) == 0) + unlinkat(dir_fd, ent->d_name, 0); + } + + closedir(mp_dir); + return 0; +} + +static void +unlink_socket_by_path(const char *path) +{ + char *filename; + char *fullpath = strdup(path); + + if (!fullpath) + return; + filename = basename(fullpath); + unlink_sockets(filename); + free(fullpath); + RTE_LOG(INFO, EAL, "Remove socket %s\n", path); +} + +int +rte_mp_channel_init(void) +{ + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + char *path; + pthread_t tid; + + snprintf(mp_filter, PATH_MAX, ".%s_unix_*", + internal_config.hugefile_prefix); + + path = strdup(eal_mp_socket_path()); + snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path)); + free(path); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY && + unlink_sockets(mp_filter)) { + RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n"); + return -1; + } + + if (open_socket_fd() < 0) + return -1; + + if (pthread_create(&tid, NULL, mp_handle, NULL) < 0) { + RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n", + strerror(errno)); + close(mp_fd); + mp_fd = -1; + return -1; + } + + /* try best to set thread name */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "rte_mp_handle"); + rte_thread_setname(tid, thread_name); + return 0; +} + +/** + * Return -1, as fail to send message and it's caused by the local side. + * Return 0, as fail to send message and it's caused by the remote side. + * Return 1, as succeed to send message. + * + */ +static int +send_msg(const char *dst_path, struct rte_mp_msg *msg) +{ + int snd; + struct iovec iov; + struct msghdr msgh; + struct cmsghdr *cmsg; + struct sockaddr_un dst; + int fd_size = msg->num_fds * sizeof(int); + char control[CMSG_SPACE(fd_size)]; + + memset(&dst, 0, sizeof(dst)); + dst.sun_family = AF_UNIX; + snprintf(dst.sun_path, sizeof(dst.sun_path), "%s", dst_path); + + memset(&msgh, 0, sizeof(msgh)); + memset(control, 0, sizeof(control)); + + iov.iov_base = msg; + iov.iov_len = sizeof(*msg) - sizeof(msg->fds); + + msgh.msg_name = &dst; + msgh.msg_namelen = sizeof(dst); + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msgh); + cmsg->cmsg_len = CMSG_LEN(fd_size); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), msg->fds, fd_size); + + do { + snd = sendmsg(mp_fd, &msgh, 0); + } while (snd < 0 && errno == EINTR); + + if (snd < 0) { + rte_errno = errno; + /* Check if it caused by peer process exits */ + if (errno == -ECONNREFUSED) { + /* We don't unlink the primary's socket here */ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + unlink_socket_by_path(dst_path); + return 0; + } + if (errno == -ENOBUFS) { + RTE_LOG(ERR, EAL, "Peer cannot receive message %s\n", + dst_path); + return 0; + } + RTE_LOG(ERR, EAL, "failed to send to (%s) due to %s\n", + dst_path, strerror(errno)); + return -1; + } + + return 1; +} + +static int +mp_send(struct rte_mp_msg *msg) +{ + int ret = 0; + DIR *mp_dir; + struct dirent *ent; + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + if (send_msg(eal_mp_socket_path(), msg) < 0) + return -1; + else + return 0; + } + + /* broadcast to all secondary processes */ + mp_dir = opendir(mp_dir_path); + if (!mp_dir) { + RTE_LOG(ERR, EAL, "Unable to open directory %s\n", + mp_dir_path); + rte_errno = errno; + return -1; + } + while ((ent = readdir(mp_dir))) { + if (fnmatch(mp_filter, ent->d_name, 0) != 0) + continue; + + if (send_msg(ent->d_name, msg) < 0) + ret = -1; + } + closedir(mp_dir); + + return ret; +} + +static bool +check_input(const struct rte_mp_msg *msg) +{ + if (msg == NULL) { + RTE_LOG(ERR, EAL, "Msg cannot be NULL\n"); + rte_errno = -EINVAL; + return false; + } + + if (validate_action_name(msg->name)) + return false; + + if (msg->len_param > RTE_MP_MAX_PARAM_LEN) { + RTE_LOG(ERR, EAL, "Message data is too long\n"); + rte_errno = -E2BIG; + return false; + } + + if (msg->num_fds > RTE_MP_MAX_FD_NUM) { + RTE_LOG(ERR, EAL, "Cannot send more than %d FDs\n", + RTE_MP_MAX_FD_NUM); + rte_errno = -E2BIG; + return false; + } + + return true; +} + +int __rte_experimental +rte_mp_sendmsg(struct rte_mp_msg *msg) +{ + if (!check_input(msg)) + return -1; + + RTE_LOG(DEBUG, EAL, "sendmsg: %s\n", msg->name); + return mp_send(msg); +} diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h index e8959eb0c9..4708dd5499 100644 --- a/lib/librte_eal/common/eal_filesystem.h +++ b/lib/librte_eal/common/eal_filesystem.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ /** @@ -38,6 +38,23 @@ eal_runtime_config_path(void) return buffer; } +/** Path of primary/secondary communication unix socket file. */ +#define MP_SOCKET_PATH_FMT "%s/.%s_unix" +static inline const char * +eal_mp_socket_path(void) +{ + static char buffer[PATH_MAX]; /* static so auto-zeroed */ + const char *directory = default_config_dir; + const char *home_dir = getenv("HOME"); + + if (getuid() != 0 && home_dir != NULL) + directory = home_dir; + snprintf(buffer, sizeof(buffer) - 1, MP_SOCKET_PATH_FMT, + directory, internal_config.hugefile_prefix); + + return buffer; +} + /** Path of hugepage info file. */ #define HUGEPAGE_INFO_FMT "%s/.%s_hugepage_info" diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index c46dd8f343..0b2877000b 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ #ifndef _EAL_PRIVATE_H_ @@ -195,4 +195,14 @@ int rte_eal_hugepage_attach(void); */ struct rte_bus *rte_bus_find_by_device_name(const char *str); +/** + * Create the unix channel for primary/secondary communication. + * + * @return + * 0 on success; + * (<0) on failure. + */ + +int rte_mp_channel_init(void); + #endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h index 1f37c7a54e..2d022c0cc5 100644 --- a/lib/librte_eal/common/include/rte_eal.h +++ b/lib/librte_eal/common/include/rte_eal.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2016 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ #ifndef _RTE_EAL_H_ @@ -203,6 +203,84 @@ int __rte_experimental rte_eal_cleanup(void); */ int rte_eal_primary_proc_alive(const char *config_file_path); +#define RTE_MP_MAX_FD_NUM 8 /* The max amount of fds */ +#define RTE_MP_MAX_NAME_LEN 64 /* The max length of action name */ +#define RTE_MP_MAX_PARAM_LEN 256 /* The max length of param */ +struct rte_mp_msg { + char name[RTE_MP_MAX_NAME_LEN]; + int len_param; + int num_fds; + uint8_t param[RTE_MP_MAX_PARAM_LEN]; + int fds[RTE_MP_MAX_FD_NUM]; +}; + +/** + * Action function typedef used by other components. + * + * As we create socket channel for primary/secondary communication, use + * this function typedef to register action for coming messages. + */ +typedef int (*rte_mp_t)(const struct rte_mp_msg *msg); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Register an action function for primary/secondary communication. + * + * Call this function to register an action, if the calling component wants + * to response the messages from the corresponding component in its primary + * process or secondary processes. + * + * @param name + * The name argument plays as the nonredundant key to find the action. + * + * @param action + * The action argument is the function pointer to the action function. + * + * @return + * - 0 on success. + * - (<0) on failure. + */ +int __rte_experimental +rte_mp_action_register(const char *name, rte_mp_t action); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Unregister an action function for primary/secondary communication. + * + * Call this function to unregister an action if the calling component does + * not want to response the messages from the corresponding component in its + * primary process or secondary processes. + * + * @param name + * The name argument plays as the nonredundant key to find the action. + * + */ +void __rte_experimental +rte_mp_action_unregister(const char *name); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Send a message to the peer process. + * + * This function will send a message which will be responsed by the action + * identified by name in the peer process. + * + * @param msg + * The msg argument contains the customized message. + * + * @return + * - On success, return 0. + * - On failure, return -1, and the reason will be stored in rte_errno. + */ +int __rte_experimental +rte_mp_sendmsg(struct rte_mp_msg *msg); + /** * Usage function typedef used by the application usage function. * diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 828baacd8e..66f7585a32 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2018 Intel Corporation. All rights reserved. * Copyright(c) 2012-2014 6WIND S.A. * All rights reserved. * @@ -853,6 +853,14 @@ rte_eal_init(int argc, char **argv) return -1; } + if (rte_mp_channel_init() < 0) { + rte_eal_init_alert("failed to init mp channel\n"); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + rte_errno = EFAULT; + return -1; + } + } + #ifdef VFIO_PRESENT if (rte_eal_vfio_setup() < 0) { rte_eal_init_alert("Cannot init VFIO\n"); diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 93f6c13508..24deaef3c2 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -220,6 +220,9 @@ EXPERIMENTAL { rte_eal_devargs_remove; rte_eal_hotplug_add; rte_eal_hotplug_remove; + rte_mp_action_register; + rte_mp_action_unregister; + rte_mp_sendmsg; rte_service_attr_get; rte_service_attr_reset_all; rte_service_component_register;