examples/l2fwd-keepalive: add IPC liveness reporting
author Remy Horton <remy.horton@intel.com>
Wed, 15 Jun 2016 15:25:51 +0000 (16:25 +0100)
committer Thomas Monjalon <thomas.monjalon@6wind.com>
Thu, 16 Jun 2016 16:27:00 +0000 (18:27 +0200)
Changes the l2fwd keepalive example to show how the new keepalive
enhancements can be used to relay core state to an external process.

Signed-off-by: Remy Horton <remy.horton@intel.com>
examples/Makefile
examples/l2fwd-keepalive/Makefile
examples/l2fwd-keepalive/ka-agent/Makefile [new file with mode: 0644]
examples/l2fwd-keepalive/ka-agent/main.c [new file with mode: 0644]
examples/l2fwd-keepalive/main.c
examples/l2fwd-keepalive/shm.c [new file with mode: 0644]
examples/l2fwd-keepalive/shm.h [new file with mode: 0644]

index 3bc635a..f650d3e 100644 (file)
@@ -64,6 +64,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += l2fwd-crypto
 DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += l2fwd-ivshmem
 DIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += l2fwd-jobstats
 DIRS-y += l2fwd-keepalive
+DIRS-y += l2fwd-keepalive/ka-agent
 DIRS-$(CONFIG_RTE_LIBRTE_LPM) += l3fwd
 DIRS-$(CONFIG_RTE_LIBRTE_ACL) += l3fwd-acl
 ifeq ($(CONFIG_RTE_LIBRTE_LPM),y)
index 568edcb..ca45a79 100644 (file)
@@ -1,6 +1,6 @@
 #   BSD LICENSE
 #
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 #   All rights reserved.
 #
 #   Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,10 @@ include $(RTE_SDK)/mk/rte.vars.mk
 APP = l2fwd-keepalive
 
 # all source are stored in SRCS-y
-SRCS-y := main.c
+SRCS-y := main.c shm.c
 
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
+LDFLAGS += -lrt
 
 include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/l2fwd-keepalive/ka-agent/Makefile b/examples/l2fwd-keepalive/ka-agent/Makefile
new file mode 100644 (file)
index 0000000..fd0c38b
--- /dev/null
@@ -0,0 +1,49 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = ka-agent
+
+# all sources are stored in SRCS-y
+SRCS-y := main.c
+# shm.h is one directory up; -lrt is presumably needed for shm_open()
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)/../
+LDFLAGS += -lrt
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/l2fwd-keepalive/ka-agent/main.c b/examples/l2fwd-keepalive/ka-agent/main.c
new file mode 100644 (file)
index 0000000..be1c7f4
--- /dev/null
@@ -0,0 +1,150 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include <rte_keepalive.h>
+
+#include <shm.h>
+
+#define MAX_TIMEOUTS 4
+#define SEM_TIMEOUT_SECS 2
+
+/* Map the existing keepalive SHM block read/write; NULL on failure. */
+static struct rte_keepalive_shm *ka_shm_create(void)
+{
+       const size_t map_len = sizeof(struct rte_keepalive_shm);
+       void *addr;
+       int shm_fd;
+
+       /* No O_CREAT here: the object is expected to exist already. */
+       shm_fd = shm_open(RTE_KEEPALIVE_SHM_NAME, O_RDWR, 0666);
+       if (shm_fd < 0) {
+               printf("Failed to open %s as SHM:%s\n",
+                       RTE_KEEPALIVE_SHM_NAME, strerror(errno));
+               return NULL;
+       }
+
+       addr = mmap(NULL, map_len, PROT_READ | PROT_WRITE, MAP_SHARED,
+               shm_fd, 0);
+       close(shm_fd);
+       if (addr == MAP_FAILED) {
+               printf("Failed to mmap SHM:%s\n", strerror(errno));
+               return NULL;
+       }
+       return addr;
+}
+
+/* Print dead-core reports; exit once the keepalive monitor goes quiet. */
+int main(void)
+{
+       struct rte_keepalive_shm *shm = ka_shm_create();
+       struct timespec timeout = { .tv_nsec = 0 };
+       int idx_core, cnt_cores;
+       uint64_t last_seen_alive_time = 0;
+       uint64_t most_recent_alive_time;
+       int cnt_timeouts = 0;
+       int sem_errno;
+
+       if (shm == NULL) {
+               printf("Unable to access shared core state\n");
+               return 1;
+       }
+       while (1) {
+               /* Newest last-seen timestamp across all cores */
+               most_recent_alive_time = 0;
+               for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES;
+                               idx_core++)
+                       if (shm->core_last_seen_times[idx_core] >
+                                       most_recent_alive_time)
+                               most_recent_alive_time =
+                                       shm->core_last_seen_times[idx_core];
+
+               timeout.tv_sec = time(NULL) + SEM_TIMEOUT_SECS;
+               if (sem_timedwait(&shm->core_died, &timeout) == -1) {
+                       /* Assume no core death signals and no change in any
+                        * last-seen times is the keepalive monitor itself
+                        * failing.
+                        */
+                       sem_errno = errno;
+                       if (sem_errno != ETIMEDOUT) {
+                               printf("sem_timedwait() error (%s)\n",
+                                       strerror(sem_errno));
+                       } else if (last_seen_alive_time !=
+                                       most_recent_alive_time) {
+                               /* Timestamps moved: monitor is running */
+                               last_seen_alive_time = most_recent_alive_time;
+                               cnt_timeouts = 0;
+                       } else if (cnt_timeouts++ > MAX_TIMEOUTS) {
+                               printf("No updates. Exiting..\n");
+                               break;
+                       }
+                       continue;
+               }
+               cnt_timeouts = 0;
+
+               cnt_cores = 0;
+               for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES;
+                               idx_core++)
+                       if (shm->core_state[idx_core] == RTE_KA_STATE_DEAD)
+                               cnt_cores++;
+               if (cnt_cores == 0) {
+                       /* Can happen if core was restarted since Semaphore
+                        * was sent, due to agent being offline.
+                        */
+                       printf("Warning: Empty dead core report\n");
+                       continue;
+               }
+
+               printf("%i dead cores: ", cnt_cores);
+               for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES;
+                               idx_core++)
+                       if (shm->core_state[idx_core] == RTE_KA_STATE_DEAD)
+                               printf("%d, ", idx_core);
+               printf("\b\b\n");
+       }
+       if (munmap(shm, sizeof(struct rte_keepalive_shm)) != 0)
+               printf("Warning: munmap() failed\n");
+       return 0;
+}
index 4a0d9b6..84a59eb 100644 (file)
@@ -72,6 +72,8 @@
 #include <rte_timer.h>
 #include <rte_keepalive.h>
 
+#include "shm.h"
+
 #define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1
 
 #define NB_MBUF   8192
@@ -523,7 +525,7 @@ check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
 }
 
 static void
-dead_core(__attribute__((unused)) void *ptr_data, const int id_core)
+dead_core(__rte_unused void *ptr_data, const int id_core)
 {
        printf("Dead core %i - restarting..\n", id_core);
        if (rte_eal_get_lcore_state(id_core) == FINISHED) {
@@ -534,6 +536,14 @@ dead_core(__attribute__((unused)) void *ptr_data, const int id_core)
        }
 }
 
+/* Relay callback: forward one core's state into the keepalive SHM block. */
+static void
+relay_core_state(void *ptr_data, const int id_core,
+       const enum rte_keepalive_state core_state, uint64_t last_alive)
+{
+       rte_keepalive_relayed_state(ptr_data, id_core, core_state, last_alive);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -722,10 +732,18 @@ main(int argc, char **argv)
        rte_timer_init(&stats_timer);
 
        if (check_period > 0) {
+               struct rte_keepalive_shm *ka_shm;
+
+               ka_shm = rte_keepalive_shm_create();
+               if (ka_shm == NULL)
+                       rte_exit(EXIT_FAILURE,
+                               "rte_keepalive_shm_create() failed");
                rte_global_keepalive_info =
-                       rte_keepalive_create(&dead_core, NULL);
+                       rte_keepalive_create(&dead_core, ka_shm);
                if (rte_global_keepalive_info == NULL)
                        rte_exit(EXIT_FAILURE, "init_keep_alive() failed");
+               rte_keepalive_register_relay_callback(rte_global_keepalive_info,
+                       relay_core_state, ka_shm);
                rte_timer_init(&hb_timer);
                if (rte_timer_reset(&hb_timer,
                                (check_period * rte_get_timer_hz()) / 1000,
diff --git a/examples/l2fwd-keepalive/shm.c b/examples/l2fwd-keepalive/shm.c
new file mode 100644 (file)
index 0000000..66fc433
--- /dev/null
@@ -0,0 +1,129 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <time.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_keepalive.h>
+
+#include "shm.h"
+
+/* Create, size, map and initialise the keepalive SHM; NULL on failure. */
+struct rte_keepalive_shm *rte_keepalive_shm_create(void)
+{
+       const size_t shm_size = sizeof(struct rte_keepalive_shm);
+       struct rte_keepalive_shm *ka_shm;
+       int idx_core;
+       int fd;
+
+       /* If any existing object is not unlinked, it makes it all too easy
+        * for clients to end up with stale shared memory blocks when
+        * restarted. Unlinking makes sure subsequent shm_open by clients
+        * will get the new block mapped below.
+        */
+       if (shm_unlink(RTE_KEEPALIVE_SHM_NAME) == -1 && errno != ENOENT)
+               printf("Warning: Error unlinking stale %s (%s)\n",
+                       RTE_KEEPALIVE_SHM_NAME, strerror(errno));
+
+       fd = shm_open(RTE_KEEPALIVE_SHM_NAME,
+               O_CREAT | O_TRUNC | O_RDWR, 0666);
+       if (fd < 0) {
+               RTE_LOG(INFO, EAL, "Failed to open %s as SHM (%s)\n",
+                       RTE_KEEPALIVE_SHM_NAME, strerror(errno));
+               return NULL;
+       }
+
+       if (ftruncate(fd, shm_size) != 0) {
+               RTE_LOG(INFO, EAL,
+                       "Failed to resize SHM (%s)\n", strerror(errno));
+               close(fd); /* do not leak the descriptor */
+               return NULL;
+       }
+
+       ka_shm = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+               fd, 0);
+       close(fd);
+       if (ka_shm == MAP_FAILED) {
+               RTE_LOG(INFO, EAL,
+                       "Failed to mmap SHM (%s)\n", strerror(errno));
+               return NULL;
+       }
+       memset(ka_shm, 0, shm_size);
+
+       /* Initialize the semaphores for IPC/SHM use */
+       if (sem_init(&ka_shm->core_died, 1, 0) != 0) {
+               RTE_LOG(INFO, EAL, "Failed to setup SHM semaphore (%s)\n",
+                       strerror(errno));
+               munmap(ka_shm, shm_size); /* do not leak the mapping */
+               return NULL;
+       }
+
+       /* Set all cores to 'not present' */
+       for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
+               ka_shm->core_state[idx_core] = RTE_KA_STATE_UNUSED;
+               ka_shm->core_last_seen_times[idx_core] = 0;
+       }
+
+       return ka_shm;
+}
+
+/* Record a core's relayed state in the SHM; wake ka-agent on a death. */
+void rte_keepalive_relayed_state(struct rte_keepalive_shm *shm,
+       const int id_core, const enum rte_keepalive_state core_state,
+       uint64_t last_alive)
+{
+       int count;
+
+       if (id_core < 0 || id_core >= RTE_KEEPALIVE_MAXCORES)
+               return; /* would index outside the shared arrays */
+
+       shm->core_state[id_core] = core_state;
+       shm->core_last_seen_times[id_core] = last_alive;
+
+       /* Compare against the enum value that is stored above and that
+        * ka-agent tests for. Only a death needs to wake the agent.
+        */
+       if (core_state != RTE_KA_STATE_DEAD)
+               return;
+
+       /* Cap the semaphore count in case ka-agent is not running. */
+       if (sem_getvalue(&shm->core_died, &count) == -1) {
+               RTE_LOG(INFO, EAL, "Semaphore check failed(%s)\n",
+                       strerror(errno));
+               return;
+       }
+       if (count <= 1 && sem_post(&shm->core_died) != 0)
+               RTE_LOG(INFO, EAL, "Failed to increment semaphore (%s)\n",
+                       strerror(errno));
+}
diff --git a/examples/l2fwd-keepalive/shm.h b/examples/l2fwd-keepalive/shm.h
new file mode 100644 (file)
index 0000000..25e1b61
--- /dev/null
@@ -0,0 +1,89 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef L2FWD_KEEPALIVE_SHM_H
+#define L2FWD_KEEPALIVE_SHM_H
+
+#define RTE_KEEPALIVE_SHM_NAME "/dpdk_keepalive_shm_name"
+
+#define RTE_KEEPALIVE_SHM_ALIVE 1
+#define RTE_KEEPALIVE_SHM_DEAD 2
+
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <semaphore.h>
+#include <rte_keepalive.h>
+
+/**
+ * Keepalive SHM structure.
+ *
+ * The shared memory allocated by the primary is this size, and contains the
+ * information as contained within this struct. A secondary may open the SHM,
+ * and read the contents.
+ */
+struct rte_keepalive_shm {
+       /** IPC semaphore. Posted when a core dies */
+       sem_t core_died;
+
+       /** Relayed status of each core. */
+       enum rte_keepalive_state core_state[RTE_KEEPALIVE_MAXCORES];
+
+       /** Last-seen-alive timestamps for the cores */
+       uint64_t core_last_seen_times[RTE_KEEPALIVE_MAXCORES];
+};
+
+/**
+ * Create shared host memory keepalive object.
+ * @return
+ *  Pointer to SHM keepalive structure, or NULL on failure.
+ */
+struct rte_keepalive_shm *rte_keepalive_shm_create(void);
+
+/**
+ * Relays state for given core
+ * @param shm
+ *  Pointer to SHM keepalive structure.
+ * @param id_core
+ *  Id of core
+ * @param core_state
+ *  State of core
+ * @param last_alive
+ *  Last seen timestamp for core
+ */
+void rte_keepalive_relayed_state(struct rte_keepalive_shm *shm,
+       const int id_core, const enum rte_keepalive_state core_state,
+       uint64_t last_alive);
+#endif /* L2FWD_KEEPALIVE_SHM_H */