eal: add keep alive monitoring
authorRemy Horton <remy.horton@intel.com>
Wed, 18 Nov 2015 14:05:14 +0000 (14:05 +0000)
committerThomas Monjalon <thomas.monjalon@6wind.com>
Thu, 19 Nov 2015 14:44:51 +0000 (15:44 +0100)
Adds functions for detecting and reporting the liveness of LCores,
the primary requirement of which is minimal overhead for the
core(s) being checked. Core failures are reported via an
application-defined callback.

Signed-off-by: Remy Horton <remy.horton@intel.com>
MAINTAINERS
doc/api/doxy-api-index.md
doc/guides/rel_notes/release_2_2.rst
lib/librte_eal/bsdapp/eal/Makefile
lib/librte_eal/bsdapp/eal/rte_eal_version.map
lib/librte_eal/common/Makefile
lib/librte_eal/common/include/rte_keepalive.h [new file with mode: 0644]
lib/librte_eal/common/rte_keepalive.c [new file with mode: 0644]
lib/librte_eal/linuxapp/eal/Makefile
lib/librte_eal/linuxapp/eal/rte_eal_version.map

index b1c1bd3..0bfb941 100644 (file)
@@ -108,6 +108,11 @@ F: app/test/test_malloc.c
 F: app/test/test_memory.c
 F: app/test/test_memzone.c
 
+Keep alive
+M: Remy Horton <remy.horton@intel.com>
+F: lib/librte_eal/common/include/rte_keepalive.h
+F: lib/librte_eal/common/rte_keepalive.c
+
 Secondary process
 K: RTE_PROC_
 F: doc/guides/prog_guide/multi_proc_support.rst
index 72ac3c4..f2c0320 100644 (file)
@@ -146,4 +146,5 @@ There are many libraries, so their headers may be grouped by topics:
   [EAL config]         (@ref rte_eal.h),
   [common]             (@ref rte_common.h),
   [ABI compat]         (@ref rte_compat.h),
+  [keepalive]          (@ref rte_keepalive.h),
   [version]            (@ref rte_version.h)
index 43c425c..56743ca 100644 (file)
@@ -15,6 +15,8 @@ New Features
   New function rte_ring_free() allows the user to free a ring
   if it was created with rte_ring_create().
 
+* **Added keepalive support to EAL.**
+
 * **Added ethdev API to support IEEE1588.**
 
   Added functions to read, write and adjust system time in the NIC.
index a49dcec..65b293f 100644 (file)
@@ -80,6 +80,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_thread.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_malloc.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_elem.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_keepalive.c
 
 CFLAGS_eal.o := -D_GNU_SOURCE
 #CFLAGS_eal_thread.o := -D_GNU_SOURCE
index 8b00761..f9d4b1c 100644 (file)
@@ -130,5 +130,9 @@ DPDK_2.2 {
        global:
 
        rte_intr_cap_multiple;
+       rte_keepalive_create;
+       rte_keepalive_dispatch_pings;
+       rte_keepalive_mark_alive;
+       rte_keepalive_register_core;
 
-} DPDK_2.1;
\ No newline at end of file
+} DPDK_2.1;
index 8508473..f5ea0ee 100644 (file)
@@ -40,7 +40,7 @@ INC += rte_string_fns.h rte_version.h
 INC += rte_eal_memconfig.h rte_malloc_heap.h
 INC += rte_hexdump.h rte_devargs.h rte_dev.h
 INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
-INC += rte_malloc.h rte_time.h
+INC += rte_malloc.h rte_keepalive.h rte_time.h
 
 ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
 INC += rte_warnings.h
diff --git a/lib/librte_eal/common/include/rte_keepalive.h b/lib/librte_eal/common/include/rte_keepalive.h
new file mode 100644 (file)
index 0000000..02472c0
--- /dev/null
@@ -0,0 +1,146 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2015 Intel Shannon Ltd. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file rte_keepalive.h
+ * DPDK RTE LCore Keepalive Monitor.
+ *
+ **/
+
+#ifndef _KEEPALIVE_H_
+#define _KEEPALIVE_H_
+
+#include <rte_lcore.h>
+#include <rte_memory.h>
+
+#ifndef RTE_KEEPALIVE_MAXCORES
+/**
+ * Number of cores to track.
+ *
+ * Sizes the per-core arrays of struct rte_keepalive, which are indexed by
+ * core id. Defaults to RTE_MAX_LCORE unless already defined before this
+ * header is included.
+ *
+ * @note Must be larger than the highest core id.
+ */
+#define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE
+#endif
+
+
+/**
+ * Keepalive failure callback.
+ *
+ *  Receives a data pointer passed to rte_keepalive_create() and the id of the
+ *  failed core.
+ *
+ * @param data
+ *   Application data pointer that was given to rte_keepalive_create().
+ * @param id_core
+ *   ID number of the core that was detected as dead.
+ */
+typedef void (*rte_keepalive_failure_callback_t)(
+       void *data,
+       const int id_core);
+
+
+/**
+ * Keepalive state structure.
+ *
+ * Holds the per-core liveness flags, the set of monitored cores and the
+ * failure callback for one keepalive monitor instance.
+ * @internal
+ */
+struct rte_keepalive {
+       /**
+        * Core Liveness, indexed by core id.
+        *
+        * rte_keepalive_mark_alive() sets the calling core's entry to ALIVE;
+        * rte_keepalive_dispatch_pings() steps an entry
+        * ALIVE -> MISSING -> DEAD -> GONE on successive checks.
+        * MISSING has the value 0.
+        *
+        * NOTE(review): the alignment attribute appears to apply to the
+        * array as a whole rather than to each element, so entries of
+        * neighbouring cores would share cache lines - confirm.
+        */
+       enum {
+               ALIVE = 1,
+               MISSING = 0,
+               DEAD = 2,
+               GONE = 3
+       } __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES];
+
+       /**
+        * Last-seen-alive timestamps.
+        * TSC value recorded by rte_keepalive_dispatch_pings() each time it
+        * finds the core in the ALIVE state; indexed by core id.
+        */
+       uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
+
+       /**
+        * Cores to check.
+        * Indexed by core id, non-zero if the core should be checked.
+        */
+       uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];
+
+       /** Dead core handler. May be NULL, in which case it is not called. */
+       rte_keepalive_failure_callback_t callback;
+
+       /**
+        * Dead core handler app data.
+        * Pointer is passed to dead core handler.
+        */
+       void *callback_data;
+
+       /** TSC value read when the structure was created. */
+       uint64_t tsc_initial;
+
+       /**
+        * TSC frequency divided by 1000, computed when the structure was
+        * created. Despite the name this is TSC ticks per millisecond
+        * (i.e. kHz), not MHz.
+        */
+       uint64_t tsc_mhz;
+};
+
+
+/**
+ * Initialise keepalive sub-system.
+ *
+ * Allocates a keepalive structure and stores the failure callback and its
+ * data pointer in it.
+ *
+ * @param callback
+ *   Function called upon detection of a dead core.
+ * @param data
+ *   Data pointer to be passed to function callback.
+ * @return
+ *   Keepalive structure on success, NULL on failure.
+ */
+struct rte_keepalive *rte_keepalive_create(
+       rte_keepalive_failure_callback_t callback,
+       void *data);
+
+
+/**
+ * Checks & handles keepalive state of monitored cores.
+ *
+ * Each call steps every registered core that has not marked itself alive
+ * since the previous call one state further: a core is first logged as
+ * missing and, on the following call, logged as dead and reported through
+ * the failure callback. The callback is invoked once per dead core.
+ *
+ * @param ptr_timer
+ *   Triggering timer (unused)
+ * @param ptr_data
+ *   Data pointer (keepalive structure)
+ */
+void rte_keepalive_dispatch_pings(void *ptr_timer, void *ptr_data);
+
+
+/**
+ * Registers a core for keepalive checks.
+ *
+ * Core ids of RTE_KEEPALIVE_MAXCORES or above are silently ignored.
+ *
+ * @param keepcfg
+ *   Keepalive structure pointer
+ * @param id_core
+ *   ID number of core to register.
+ */
+void rte_keepalive_register_core(struct rte_keepalive *keepcfg,
+       const int id_core);
+
+
+/**
+ * Per-core keepalive check.
+ *
+ * Flags the calling lcore as ALIVE in the keepalive structure. The value
+ * returned by rte_lcore_id() is used directly as the array index; no range
+ * check is performed.
+ *
+ * @param keepcfg
+ *   Keepalive structure pointer
+ *
+ * This function needs to be called from within the main process loop of
+ * the LCore to be checked.
+ */
+static inline void
+rte_keepalive_mark_alive(struct rte_keepalive *keepcfg)
+{
+       const unsigned int self = rte_lcore_id();
+
+       keepcfg->state_flags[self] = ALIVE;
+}
+
+
+#endif /* _KEEPALIVE_H_ */
diff --git a/lib/librte_eal/common/rte_keepalive.c b/lib/librte_eal/common/rte_keepalive.c
new file mode 100644 (file)
index 0000000..736fd0f
--- /dev/null
@@ -0,0 +1,113 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2015 Intel Shannon Ltd. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_keepalive.h>
+#include <rte_malloc.h>
+
+/*
+ * Log, at INFO level, how long ago a core was last seen alive.
+ *
+ * msg      - prefix placed at the start of the log line.
+ * keepcfg  - keepalive structure holding the last-alive timestamps.
+ * idx_core - id of the core being reported; indexes last_alive[].
+ *
+ * The elapsed time is computed from unsigned 64-bit TSC values, so it is
+ * printed with the unsigned PRIu64 conversion.
+ */
+static void
+print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)
+{
+       /* TSC ticks since the core was last recorded alive, scaled to ms. */
+       const uint64_t elapsed_ms =
+               ((rte_rdtsc() - keepcfg->last_alive[idx_core]) * 1000)
+               / rte_get_tsc_hz();
+
+       RTE_LOG(INFO, EAL, "%sLast seen %" PRIu64 "ms ago.\n",
+               msg, elapsed_ms);
+}
+
+
+/*
+ * Walk all registered cores and advance their liveness state.
+ *
+ * ptr_timer - unused.
+ * ptr_data  - the struct rte_keepalive to operate on.
+ *
+ * A core found ALIVE is reset to MISSING and its last-alive timestamp is
+ * refreshed. A core still MISSING is logged and moved to DEAD. A core
+ * still DEAD is moved to GONE, logged, and reported via the callback if
+ * one is set. GONE cores are left untouched.
+ */
+void
+rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer,
+       void *ptr_data)
+{
+       struct rte_keepalive *ka = ptr_data;
+       int lcore;
+
+       for (lcore = 0; lcore < RTE_KEEPALIVE_MAXCORES; lcore++) {
+               /* Only cores that were registered are monitored. */
+               if (!ka->active_cores[lcore])
+                       continue;
+
+               switch (ka->state_flags[lcore]) {
+               case ALIVE:
+                       /* Seen since last check: re-arm and timestamp. */
+                       ka->state_flags[lcore] = MISSING;
+                       ka->last_alive[lcore] = rte_rdtsc();
+                       break;
+               case MISSING:
+                       /* Not seen for one full interval. */
+                       print_trace("Core MIA. ", ka, lcore);
+                       ka->state_flags[lcore] = DEAD;
+                       break;
+               case DEAD:
+                       /* Second missed interval: report exactly once. */
+                       ka->state_flags[lcore] = GONE;
+                       print_trace("Core died. ", ka, lcore);
+                       if (ka->callback != NULL)
+                               ka->callback(ka->callback_data, lcore);
+                       break;
+               case GONE:
+                       /* Already reported; nothing more to do. */
+                       break;
+               }
+       }
+}
+
+
+/*
+ * Allocate and set up a keepalive structure.
+ *
+ * callback - handler invoked for a dead core; stored as given.
+ * data     - pointer handed back to the callback; stored as given.
+ *
+ * Returns the new structure, or NULL if the allocation failed.
+ */
+struct rte_keepalive *
+rte_keepalive_create(rte_keepalive_failure_callback_t callback,
+       void *data)
+{
+       struct rte_keepalive *ka = rte_zmalloc("RTE_EAL_KEEPALIVE",
+               sizeof(*ka), RTE_CACHE_LINE_SIZE);
+
+       if (ka == NULL)
+               return NULL;
+
+       ka->callback = callback;
+       ka->callback_data = data;
+       ka->tsc_initial = rte_rdtsc();
+       /* TSC ticks per millisecond (hz / 1000). */
+       ka->tsc_mhz = rte_get_tsc_hz() / 1000;
+       return ka;
+}
+
+
+/*
+ * Mark a core as one that rte_keepalive_dispatch_pings() should check.
+ *
+ * keepcfg - keepalive structure to update.
+ * id_core - id of the core to register; indexes active_cores[].
+ *
+ * Ids outside [0, RTE_KEEPALIVE_MAXCORES) are ignored.
+ */
+void
+rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
+{
+       /* id_core is signed: reject negatives as well as ids past the end. */
+       if (id_core < 0 || id_core >= RTE_KEEPALIVE_MAXCORES)
+               return;
+
+       keepcfg->active_cores[id_core] = 1;
+}
index 7e36b86..26eced5 100644 (file)
@@ -90,6 +90,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_thread.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_malloc.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_elem.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_keepalive.c
 
 CFLAGS_eal.o := -D_GNU_SOURCE
 CFLAGS_eal_interrupts.o := -D_GNU_SOURCE
index cb9f4d6..54d496e 100644 (file)
@@ -133,5 +133,9 @@ DPDK_2.2 {
        global:
 
        rte_intr_cap_multiple;
+       rte_keepalive_create;
+       rte_keepalive_dispatch_pings;
+       rte_keepalive_mark_alive;
+       rte_keepalive_register_core;
 
-} DPDK_2.1;
\ No newline at end of file
+} DPDK_2.1;