bpf: add BPF loading and execution framework
authorKonstantin Ananyev <konstantin.ananyev@intel.com>
Thu, 10 May 2018 10:23:03 +0000 (11:23 +0100)
committerThomas Monjalon <thomas@monjalon.net>
Fri, 11 May 2018 22:35:15 +0000 (00:35 +0200)
librte_bpf provides a framework to load and execute eBPF bytecode
inside user-space DPDK-based applications.
It supports a basic set of features from the eBPF spec
(https://www.kernel.org/doc/Documentation/networking/filter.txt).

Not currently supported features:
 - JIT
 - cBPF
 - tail call (eBPF bpf_tail_call)
 - eBPF MAP
 - skb
 - function calls for 32-bit apps
 - mbuf pointer as input parameter for 32-bit apps

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Acked-by: Ferruh Yigit <ferruh.yigit@intel.com>
18 files changed:
MAINTAINERS
config/common_base
doc/api/doxy-api-index.md
doc/api/doxy-api.conf
doc/guides/rel_notes/release_18_05.rst
lib/Makefile
lib/librte_bpf/Makefile [new file with mode: 0644]
lib/librte_bpf/bpf.c [new file with mode: 0644]
lib/librte_bpf/bpf_def.h [new file with mode: 0644]
lib/librte_bpf/bpf_exec.c [new file with mode: 0644]
lib/librte_bpf/bpf_impl.h [new file with mode: 0644]
lib/librte_bpf/bpf_load.c [new file with mode: 0644]
lib/librte_bpf/bpf_validate.c [new file with mode: 0644]
lib/librte_bpf/meson.build [new file with mode: 0644]
lib/librte_bpf/rte_bpf.h [new file with mode: 0644]
lib/librte_bpf/rte_bpf_version.map [new file with mode: 0644]
lib/meson.build
mk/rte.app.mk

index 93e344b..8b4972e 100644 (file)
@@ -1100,6 +1100,10 @@ Latency statistics
 M: Reshma Pattan <reshma.pattan@intel.com>
 F: lib/librte_latencystats/
 
+BPF
+M: Konstantin Ananyev <konstantin.ananyev@intel.com>
+F: lib/librte_bpf/
+
 
 Test Applications
 -----------------
index 452e800..10f9f90 100644 (file)
@@ -868,6 +868,11 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=n
 #
 CONFIG_RTE_LIBRTE_IFCVF_VDPA_PMD=n
 
+#
+# Compile librte_bpf
+#
+CONFIG_RTE_LIBRTE_BPF=y
+
 #
 # Compile the test application
 #
index 270aae2..9265907 100644 (file)
@@ -114,7 +114,8 @@ The public API headers are grouped by topics:
   [EFD]                (@ref rte_efd.h),
   [ACL]                (@ref rte_acl.h),
   [member]             (@ref rte_member.h),
-  [flow classify]      (@ref rte_flow_classify.h)
+  [flow classify]      (@ref rte_flow_classify.h),
+  [BPF]                (@ref rte_bpf.h)
 
 - **containers**:
   [mbuf]               (@ref rte_mbuf.h),
index dfa4158..a3b1c0b 100644 (file)
@@ -45,6 +45,7 @@ INPUT                   = doc/api/doxy-api-index.md \
                           lib/librte_acl \
                           lib/librte_bbdev \
                           lib/librte_bitratestats \
+                          lib/librte_bpf \
                           lib/librte_cfgfile \
                           lib/librte_cmdline \
                           lib/librte_compat \
index f0df4e0..aef6b62 100644 (file)
@@ -222,6 +222,11 @@ New Features
   stats/xstats on shared memory from secondary process, and also pdump packets on
   those virtual devices.
 
+* **Added the BPF Library.**
+
+  The BPF Library provides the ability to load and execute
+  Enhanced Berkeley Packet Filter (eBPF) programs within user-space DPDK applications.
+
 
 API Changes
 -----------
@@ -483,6 +488,7 @@ The libraries prepended with a plus sign were incremented in this version.
      librte_acl.so.2
      librte_bbdev.so.1
      librte_bitratestats.so.2
+   + librte_bpf.so.1
      librte_bus_dpaa.so.1
      librte_bus_fslmc.so.1
      librte_bus_pci.so.1
index c59f682..d82462b 100644 (file)
@@ -102,6 +102,8 @@ DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ethdev
 DIRS-$(CONFIG_RTE_LIBRTE_GSO) += librte_gso
 DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ethdev librte_net
 DEPDIRS-librte_gso += librte_mempool
+DIRS-$(CONFIG_RTE_LIBRTE_BPF) += librte_bpf
+DEPDIRS-librte_bpf := librte_eal librte_mempool librte_mbuf librte_ethdev
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
diff --git a/lib/librte_bpf/Makefile b/lib/librte_bpf/Makefile
new file mode 100644 (file)
index 0000000..da93065
--- /dev/null
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_bpf.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+LDLIBS += -lrte_net -lrte_eal
+LDLIBS += -lrte_mempool -lrte_ring
+LDLIBS += -lrte_mbuf -lrte_ethdev
+
+EXPORT_MAP := rte_bpf_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_exec.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_validate.c
+
+# install header files
+SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += bpf_def.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_bpf/bpf.c b/lib/librte_bpf/bpf.c
new file mode 100644 (file)
index 0000000..d7f68c0
--- /dev/null
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+
+#include "bpf_impl.h"
+
+int rte_bpf_logtype;
+
+__rte_experimental void
+rte_bpf_destroy(struct rte_bpf *bpf)
+{
+       if (bpf != NULL) {
+               if (bpf->jit.func != NULL)
+                       munmap(bpf->jit.func, bpf->jit.sz);
+               munmap(bpf, bpf->sz);
+       }
+}
+
+__rte_experimental int
+rte_bpf_get_jit(const struct rte_bpf *bpf, struct rte_bpf_jit *jit)
+{
+       if (bpf == NULL || jit == NULL)
+               return -EINVAL;
+
+       jit[0] = bpf->jit;
+       return 0;
+}
+
+int
+bpf_jit(struct rte_bpf *bpf)
+{
+       int32_t rc;
+
+       rc = -ENOTSUP;
+       if (rc != 0)
+               RTE_BPF_LOG(WARNING, "%s(%p) failed, error code: %d;\n",
+                       __func__, bpf, rc);
+       return rc;
+}
+
+RTE_INIT(rte_bpf_init_log);
+
+static void
+rte_bpf_init_log(void)
+{
+       rte_bpf_logtype = rte_log_register("lib.bpf");
+       if (rte_bpf_logtype >= 0)
+               rte_log_set_level(rte_bpf_logtype, RTE_LOG_INFO);
+}
diff --git a/lib/librte_bpf/bpf_def.h b/lib/librte_bpf/bpf_def.h
new file mode 100644 (file)
index 0000000..6b69de3
--- /dev/null
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 1982, 1986, 1990, 1993
+ *      The Regents of the University of California.
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#ifndef _RTE_BPF_DEF_H_
+#define _RTE_BPF_DEF_H_
+
+/**
+ * @file
+ *
+ * classic BPF (cBPF) and extended BPF (eBPF) related defines.
+ * For more information regarding cBPF and eBPF ISA and their differences,
+ * please refer to:
+ * https://www.kernel.org/doc/Documentation/networking/filter.txt.
+ * As a rule of thumb for that file:
+ * all definitions used by both cBPF and eBPF start with bpf(BPF)_ prefix,
+ * while eBPF-only ones start with the ebpf(EBPF) prefix.
+ */
+
+#include <stdint.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The instruction encodings.
+ */
+
+/* Instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define        BPF_LD          0x00
+#define        BPF_LDX         0x01
+#define        BPF_ST          0x02
+#define        BPF_STX         0x03
+#define        BPF_ALU         0x04
+#define        BPF_JMP         0x05
+#define        BPF_RET         0x06
+#define        BPF_MISC        0x07
+
+#define EBPF_ALU64     0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code)  ((code) & 0x18)
+#define        BPF_W           0x00
+#define        BPF_H           0x08
+#define        BPF_B           0x10
+#define        EBPF_DW         0x18
+
+#define BPF_MODE(code)  ((code) & 0xe0)
+#define        BPF_IMM         0x00
+#define        BPF_ABS         0x20
+#define        BPF_IND         0x40
+#define        BPF_MEM         0x60
+#define        BPF_LEN         0x80
+#define        BPF_MSH         0xa0
+
+#define EBPF_XADD      0xc0
+
+/* alu/jmp fields */
+#define BPF_OP(code)    ((code) & 0xf0)
+#define        BPF_ADD         0x00
+#define        BPF_SUB         0x10
+#define        BPF_MUL         0x20
+#define        BPF_DIV         0x30
+#define        BPF_OR          0x40
+#define        BPF_AND         0x50
+#define        BPF_LSH         0x60
+#define        BPF_RSH         0x70
+#define        BPF_NEG         0x80
+#define        BPF_MOD         0x90
+#define        BPF_XOR         0xa0
+
+#define EBPF_MOV       0xb0
+#define EBPF_ARSH      0xc0
+#define EBPF_END       0xd0
+
+#define        BPF_JA          0x00
+#define        BPF_JEQ         0x10
+#define        BPF_JGT         0x20
+#define        BPF_JGE         0x30
+#define        BPF_JSET        0x40
+
+#define EBPF_JNE       0x50
+#define EBPF_JSGT      0x60
+#define EBPF_JSGE      0x70
+#define EBPF_CALL      0x80
+#define EBPF_EXIT      0x90
+#define EBPF_JLT       0xa0
+#define EBPF_JLE       0xb0
+#define EBPF_JSLT      0xc0
+#define EBPF_JSLE      0xd0
+
+#define BPF_SRC(code)   ((code) & 0x08)
+#define        BPF_K           0x00
+#define        BPF_X           0x08
+
+/* if BPF_OP(code) == EBPF_END */
+#define EBPF_TO_LE     0x00  /* convert to little-endian */
+#define EBPF_TO_BE     0x08  /* convert to big-endian */
+
+/*
+ * eBPF registers
+ */
+enum {
+       EBPF_REG_0,  /* return value from internal function/for eBPF program */
+       EBPF_REG_1,  /* 0-th argument to internal function */
+       EBPF_REG_2,  /* 1-th argument to internal function */
+       EBPF_REG_3,  /* 2-th argument to internal function */
+       EBPF_REG_4,  /* 3-th argument to internal function */
+       EBPF_REG_5,  /* 4-th argument to internal function */
+       EBPF_REG_6,  /* callee saved register */
+       EBPF_REG_7,  /* callee saved register */
+       EBPF_REG_8,  /* callee saved register */
+       EBPF_REG_9,  /* callee saved register */
+       EBPF_REG_10, /* stack pointer (read-only) */
+       EBPF_REG_NUM,
+};
+
+/*
+ * eBPF instruction format
+ */
+struct ebpf_insn {
+       uint8_t code;
+       uint8_t dst_reg:4;
+       uint8_t src_reg:4;
+       int16_t off;
+       int32_t imm;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BPF_DEF_H_ */
diff --git a/lib/librte_bpf/bpf_exec.c b/lib/librte_bpf/bpf_exec.c
new file mode 100644 (file)
index 0000000..e373b1f
--- /dev/null
@@ -0,0 +1,453 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+
+#include "bpf_impl.h"
+
+#define BPF_JMP_UNC(ins)       ((ins) += (ins)->off)
+
+#define BPF_JMP_CND_REG(reg, ins, op, type)    \
+       ((ins) += \
+               ((type)(reg)[(ins)->dst_reg] op (type)(reg)[(ins)->src_reg]) ? \
+               (ins)->off : 0)
+
+#define BPF_JMP_CND_IMM(reg, ins, op, type)    \
+       ((ins) += \
+               ((type)(reg)[(ins)->dst_reg] op (type)(ins)->imm) ? \
+               (ins)->off : 0)
+
+#define BPF_NEG_ALU(reg, ins, type)    \
+       ((reg)[(ins)->dst_reg] = (type)(-(reg)[(ins)->dst_reg]))
+
+#define EBPF_MOV_ALU_REG(reg, ins, type)       \
+       ((reg)[(ins)->dst_reg] = (type)(reg)[(ins)->src_reg])
+
+#define BPF_OP_ALU_REG(reg, ins, op, type)     \
+       ((reg)[(ins)->dst_reg] = \
+               (type)(reg)[(ins)->dst_reg] op (type)(reg)[(ins)->src_reg])
+
+#define EBPF_MOV_ALU_IMM(reg, ins, type)       \
+       ((reg)[(ins)->dst_reg] = (type)(ins)->imm)
+
+#define BPF_OP_ALU_IMM(reg, ins, op, type)     \
+       ((reg)[(ins)->dst_reg] = \
+               (type)(reg)[(ins)->dst_reg] op (type)(ins)->imm)
+
+#define BPF_DIV_ZERO_CHECK(bpf, reg, ins, type) do { \
+       if ((type)(reg)[(ins)->src_reg] == 0) { \
+               RTE_BPF_LOG(ERR, \
+                       "%s(%p): division by 0 at pc: %#zx;\n", \
+                       __func__, bpf, \
+                       (uintptr_t)(ins) - (uintptr_t)(bpf)->prm.ins); \
+               return 0; \
+       } \
+} while (0)
+
+#define BPF_LD_REG(reg, ins, type)     \
+       ((reg)[(ins)->dst_reg] = \
+               *(type *)(uintptr_t)((reg)[(ins)->src_reg] + (ins)->off))
+
+#define BPF_ST_IMM(reg, ins, type)     \
+       (*(type *)(uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off) = \
+               (type)(ins)->imm)
+
+#define BPF_ST_REG(reg, ins, type)     \
+       (*(type *)(uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off) = \
+               (type)(reg)[(ins)->src_reg])
+
+#define BPF_ST_XADD_REG(reg, ins, tp)  \
+       (rte_atomic##tp##_add((rte_atomic##tp##_t *) \
+               (uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off), \
+               reg[ins->src_reg]))
+
+static inline void
+bpf_alu_be(uint64_t reg[EBPF_REG_NUM], const struct ebpf_insn *ins)
+{
+       uint64_t *v;
+
+       v = reg + ins->dst_reg;
+       switch (ins->imm) {
+       case 16:
+               *v = rte_cpu_to_be_16(*v);
+               break;
+       case 32:
+               *v = rte_cpu_to_be_32(*v);
+               break;
+       case 64:
+               *v = rte_cpu_to_be_64(*v);
+               break;
+       }
+}
+
+static inline void
+bpf_alu_le(uint64_t reg[EBPF_REG_NUM], const struct ebpf_insn *ins)
+{
+       uint64_t *v;
+
+       v = reg + ins->dst_reg;
+       switch (ins->imm) {
+       case 16:
+               *v = rte_cpu_to_le_16(*v);
+               break;
+       case 32:
+               *v = rte_cpu_to_le_32(*v);
+               break;
+       case 64:
+               *v = rte_cpu_to_le_64(*v);
+               break;
+       }
+}
+
+static inline uint64_t
+bpf_exec(const struct rte_bpf *bpf, uint64_t reg[EBPF_REG_NUM])
+{
+       const struct ebpf_insn *ins;
+
+       for (ins = bpf->prm.ins; ; ins++) {
+               switch (ins->code) {
+               /* 32 bit ALU IMM operations */
+               case (BPF_ALU | BPF_ADD | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, +, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_SUB | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, -, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_AND | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, &, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_OR | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, |, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_LSH | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, <<, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_RSH | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, >>, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_XOR | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, ^, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_MUL | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, *, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_DIV | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, /, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_MOD | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, %, uint32_t);
+                       break;
+               case (BPF_ALU | EBPF_MOV | BPF_K):
+                       EBPF_MOV_ALU_IMM(reg, ins, uint32_t);
+                       break;
+               /* 32 bit ALU REG operations */
+               case (BPF_ALU | BPF_ADD | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, +, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_SUB | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, -, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_AND | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, &, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_OR | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, |, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_LSH | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, <<, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_RSH | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, >>, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_XOR | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, ^, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_MUL | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, *, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_DIV | BPF_X):
+                       BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint32_t);
+                       BPF_OP_ALU_REG(reg, ins, /, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_MOD | BPF_X):
+                       BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint32_t);
+                       BPF_OP_ALU_REG(reg, ins, %, uint32_t);
+                       break;
+               case (BPF_ALU | EBPF_MOV | BPF_X):
+                       EBPF_MOV_ALU_REG(reg, ins, uint32_t);
+                       break;
+               case (BPF_ALU | BPF_NEG):
+                       BPF_NEG_ALU(reg, ins, uint32_t);
+                       break;
+               case (BPF_ALU | EBPF_END | EBPF_TO_BE):
+                       bpf_alu_be(reg, ins);
+                       break;
+               case (BPF_ALU | EBPF_END | EBPF_TO_LE):
+                       bpf_alu_le(reg, ins);
+                       break;
+               /* 64 bit ALU IMM operations */
+               case (EBPF_ALU64 | BPF_ADD | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, +, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_SUB | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, -, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_AND | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, &, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_OR | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, |, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_LSH | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, <<, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_RSH | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, >>, uint64_t);
+                       break;
+               case (EBPF_ALU64 | EBPF_ARSH | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, >>, int64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_XOR | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, ^, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_MUL | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, *, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_DIV | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, /, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_MOD | BPF_K):
+                       BPF_OP_ALU_IMM(reg, ins, %, uint64_t);
+                       break;
+               case (EBPF_ALU64 | EBPF_MOV | BPF_K):
+                       EBPF_MOV_ALU_IMM(reg, ins, uint64_t);
+                       break;
+               /* 64 bit ALU REG operations */
+               case (EBPF_ALU64 | BPF_ADD | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, +, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_SUB | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, -, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_AND | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, &, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_OR | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, |, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_LSH | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, <<, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_RSH | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, >>, uint64_t);
+                       break;
+               case (EBPF_ALU64 | EBPF_ARSH | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, >>, int64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_XOR | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, ^, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_MUL | BPF_X):
+                       BPF_OP_ALU_REG(reg, ins, *, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_DIV | BPF_X):
+                       BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint64_t);
+                       BPF_OP_ALU_REG(reg, ins, /, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_MOD | BPF_X):
+                       BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint64_t);
+                       BPF_OP_ALU_REG(reg, ins, %, uint64_t);
+                       break;
+               case (EBPF_ALU64 | EBPF_MOV | BPF_X):
+                       EBPF_MOV_ALU_REG(reg, ins, uint64_t);
+                       break;
+               case (EBPF_ALU64 | BPF_NEG):
+                       BPF_NEG_ALU(reg, ins, uint64_t);
+                       break;
+               /* load instructions */
+               case (BPF_LDX | BPF_MEM | BPF_B):
+                       BPF_LD_REG(reg, ins, uint8_t);
+                       break;
+               case (BPF_LDX | BPF_MEM | BPF_H):
+                       BPF_LD_REG(reg, ins, uint16_t);
+                       break;
+               case (BPF_LDX | BPF_MEM | BPF_W):
+                       BPF_LD_REG(reg, ins, uint32_t);
+                       break;
+               case (BPF_LDX | BPF_MEM | EBPF_DW):
+                       BPF_LD_REG(reg, ins, uint64_t);
+                       break;
+               /* load 64 bit immediate value */
+               case (BPF_LD | BPF_IMM | EBPF_DW):
+                       reg[ins->dst_reg] = (uint32_t)ins[0].imm |
+                               (uint64_t)(uint32_t)ins[1].imm << 32;
+                       ins++;
+                       break;
+               /* store instructions */
+               case (BPF_STX | BPF_MEM | BPF_B):
+                       BPF_ST_REG(reg, ins, uint8_t);
+                       break;
+               case (BPF_STX | BPF_MEM | BPF_H):
+                       BPF_ST_REG(reg, ins, uint16_t);
+                       break;
+               case (BPF_STX | BPF_MEM | BPF_W):
+                       BPF_ST_REG(reg, ins, uint32_t);
+                       break;
+               case (BPF_STX | BPF_MEM | EBPF_DW):
+                       BPF_ST_REG(reg, ins, uint64_t);
+                       break;
+               case (BPF_ST | BPF_MEM | BPF_B):
+                       BPF_ST_IMM(reg, ins, uint8_t);
+                       break;
+               case (BPF_ST | BPF_MEM | BPF_H):
+                       BPF_ST_IMM(reg, ins, uint16_t);
+                       break;
+               case (BPF_ST | BPF_MEM | BPF_W):
+                       BPF_ST_IMM(reg, ins, uint32_t);
+                       break;
+               case (BPF_ST | BPF_MEM | EBPF_DW):
+                       BPF_ST_IMM(reg, ins, uint64_t);
+                       break;
+               /* atomic add instructions */
+               case (BPF_STX | EBPF_XADD | BPF_W):
+                       BPF_ST_XADD_REG(reg, ins, 32);
+                       break;
+               case (BPF_STX | EBPF_XADD | EBPF_DW):
+                       BPF_ST_XADD_REG(reg, ins, 64);
+                       break;
+               /* jump instructions */
+               case (BPF_JMP | BPF_JA):
+                       BPF_JMP_UNC(ins);
+                       break;
+               /* jump IMM instructions */
+               case (BPF_JMP | BPF_JEQ | BPF_K):
+                       BPF_JMP_CND_IMM(reg, ins, ==, uint64_t);
+                       break;
+               case (BPF_JMP | EBPF_JNE | BPF_K):
+                       BPF_JMP_CND_IMM(reg, ins, !=, uint64_t);
+                       break;
+               case (BPF_JMP | BPF_JGT | BPF_K):
+                       BPF_JMP_CND_IMM(reg, ins, >, uint64_t);
+                       break;
+               case (BPF_JMP | EBPF_JLT | BPF_K):
+                       BPF_JMP_CND_IMM(reg, ins, <, uint64_t);
+                       break;
+               case (BPF_JMP | BPF_JGE | BPF_K):
+                       BPF_JMP_CND_IMM(reg, ins, >=, uint64_t);
+                       break;
+               case (BPF_JMP | EBPF_JLE | BPF_K):
+                       BPF_JMP_CND_IMM(reg, ins, <=, uint64_t);
+                       break;
+               case (BPF_JMP | EBPF_JSGT | BPF_K):
+                       BPF_JMP_CND_IMM(reg, ins, >, int64_t);
+                       break;
+               case (BPF_JMP | EBPF_JSLT | BPF_K):
+                       BPF_JMP_CND_IMM(reg, ins, <, int64_t);
+                       break;
+               case (BPF_JMP | EBPF_JSGE | BPF_K):
+                       BPF_JMP_CND_IMM(reg, ins, >=, int64_t);
+                       break;
+               case (BPF_JMP | EBPF_JSLE | BPF_K):
+                       BPF_JMP_CND_IMM(reg, ins, <=, int64_t);
+                       break;
+               case (BPF_JMP | BPF_JSET | BPF_K):
+                       BPF_JMP_CND_IMM(reg, ins, &, uint64_t);
+                       break;
+               /* jump REG instructions */
+               case (BPF_JMP | BPF_JEQ | BPF_X):
+                       BPF_JMP_CND_REG(reg, ins, ==, uint64_t);
+                       break;
+               case (BPF_JMP | EBPF_JNE | BPF_X):
+                       BPF_JMP_CND_REG(reg, ins, !=, uint64_t);
+                       break;
+               case (BPF_JMP | BPF_JGT | BPF_X):
+                       BPF_JMP_CND_REG(reg, ins, >, uint64_t);
+                       break;
+               case (BPF_JMP | EBPF_JLT | BPF_X):
+                       BPF_JMP_CND_REG(reg, ins, <, uint64_t);
+                       break;
+               case (BPF_JMP | BPF_JGE | BPF_X):
+                       BPF_JMP_CND_REG(reg, ins, >=, uint64_t);
+                       break;
+               case (BPF_JMP | EBPF_JLE | BPF_X):
+                       BPF_JMP_CND_REG(reg, ins, <=, uint64_t);
+                       break;
+               case (BPF_JMP | EBPF_JSGT | BPF_X):
+                       BPF_JMP_CND_REG(reg, ins, >, int64_t);
+                       break;
+               case (BPF_JMP | EBPF_JSLT | BPF_X):
+                       BPF_JMP_CND_REG(reg, ins, <, int64_t);
+                       break;
+               case (BPF_JMP | EBPF_JSGE | BPF_X):
+                       BPF_JMP_CND_REG(reg, ins, >=, int64_t);
+                       break;
+               case (BPF_JMP | EBPF_JSLE | BPF_X):
+                       BPF_JMP_CND_REG(reg, ins, <=, int64_t);
+                       break;
+               case (BPF_JMP | BPF_JSET | BPF_X):
+                       BPF_JMP_CND_REG(reg, ins, &, uint64_t);
+                       break;
+               /* call instructions */
+               case (BPF_JMP | EBPF_CALL):
+                       reg[EBPF_REG_0] = bpf->prm.xsym[ins->imm].func(
+                               reg[EBPF_REG_1], reg[EBPF_REG_2],
+                               reg[EBPF_REG_3], reg[EBPF_REG_4],
+                               reg[EBPF_REG_5]);
+                       break;
+               /* return instruction */
+               case (BPF_JMP | EBPF_EXIT):
+                       return reg[EBPF_REG_0];
+               default:
+                       RTE_BPF_LOG(ERR,
+                               "%s(%p): invalid opcode %#x at pc: %#zx;\n",
+                               __func__, bpf, ins->code,
+                               (uintptr_t)ins - (uintptr_t)bpf->prm.ins);
+                       return 0;
+               }
+       }
+
+       /* should never be reached */
+       RTE_VERIFY(0);
+       return 0;
+}
+
+__rte_experimental uint32_t
+rte_bpf_exec_burst(const struct rte_bpf *bpf, void *ctx[], uint64_t rc[],
+       uint32_t num)
+{
+       uint32_t i;
+       uint64_t reg[EBPF_REG_NUM];
+       uint64_t stack[MAX_BPF_STACK_SIZE / sizeof(uint64_t)];
+
+       for (i = 0; i != num; i++) {
+
+               reg[EBPF_REG_1] = (uintptr_t)ctx[i];
+               reg[EBPF_REG_10] = (uintptr_t)(stack + RTE_DIM(stack));
+
+               rc[i] = bpf_exec(bpf, reg);
+       }
+
+       return i;
+}
+
+__rte_experimental uint64_t
+rte_bpf_exec(const struct rte_bpf *bpf, void *ctx)
+{
+       uint64_t rc;
+
+       rte_bpf_exec_burst(bpf, &ctx, &rc, 1);
+       return rc;
+}
diff --git a/lib/librte_bpf/bpf_impl.h b/lib/librte_bpf/bpf_impl.h
new file mode 100644 (file)
index 0000000..5d7e65c
--- /dev/null
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _BPF_H_
+#define _BPF_H_
+
+#include <rte_bpf.h>
+#include <sys/mman.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_BPF_STACK_SIZE     0x200
+
+struct rte_bpf {
+       struct rte_bpf_prm prm;
+       struct rte_bpf_jit jit;
+       size_t sz;
+       uint32_t stack_sz;
+};
+
+extern int bpf_validate(struct rte_bpf *bpf);
+
+extern int bpf_jit(struct rte_bpf *bpf);
+
+#ifdef RTE_ARCH_X86_64
+extern int bpf_jit_x86(struct rte_bpf *);
+#endif
+
+extern int rte_bpf_logtype;
+
+#define        RTE_BPF_LOG(lvl, fmt, args...) \
+       rte_log(RTE_LOG_## lvl, rte_bpf_logtype, fmt, ##args)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BPF_H_ */
diff --git a/lib/librte_bpf/bpf_load.c b/lib/librte_bpf/bpf_load.c
new file mode 100644 (file)
index 0000000..f28ecfb
--- /dev/null
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+#include <rte_errno.h>
+
+#include "bpf_impl.h"
+
+static struct rte_bpf *
+bpf_load(const struct rte_bpf_prm *prm)
+{
+       uint8_t *buf;
+       struct rte_bpf *bpf;
+       size_t sz, bsz, insz, xsz;
+
+       xsz =  prm->nb_xsym * sizeof(prm->xsym[0]);
+       insz = prm->nb_ins * sizeof(prm->ins[0]);
+       bsz = sizeof(bpf[0]);
+       sz = insz + xsz + bsz;
+
+       buf = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+       if (buf == MAP_FAILED)
+               return NULL;
+
+       bpf = (void *)buf;
+       bpf->sz = sz;
+
+       memcpy(&bpf->prm, prm, sizeof(bpf->prm));
+
+       memcpy(buf + bsz, prm->xsym, xsz);
+       memcpy(buf + bsz + xsz, prm->ins, insz);
+
+       bpf->prm.xsym = (void *)(buf + bsz);
+       bpf->prm.ins = (void *)(buf + bsz + xsz);
+
+       return bpf;
+}
+
+__rte_experimental struct rte_bpf *
+rte_bpf_load(const struct rte_bpf_prm *prm)
+{
+       struct rte_bpf *bpf;
+       int32_t rc;
+
+       if (prm == NULL || prm->ins == NULL) {
+               rte_errno = EINVAL;
+               return NULL;
+       }
+
+       bpf = bpf_load(prm);
+       if (bpf == NULL) {
+               rte_errno = ENOMEM;
+               return NULL;
+       }
+
+       rc = bpf_validate(bpf);
+       if (rc == 0) {
+               bpf_jit(bpf);
+               if (mprotect(bpf, bpf->sz, PROT_READ) != 0)
+                       rc = -ENOMEM;
+       }
+
+       if (rc != 0) {
+               rte_bpf_destroy(bpf);
+               rte_errno = -rc;
+               return NULL;
+       }
+
+       return bpf;
+}
diff --git a/lib/librte_bpf/bpf_validate.c b/lib/librte_bpf/bpf_validate.c
new file mode 100644 (file)
index 0000000..6a1b331
--- /dev/null
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+
+#include "bpf_impl.h"
+
+/*
+ * Dummy validator for now -- full instruction verification still needs
+ * more work.  The only analysis performed is a scan of stores made
+ * through R10 (the read-only frame pointer) to derive the stack size
+ * this program requires.
+ * Returns 0 on success, -ERANGE if the program needs more stack than
+ * MAX_BPF_STACK_SIZE.
+ */
+int
+bpf_validate(struct rte_bpf *bpf)
+{
+       int32_t rc, ofs, stack_sz;
+       uint32_t i, op, dr;
+       const struct ebpf_insn *ins;
+
+       rc = 0;
+       stack_sz = 0;
+       for (i = 0; i != bpf->prm.nb_ins; i++) {
+
+               ins = bpf->prm.ins + i;
+               op = ins->code;
+               dr = ins->dst_reg;
+               ofs = ins->off;
+
+               /*
+                * A ST/STX whose destination is R10 addresses the BPF stack;
+                * such offsets grow downwards (negative), so keep the lowest
+                * one seen.  The extra sizeof(uint64_t) presumably covers the
+                * width of the widest store at that offset -- conservative.
+                */
+               if ((BPF_CLASS(op) == BPF_STX || BPF_CLASS(op) == BPF_ST) &&
+                               dr == EBPF_REG_10) {
+                       ofs -= sizeof(uint64_t);
+                       stack_sz = RTE_MIN(ofs, stack_sz);
+               }
+       }
+
+       /* turn the negative low-water mark into a positive byte count */
+       if (stack_sz != 0) {
+               stack_sz = -stack_sz;
+               if (stack_sz > MAX_BPF_STACK_SIZE)
+                       rc = -ERANGE;
+               else
+                       bpf->stack_sz = stack_sz;
+       }
+
+       if (rc != 0)
+               RTE_BPF_LOG(ERR, "%s(%p) failed, error code: %d;\n",
+                       __func__, bpf, rc);
+       return rc;
+}
diff --git a/lib/librte_bpf/meson.build b/lib/librte_bpf/meson.build
new file mode 100644 (file)
index 0000000..4fa000f
--- /dev/null
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+allow_experimental_apis = true
+sources = files('bpf.c',
+               'bpf_exec.c',
+               'bpf_load.c',
+               'bpf_validate.c')
+
+install_headers = files('bpf_def.h',
+                       'rte_bpf.h')
+
+deps += ['mbuf', 'net']
diff --git a/lib/librte_bpf/rte_bpf.h b/lib/librte_bpf/rte_bpf.h
new file mode 100644 (file)
index 0000000..4b3d970
--- /dev/null
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_BPF_H_
+#define _RTE_BPF_H_
+
+/**
+ * @file
+ *
+ * RTE BPF support.
+ * librte_bpf provides a framework to load and execute eBPF bytecode
+ * inside user-space dpdk based applications.
+ * It supports basic set of features from eBPF spec
+ * (https://www.kernel.org/doc/Documentation/networking/filter.txt).
+ */
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <bpf_def.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Possible types for function/BPF program arguments.
+ * Note: all pointer kinds share the 0x10 bit so that
+ * RTE_BPF_ARG_PTR_TYPE() can test for "is a pointer" in one mask.
+ */
+enum rte_bpf_arg_type {
+       RTE_BPF_ARG_UNDEF,      /**< undefined */
+       RTE_BPF_ARG_RAW,        /**< scalar value */
+       RTE_BPF_ARG_PTR = 0x10, /**< pointer to data buffer */
+       RTE_BPF_ARG_PTR_MBUF,   /**< pointer to rte_mbuf */
+       RTE_BPF_ARG_PTR_STACK,  /**< pointer into the BPF stack area -- presumably internal use only; confirm */
+};
+
+/**
+ * function argument information
+ */
+struct rte_bpf_arg {
+       enum rte_bpf_arg_type type;
+       size_t size;     /**< for pointer types, size of data it points to */
+       size_t buf_size;
+       /**< for mbuf ptr type, max size of rte_mbuf data buffer */
+};
+
+/**
+ * Determine whether the argument type is a pointer kind.
+ */
+#define RTE_BPF_ARG_PTR_TYPE(x)        ((x) & RTE_BPF_ARG_PTR)
+
+/**
+ * Possible types for external symbols.
+ */
+enum rte_bpf_xtype {
+       RTE_BPF_XTYPE_FUNC, /**< function */
+       RTE_BPF_XTYPE_VAR,  /**< variable */
+       RTE_BPF_XTYPE_NUM
+};
+
+/**
+ * Definition for external symbols available in the BPF program.
+ */
+struct rte_bpf_xsym {
+       const char *name;        /**< name */
+       enum rte_bpf_xtype type; /**< type */
+       union {
+               uint64_t (*func)(uint64_t, uint64_t, uint64_t,
+                               uint64_t, uint64_t);
+               void *var;
+       }; /**< value */
+};
+
+/**
+ * Input parameters for loading eBPF code.
+ */
+struct rte_bpf_prm {
+       const struct ebpf_insn *ins; /**< array of eBPF instructions */
+       uint32_t nb_ins;            /**< number of instructions in ins */
+       const struct rte_bpf_xsym *xsym;
+       /**< array of external symbols that eBPF code is allowed to reference */
+       uint32_t nb_xsym; /**< number of elements in xsym */
+       struct rte_bpf_arg prog_arg; /**< eBPF program input arg description */
+};
+
+/**
+ * Information about compiled into native ISA eBPF code.
+ */
+struct rte_bpf_jit {
+       uint64_t (*func)(void *); /**< JIT-ed native code */
+       size_t sz;                /**< size of JIT-ed code */
+};
+
+struct rte_bpf;
+
+/**
+ * De-allocate all memory used by this eBPF execution context.
+ *
+ * @param bpf
+ *   BPF handle to destroy.
+ */
+void rte_bpf_destroy(struct rte_bpf *bpf);
+
+/**
+ * Create a new eBPF execution context and load given BPF code into it.
+ *
+ * @param prm
+ *  Parameters used to create and initialise the BPF execution context.
+ * @return
+ *   BPF handle that is used in future BPF operations,
+ *   or NULL on error, with error code set in rte_errno.
+ *   Possible rte_errno errors include:
+ *   - EINVAL - invalid parameter passed to function
+ *   - ENOMEM - can't reserve enough memory
+ */
+struct rte_bpf *rte_bpf_load(const struct rte_bpf_prm *prm);
+
+/**
+ * Execute given BPF bytecode.
+ *
+ * @param bpf
+ *   handle for the BPF code to execute.
+ * @param ctx
+ *   pointer to input context.
+ * @return
+ *   BPF execution return value.
+ */
+uint64_t rte_bpf_exec(const struct rte_bpf *bpf, void *ctx);
+
+/**
+ * Execute given BPF bytecode over a set of input contexts.
+ *
+ * @param bpf
+ *   handle for the BPF code to execute.
+ * @param ctx
+ *   array of pointers to the input contexts.
+ * @param rc
+ *   array of return values (one per input).
+ * @param num
+ *   number of elements in ctx[] (and rc[]).
+ * @return
+ *   number of successfully processed inputs.
+ */
+uint32_t rte_bpf_exec_burst(const struct rte_bpf *bpf, void *ctx[],
+       uint64_t rc[], uint32_t num);
+
+/**
+ * Provide information about natively compiled code for given BPF handle.
+ *
+ * @param bpf
+ *   handle for the BPF code.
+ * @param jit
+ *   pointer to the rte_bpf_jit structure to be filled with related data.
+ * @return
+ *   - -EINVAL if the parameters are invalid.
+ *   - Zero if operation completed successfully.
+ */
+int rte_bpf_get_jit(const struct rte_bpf *bpf, struct rte_bpf_jit *jit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BPF_H_ */
diff --git a/lib/librte_bpf/rte_bpf_version.map b/lib/librte_bpf/rte_bpf_version.map
new file mode 100644 (file)
index 0000000..ea1d621
--- /dev/null
@@ -0,0 +1,11 @@
+EXPERIMENTAL {
+       global:
+
+       rte_bpf_destroy;
+       rte_bpf_exec;
+       rte_bpf_exec_burst;
+       rte_bpf_get_jit;
+       rte_bpf_load;
+
+       local: *;
+};
index 0d55a64..fcc3e8d 100644 (file)
@@ -23,7 +23,7 @@ libraries = [ 'compat', # just a header, used for versioning
        # add pkt framework libs which use other libs from above
        'port', 'table', 'pipeline',
        # flow_classify lib depends on pkt framework table lib
-       'flow_classify']
+       'flow_classify', 'bpf']
 
 foreach l:libraries
        build = true
index 9511f20..15a0121 100644 (file)
@@ -81,6 +81,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_LATENCY_STATS)  += -lrte_latencystats
 _LDLIBS-$(CONFIG_RTE_LIBRTE_POWER)          += -lrte_power
 
 _LDLIBS-$(CONFIG_RTE_LIBRTE_EFD)            += -lrte_efd
+_LDLIBS-$(CONFIG_RTE_LIBRTE_BPF)            += -lrte_bpf
 
 _LDLIBS-y += --whole-archive