From: Konstantin Ananyev Date: Thu, 10 May 2018 10:23:03 +0000 (+0100) Subject: bpf: add BPF loading and execution framework X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=94972f35a02e;p=dpdk.git bpf: add BPF loading and execution framework librte_bpf provides a framework to load and execute eBPF bytecode inside user-space dpdk based applications. It supports basic set of features from eBPF spec (https://www.kernel.org/doc/Documentation/networking/filter.txt). Not currently supported features: - JIT - cBPF - tail-pointer call - eBPF MAP - skb - function calls for 32-bit apps - mbuf pointer as input parameter for 32-bit apps Signed-off-by: Konstantin Ananyev Acked-by: Ferruh Yigit --- diff --git a/MAINTAINERS b/MAINTAINERS index 93e344b320..8b4972ec32 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1100,6 +1100,10 @@ Latency statistics M: Reshma Pattan F: lib/librte_latencystats/ +BPF +M: Konstantin Ananyev +F: lib/librte_bpf/ + Test Applications ----------------- diff --git a/config/common_base b/config/common_base index 452e800e74..10f9f903ae 100644 --- a/config/common_base +++ b/config/common_base @@ -868,6 +868,11 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=n # CONFIG_RTE_LIBRTE_IFCVF_VDPA_PMD=n +# +# Compile librte_bpf +# +CONFIG_RTE_LIBRTE_BPF=y + # # Compile the test application # diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md index 270aae2882..9265907fa3 100644 --- a/doc/api/doxy-api-index.md +++ b/doc/api/doxy-api-index.md @@ -114,7 +114,8 @@ The public API headers are grouped by topics: [EFD] (@ref rte_efd.h), [ACL] (@ref rte_acl.h), [member] (@ref rte_member.h), - [flow classify] (@ref rte_flow_classify.h) + [flow classify] (@ref rte_flow_classify.h), + [BPF] (@ref rte_bpf.h) - **containers**: [mbuf] (@ref rte_mbuf.h), diff --git a/doc/api/doxy-api.conf b/doc/api/doxy-api.conf index dfa4158365..a3b1c0bb62 100644 --- a/doc/api/doxy-api.conf +++ b/doc/api/doxy-api.conf @@ -45,6 +45,7 @@ INPUT = doc/api/doxy-api-index.md \ lib/librte_acl \ 
lib/librte_bbdev \ lib/librte_bitratestats \ + lib/librte_bpf \ lib/librte_cfgfile \ lib/librte_cmdline \ lib/librte_compat \ diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst index f0df4e0bde..aef6b6281b 100644 --- a/doc/guides/rel_notes/release_18_05.rst +++ b/doc/guides/rel_notes/release_18_05.rst @@ -222,6 +222,11 @@ New Features stats/xstats on shared memory from secondary process, and also pdump packets on those virtual devices. +* **Added the BPF Library.** + + The BPF Library provides the ability to load and execute + Enhanced Berkeley Packet Filter (eBPF) within user-space dpdk application. + API Changes ----------- @@ -483,6 +488,7 @@ The libraries prepended with a plus sign were incremented in this version. librte_acl.so.2 librte_bbdev.so.1 librte_bitratestats.so.2 + + librte_bpf.so.1 librte_bus_dpaa.so.1 librte_bus_fslmc.so.1 librte_bus_pci.so.1 diff --git a/lib/Makefile b/lib/Makefile index c59f68208a..d82462ba22 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -102,6 +102,8 @@ DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ethdev DIRS-$(CONFIG_RTE_LIBRTE_GSO) += librte_gso DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ethdev librte_net DEPDIRS-librte_gso += librte_mempool +DIRS-$(CONFIG_RTE_LIBRTE_BPF) += librte_bpf +DEPDIRS-librte_bpf := librte_eal librte_mempool librte_mbuf librte_ethdev ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y) DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni diff --git a/lib/librte_bpf/Makefile b/lib/librte_bpf/Makefile new file mode 100644 index 0000000000..da93065649 --- /dev/null +++ b/lib/librte_bpf/Makefile @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_bpf.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) +CFLAGS += -DALLOW_EXPERIMENTAL_API +LDLIBS += -lrte_net -lrte_eal +LDLIBS += -lrte_mempool -lrte_ring +LDLIBS += -lrte_mbuf 
-lrte_ethdev + +EXPORT_MAP := rte_bpf_version.map + +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf.c +SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_exec.c +SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load.c +SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_validate.c + +# install header files +SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += bpf_def.h +SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_bpf/bpf.c b/lib/librte_bpf/bpf.c new file mode 100644 index 0000000000..d7f68c017b --- /dev/null +++ b/lib/librte_bpf/bpf.c @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "bpf_impl.h" + +int rte_bpf_logtype; + +__rte_experimental void +rte_bpf_destroy(struct rte_bpf *bpf) +{ + if (bpf != NULL) { + if (bpf->jit.func != NULL) + munmap(bpf->jit.func, bpf->jit.sz); + munmap(bpf, bpf->sz); + } +} + +__rte_experimental int +rte_bpf_get_jit(const struct rte_bpf *bpf, struct rte_bpf_jit *jit) +{ + if (bpf == NULL || jit == NULL) + return -EINVAL; + + jit[0] = bpf->jit; + return 0; +} + +int +bpf_jit(struct rte_bpf *bpf) +{ + int32_t rc; + + rc = -ENOTSUP; + if (rc != 0) + RTE_BPF_LOG(WARNING, "%s(%p) failed, error code: %d;\n", + __func__, bpf, rc); + return rc; +} + +RTE_INIT(rte_bpf_init_log); + +static void +rte_bpf_init_log(void) +{ + rte_bpf_logtype = rte_log_register("lib.bpf"); + if (rte_bpf_logtype >= 0) + rte_log_set_level(rte_bpf_logtype, RTE_LOG_INFO); +} diff --git a/lib/librte_bpf/bpf_def.h b/lib/librte_bpf/bpf_def.h new file mode 100644 index 0000000000..6b69de3456 --- /dev/null +++ b/lib/librte_bpf/bpf_def.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. + * Copyright(c) 2018 Intel Corporation. 
+ */ + +#ifndef _RTE_BPF_DEF_H_ +#define _RTE_BPF_DEF_H_ + +/** + * @file + * + * classic BPF (cBPF) and extended BPF (eBPF) related defines. + * For more information regarding cBPF and eBPF ISA and their differences, + * please refer to: + * https://www.kernel.org/doc/Documentation/networking/filter.txt. + * As a rule of thumb for that file: + * all definitions used by both cBPF and eBPF start with bpf(BPF)_ prefix, + * while eBPF-only ones start with ebpf(EBPF)_ prefix. + */ + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The instruction encodings. + */ + +/* Instruction classes */ +#define BPF_CLASS(code) ((code) & 0x07) +#define BPF_LD 0x00 +#define BPF_LDX 0x01 +#define BPF_ST 0x02 +#define BPF_STX 0x03 +#define BPF_ALU 0x04 +#define BPF_JMP 0x05 +#define BPF_RET 0x06 +#define BPF_MISC 0x07 + +#define EBPF_ALU64 0x07 + +/* ld/ldx fields */ +#define BPF_SIZE(code) ((code) & 0x18) +#define BPF_W 0x00 +#define BPF_H 0x08 +#define BPF_B 0x10 +#define EBPF_DW 0x18 + +#define BPF_MODE(code) ((code) & 0xe0) +#define BPF_IMM 0x00 +#define BPF_ABS 0x20 +#define BPF_IND 0x40 +#define BPF_MEM 0x60 +#define BPF_LEN 0x80 +#define BPF_MSH 0xa0 + +#define EBPF_XADD 0xc0 + +/* alu/jmp fields */ +#define BPF_OP(code) ((code) & 0xf0) +#define BPF_ADD 0x00 +#define BPF_SUB 0x10 +#define BPF_MUL 0x20 +#define BPF_DIV 0x30 +#define BPF_OR 0x40 +#define BPF_AND 0x50 +#define BPF_LSH 0x60 +#define BPF_RSH 0x70 +#define BPF_NEG 0x80 +#define BPF_MOD 0x90 +#define BPF_XOR 0xa0 + +#define EBPF_MOV 0xb0 +#define EBPF_ARSH 0xc0 +#define EBPF_END 0xd0 + +#define BPF_JA 0x00 +#define BPF_JEQ 0x10 +#define BPF_JGT 0x20 +#define BPF_JGE 0x30 +#define BPF_JSET 0x40 + +#define EBPF_JNE 0x50 +#define EBPF_JSGT 0x60 +#define EBPF_JSGE 0x70 +#define EBPF_CALL 0x80 +#define EBPF_EXIT 0x90 +#define EBPF_JLT 0xa0 +#define EBPF_JLE 0xb0 +#define EBPF_JSLT 0xc0 +#define EBPF_JSLE 0xd0 + +#define BPF_SRC(code) ((code) & 0x08) +#define BPF_K 0x00 +#define BPF_X 0x08 + +/* if 
BPF_OP(code) == EBPF_END */ +#define EBPF_TO_LE 0x00 /* convert to little-endian */ +#define EBPF_TO_BE 0x08 /* convert to big-endian */ + +/* + * eBPF registers + */ +enum { + EBPF_REG_0, /* return value from internal function/for eBPF program */ + EBPF_REG_1, /* 0-th argument to internal function */ + EBPF_REG_2, /* 1-th argument to internal function */ + EBPF_REG_3, /* 2-th argument to internal function */ + EBPF_REG_4, /* 3-th argument to internal function */ + EBPF_REG_5, /* 4-th argument to internal function */ + EBPF_REG_6, /* callee saved register */ + EBPF_REG_7, /* callee saved register */ + EBPF_REG_8, /* callee saved register */ + EBPF_REG_9, /* callee saved register */ + EBPF_REG_10, /* stack pointer (read-only) */ + EBPF_REG_NUM, +}; + +/* + * eBPF instruction format + */ +struct ebpf_insn { + uint8_t code; + uint8_t dst_reg:4; + uint8_t src_reg:4; + int16_t off; + int32_t imm; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BPF_DEF_H_ */ diff --git a/lib/librte_bpf/bpf_exec.c b/lib/librte_bpf/bpf_exec.c new file mode 100644 index 0000000000..e373b1f3dc --- /dev/null +++ b/lib/librte_bpf/bpf_exec.c @@ -0,0 +1,453 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "bpf_impl.h" + +#define BPF_JMP_UNC(ins) ((ins) += (ins)->off) + +#define BPF_JMP_CND_REG(reg, ins, op, type) \ + ((ins) += \ + ((type)(reg)[(ins)->dst_reg] op (type)(reg)[(ins)->src_reg]) ? \ + (ins)->off : 0) + +#define BPF_JMP_CND_IMM(reg, ins, op, type) \ + ((ins) += \ + ((type)(reg)[(ins)->dst_reg] op (type)(ins)->imm) ? 
\ + (ins)->off : 0) + +#define BPF_NEG_ALU(reg, ins, type) \ + ((reg)[(ins)->dst_reg] = (type)(-(reg)[(ins)->dst_reg])) + +#define EBPF_MOV_ALU_REG(reg, ins, type) \ + ((reg)[(ins)->dst_reg] = (type)(reg)[(ins)->src_reg]) + +#define BPF_OP_ALU_REG(reg, ins, op, type) \ + ((reg)[(ins)->dst_reg] = \ + (type)(reg)[(ins)->dst_reg] op (type)(reg)[(ins)->src_reg]) + +#define EBPF_MOV_ALU_IMM(reg, ins, type) \ + ((reg)[(ins)->dst_reg] = (type)(ins)->imm) + +#define BPF_OP_ALU_IMM(reg, ins, op, type) \ + ((reg)[(ins)->dst_reg] = \ + (type)(reg)[(ins)->dst_reg] op (type)(ins)->imm) + +#define BPF_DIV_ZERO_CHECK(bpf, reg, ins, type) do { \ + if ((type)(reg)[(ins)->src_reg] == 0) { \ + RTE_BPF_LOG(ERR, \ + "%s(%p): division by 0 at pc: %#zx;\n", \ + __func__, bpf, \ + (uintptr_t)(ins) - (uintptr_t)(bpf)->prm.ins); \ + return 0; \ + } \ +} while (0) + +#define BPF_LD_REG(reg, ins, type) \ + ((reg)[(ins)->dst_reg] = \ + *(type *)(uintptr_t)((reg)[(ins)->src_reg] + (ins)->off)) + +#define BPF_ST_IMM(reg, ins, type) \ + (*(type *)(uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off) = \ + (type)(ins)->imm) + +#define BPF_ST_REG(reg, ins, type) \ + (*(type *)(uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off) = \ + (type)(reg)[(ins)->src_reg]) + +#define BPF_ST_XADD_REG(reg, ins, tp) \ + (rte_atomic##tp##_add((rte_atomic##tp##_t *) \ + (uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off), \ + reg[ins->src_reg])) + +static inline void +bpf_alu_be(uint64_t reg[EBPF_REG_NUM], const struct ebpf_insn *ins) +{ + uint64_t *v; + + v = reg + ins->dst_reg; + switch (ins->imm) { + case 16: + *v = rte_cpu_to_be_16(*v); + break; + case 32: + *v = rte_cpu_to_be_32(*v); + break; + case 64: + *v = rte_cpu_to_be_64(*v); + break; + } +} + +static inline void +bpf_alu_le(uint64_t reg[EBPF_REG_NUM], const struct ebpf_insn *ins) +{ + uint64_t *v; + + v = reg + ins->dst_reg; + switch (ins->imm) { + case 16: + *v = rte_cpu_to_le_16(*v); + break; + case 32: + *v = rte_cpu_to_le_32(*v); + break; + case 64: + *v = 
rte_cpu_to_le_64(*v); + break; + } +} + +static inline uint64_t +bpf_exec(const struct rte_bpf *bpf, uint64_t reg[EBPF_REG_NUM]) +{ + const struct ebpf_insn *ins; + + for (ins = bpf->prm.ins; ; ins++) { + switch (ins->code) { + /* 32 bit ALU IMM operations */ + case (BPF_ALU | BPF_ADD | BPF_K): + BPF_OP_ALU_IMM(reg, ins, +, uint32_t); + break; + case (BPF_ALU | BPF_SUB | BPF_K): + BPF_OP_ALU_IMM(reg, ins, -, uint32_t); + break; + case (BPF_ALU | BPF_AND | BPF_K): + BPF_OP_ALU_IMM(reg, ins, &, uint32_t); + break; + case (BPF_ALU | BPF_OR | BPF_K): + BPF_OP_ALU_IMM(reg, ins, |, uint32_t); + break; + case (BPF_ALU | BPF_LSH | BPF_K): + BPF_OP_ALU_IMM(reg, ins, <<, uint32_t); + break; + case (BPF_ALU | BPF_RSH | BPF_K): + BPF_OP_ALU_IMM(reg, ins, >>, uint32_t); + break; + case (BPF_ALU | BPF_XOR | BPF_K): + BPF_OP_ALU_IMM(reg, ins, ^, uint32_t); + break; + case (BPF_ALU | BPF_MUL | BPF_K): + BPF_OP_ALU_IMM(reg, ins, *, uint32_t); + break; + case (BPF_ALU | BPF_DIV | BPF_K): + BPF_OP_ALU_IMM(reg, ins, /, uint32_t); + break; + case (BPF_ALU | BPF_MOD | BPF_K): + BPF_OP_ALU_IMM(reg, ins, %, uint32_t); + break; + case (BPF_ALU | EBPF_MOV | BPF_K): + EBPF_MOV_ALU_IMM(reg, ins, uint32_t); + break; + /* 32 bit ALU REG operations */ + case (BPF_ALU | BPF_ADD | BPF_X): + BPF_OP_ALU_REG(reg, ins, +, uint32_t); + break; + case (BPF_ALU | BPF_SUB | BPF_X): + BPF_OP_ALU_REG(reg, ins, -, uint32_t); + break; + case (BPF_ALU | BPF_AND | BPF_X): + BPF_OP_ALU_REG(reg, ins, &, uint32_t); + break; + case (BPF_ALU | BPF_OR | BPF_X): + BPF_OP_ALU_REG(reg, ins, |, uint32_t); + break; + case (BPF_ALU | BPF_LSH | BPF_X): + BPF_OP_ALU_REG(reg, ins, <<, uint32_t); + break; + case (BPF_ALU | BPF_RSH | BPF_X): + BPF_OP_ALU_REG(reg, ins, >>, uint32_t); + break; + case (BPF_ALU | BPF_XOR | BPF_X): + BPF_OP_ALU_REG(reg, ins, ^, uint32_t); + break; + case (BPF_ALU | BPF_MUL | BPF_X): + BPF_OP_ALU_REG(reg, ins, *, uint32_t); + break; + case (BPF_ALU | BPF_DIV | BPF_X): + BPF_DIV_ZERO_CHECK(bpf, reg, 
ins, uint32_t); + BPF_OP_ALU_REG(reg, ins, /, uint32_t); + break; + case (BPF_ALU | BPF_MOD | BPF_X): + BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint32_t); + BPF_OP_ALU_REG(reg, ins, %, uint32_t); + break; + case (BPF_ALU | EBPF_MOV | BPF_X): + EBPF_MOV_ALU_REG(reg, ins, uint32_t); + break; + case (BPF_ALU | BPF_NEG): + BPF_NEG_ALU(reg, ins, uint32_t); + break; + case (BPF_ALU | EBPF_END | EBPF_TO_BE): + bpf_alu_be(reg, ins); + break; + case (BPF_ALU | EBPF_END | EBPF_TO_LE): + bpf_alu_le(reg, ins); + break; + /* 64 bit ALU IMM operations */ + case (EBPF_ALU64 | BPF_ADD | BPF_K): + BPF_OP_ALU_IMM(reg, ins, +, uint64_t); + break; + case (EBPF_ALU64 | BPF_SUB | BPF_K): + BPF_OP_ALU_IMM(reg, ins, -, uint64_t); + break; + case (EBPF_ALU64 | BPF_AND | BPF_K): + BPF_OP_ALU_IMM(reg, ins, &, uint64_t); + break; + case (EBPF_ALU64 | BPF_OR | BPF_K): + BPF_OP_ALU_IMM(reg, ins, |, uint64_t); + break; + case (EBPF_ALU64 | BPF_LSH | BPF_K): + BPF_OP_ALU_IMM(reg, ins, <<, uint64_t); + break; + case (EBPF_ALU64 | BPF_RSH | BPF_K): + BPF_OP_ALU_IMM(reg, ins, >>, uint64_t); + break; + case (EBPF_ALU64 | EBPF_ARSH | BPF_K): + BPF_OP_ALU_IMM(reg, ins, >>, int64_t); + break; + case (EBPF_ALU64 | BPF_XOR | BPF_K): + BPF_OP_ALU_IMM(reg, ins, ^, uint64_t); + break; + case (EBPF_ALU64 | BPF_MUL | BPF_K): + BPF_OP_ALU_IMM(reg, ins, *, uint64_t); + break; + case (EBPF_ALU64 | BPF_DIV | BPF_K): + BPF_OP_ALU_IMM(reg, ins, /, uint64_t); + break; + case (EBPF_ALU64 | BPF_MOD | BPF_K): + BPF_OP_ALU_IMM(reg, ins, %, uint64_t); + break; + case (EBPF_ALU64 | EBPF_MOV | BPF_K): + EBPF_MOV_ALU_IMM(reg, ins, uint64_t); + break; + /* 64 bit ALU REG operations */ + case (EBPF_ALU64 | BPF_ADD | BPF_X): + BPF_OP_ALU_REG(reg, ins, +, uint64_t); + break; + case (EBPF_ALU64 | BPF_SUB | BPF_X): + BPF_OP_ALU_REG(reg, ins, -, uint64_t); + break; + case (EBPF_ALU64 | BPF_AND | BPF_X): + BPF_OP_ALU_REG(reg, ins, &, uint64_t); + break; + case (EBPF_ALU64 | BPF_OR | BPF_X): + BPF_OP_ALU_REG(reg, ins, |, uint64_t); + 
break; + case (EBPF_ALU64 | BPF_LSH | BPF_X): + BPF_OP_ALU_REG(reg, ins, <<, uint64_t); + break; + case (EBPF_ALU64 | BPF_RSH | BPF_X): + BPF_OP_ALU_REG(reg, ins, >>, uint64_t); + break; + case (EBPF_ALU64 | EBPF_ARSH | BPF_X): + BPF_OP_ALU_REG(reg, ins, >>, int64_t); + break; + case (EBPF_ALU64 | BPF_XOR | BPF_X): + BPF_OP_ALU_REG(reg, ins, ^, uint64_t); + break; + case (EBPF_ALU64 | BPF_MUL | BPF_X): + BPF_OP_ALU_REG(reg, ins, *, uint64_t); + break; + case (EBPF_ALU64 | BPF_DIV | BPF_X): + BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint64_t); + BPF_OP_ALU_REG(reg, ins, /, uint64_t); + break; + case (EBPF_ALU64 | BPF_MOD | BPF_X): + BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint64_t); + BPF_OP_ALU_REG(reg, ins, %, uint64_t); + break; + case (EBPF_ALU64 | EBPF_MOV | BPF_X): + EBPF_MOV_ALU_REG(reg, ins, uint64_t); + break; + case (EBPF_ALU64 | BPF_NEG): + BPF_NEG_ALU(reg, ins, uint64_t); + break; + /* load instructions */ + case (BPF_LDX | BPF_MEM | BPF_B): + BPF_LD_REG(reg, ins, uint8_t); + break; + case (BPF_LDX | BPF_MEM | BPF_H): + BPF_LD_REG(reg, ins, uint16_t); + break; + case (BPF_LDX | BPF_MEM | BPF_W): + BPF_LD_REG(reg, ins, uint32_t); + break; + case (BPF_LDX | BPF_MEM | EBPF_DW): + BPF_LD_REG(reg, ins, uint64_t); + break; + /* load 64 bit immediate value */ + case (BPF_LD | BPF_IMM | EBPF_DW): + reg[ins->dst_reg] = (uint32_t)ins[0].imm | + (uint64_t)(uint32_t)ins[1].imm << 32; + ins++; + break; + /* store instructions */ + case (BPF_STX | BPF_MEM | BPF_B): + BPF_ST_REG(reg, ins, uint8_t); + break; + case (BPF_STX | BPF_MEM | BPF_H): + BPF_ST_REG(reg, ins, uint16_t); + break; + case (BPF_STX | BPF_MEM | BPF_W): + BPF_ST_REG(reg, ins, uint32_t); + break; + case (BPF_STX | BPF_MEM | EBPF_DW): + BPF_ST_REG(reg, ins, uint64_t); + break; + case (BPF_ST | BPF_MEM | BPF_B): + BPF_ST_IMM(reg, ins, uint8_t); + break; + case (BPF_ST | BPF_MEM | BPF_H): + BPF_ST_IMM(reg, ins, uint16_t); + break; + case (BPF_ST | BPF_MEM | BPF_W): + BPF_ST_IMM(reg, ins, uint32_t); + break; + case 
(BPF_ST | BPF_MEM | EBPF_DW): + BPF_ST_IMM(reg, ins, uint64_t); + break; + /* atomic add instructions */ + case (BPF_STX | EBPF_XADD | BPF_W): + BPF_ST_XADD_REG(reg, ins, 32); + break; + case (BPF_STX | EBPF_XADD | EBPF_DW): + BPF_ST_XADD_REG(reg, ins, 64); + break; + /* jump instructions */ + case (BPF_JMP | BPF_JA): + BPF_JMP_UNC(ins); + break; + /* jump IMM instructions */ + case (BPF_JMP | BPF_JEQ | BPF_K): + BPF_JMP_CND_IMM(reg, ins, ==, uint64_t); + break; + case (BPF_JMP | EBPF_JNE | BPF_K): + BPF_JMP_CND_IMM(reg, ins, !=, uint64_t); + break; + case (BPF_JMP | BPF_JGT | BPF_K): + BPF_JMP_CND_IMM(reg, ins, >, uint64_t); + break; + case (BPF_JMP | EBPF_JLT | BPF_K): + BPF_JMP_CND_IMM(reg, ins, <, uint64_t); + break; + case (BPF_JMP | BPF_JGE | BPF_K): + BPF_JMP_CND_IMM(reg, ins, >=, uint64_t); + break; + case (BPF_JMP | EBPF_JLE | BPF_K): + BPF_JMP_CND_IMM(reg, ins, <=, uint64_t); + break; + case (BPF_JMP | EBPF_JSGT | BPF_K): + BPF_JMP_CND_IMM(reg, ins, >, int64_t); + break; + case (BPF_JMP | EBPF_JSLT | BPF_K): + BPF_JMP_CND_IMM(reg, ins, <, int64_t); + break; + case (BPF_JMP | EBPF_JSGE | BPF_K): + BPF_JMP_CND_IMM(reg, ins, >=, int64_t); + break; + case (BPF_JMP | EBPF_JSLE | BPF_K): + BPF_JMP_CND_IMM(reg, ins, <=, int64_t); + break; + case (BPF_JMP | BPF_JSET | BPF_K): + BPF_JMP_CND_IMM(reg, ins, &, uint64_t); + break; + /* jump REG instructions */ + case (BPF_JMP | BPF_JEQ | BPF_X): + BPF_JMP_CND_REG(reg, ins, ==, uint64_t); + break; + case (BPF_JMP | EBPF_JNE | BPF_X): + BPF_JMP_CND_REG(reg, ins, !=, uint64_t); + break; + case (BPF_JMP | BPF_JGT | BPF_X): + BPF_JMP_CND_REG(reg, ins, >, uint64_t); + break; + case (BPF_JMP | EBPF_JLT | BPF_X): + BPF_JMP_CND_REG(reg, ins, <, uint64_t); + break; + case (BPF_JMP | BPF_JGE | BPF_X): + BPF_JMP_CND_REG(reg, ins, >=, uint64_t); + break; + case (BPF_JMP | EBPF_JLE | BPF_X): + BPF_JMP_CND_REG(reg, ins, <=, uint64_t); + break; + case (BPF_JMP | EBPF_JSGT | BPF_X): + BPF_JMP_CND_REG(reg, ins, >, int64_t); + break; + 
case (BPF_JMP | EBPF_JSLT | BPF_X): + BPF_JMP_CND_REG(reg, ins, <, int64_t); + break; + case (BPF_JMP | EBPF_JSGE | BPF_X): + BPF_JMP_CND_REG(reg, ins, >=, int64_t); + break; + case (BPF_JMP | EBPF_JSLE | BPF_X): + BPF_JMP_CND_REG(reg, ins, <=, int64_t); + break; + case (BPF_JMP | BPF_JSET | BPF_X): + BPF_JMP_CND_REG(reg, ins, &, uint64_t); + break; + /* call instructions */ + case (BPF_JMP | EBPF_CALL): + reg[EBPF_REG_0] = bpf->prm.xsym[ins->imm].func( + reg[EBPF_REG_1], reg[EBPF_REG_2], + reg[EBPF_REG_3], reg[EBPF_REG_4], + reg[EBPF_REG_5]); + break; + /* return instruction */ + case (BPF_JMP | EBPF_EXIT): + return reg[EBPF_REG_0]; + default: + RTE_BPF_LOG(ERR, + "%s(%p): invalid opcode %#x at pc: %#zx;\n", + __func__, bpf, ins->code, + (uintptr_t)ins - (uintptr_t)bpf->prm.ins); + return 0; + } + } + + /* should never be reached */ + RTE_VERIFY(0); + return 0; +} + +__rte_experimental uint32_t +rte_bpf_exec_burst(const struct rte_bpf *bpf, void *ctx[], uint64_t rc[], + uint32_t num) +{ + uint32_t i; + uint64_t reg[EBPF_REG_NUM]; + uint64_t stack[MAX_BPF_STACK_SIZE / sizeof(uint64_t)]; + + for (i = 0; i != num; i++) { + + reg[EBPF_REG_1] = (uintptr_t)ctx[i]; + reg[EBPF_REG_10] = (uintptr_t)(stack + RTE_DIM(stack)); + + rc[i] = bpf_exec(bpf, reg); + } + + return i; +} + +__rte_experimental uint64_t +rte_bpf_exec(const struct rte_bpf *bpf, void *ctx) +{ + uint64_t rc; + + rte_bpf_exec_burst(bpf, &ctx, &rc, 1); + return rc; +} diff --git a/lib/librte_bpf/bpf_impl.h b/lib/librte_bpf/bpf_impl.h new file mode 100644 index 0000000000..5d7e65c310 --- /dev/null +++ b/lib/librte_bpf/bpf_impl.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef _BPF_H_ +#define _BPF_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_BPF_STACK_SIZE 0x200 + +struct rte_bpf { + struct rte_bpf_prm prm; + struct rte_bpf_jit jit; + size_t sz; + uint32_t stack_sz; +}; + +extern int bpf_validate(struct 
rte_bpf *bpf); + +extern int bpf_jit(struct rte_bpf *bpf); + +#ifdef RTE_ARCH_X86_64 +extern int bpf_jit_x86(struct rte_bpf *); +#endif + +extern int rte_bpf_logtype; + +#define RTE_BPF_LOG(lvl, fmt, args...) \ + rte_log(RTE_LOG_## lvl, rte_bpf_logtype, fmt, ##args) + +#ifdef __cplusplus +} +#endif + +#endif /* _BPF_H_ */ diff --git a/lib/librte_bpf/bpf_load.c b/lib/librte_bpf/bpf_load.c new file mode 100644 index 0000000000..f28ecfb4d6 --- /dev/null +++ b/lib/librte_bpf/bpf_load.c @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_impl.h" + +static struct rte_bpf * +bpf_load(const struct rte_bpf_prm *prm) +{ + uint8_t *buf; + struct rte_bpf *bpf; + size_t sz, bsz, insz, xsz; + + xsz = prm->nb_xsym * sizeof(prm->xsym[0]); + insz = prm->nb_ins * sizeof(prm->ins[0]); + bsz = sizeof(bpf[0]); + sz = insz + xsz + bsz; + + buf = mmap(NULL, sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (buf == MAP_FAILED) + return NULL; + + bpf = (void *)buf; + bpf->sz = sz; + + memcpy(&bpf->prm, prm, sizeof(bpf->prm)); + + memcpy(buf + bsz, prm->xsym, xsz); + memcpy(buf + bsz + xsz, prm->ins, insz); + + bpf->prm.xsym = (void *)(buf + bsz); + bpf->prm.ins = (void *)(buf + bsz + xsz); + + return bpf; +} + +__rte_experimental struct rte_bpf * +rte_bpf_load(const struct rte_bpf_prm *prm) +{ + struct rte_bpf *bpf; + int32_t rc; + + if (prm == NULL || prm->ins == NULL) { + rte_errno = EINVAL; + return NULL; + } + + bpf = bpf_load(prm); + if (bpf == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + rc = bpf_validate(bpf); + if (rc == 0) { + bpf_jit(bpf); + if (mprotect(bpf, bpf->sz, PROT_READ) != 0) + rc = -ENOMEM; + } + + if (rc != 0) { + rte_bpf_destroy(bpf); + rte_errno = -rc; + return NULL; + } + + return bpf; +} diff --git 
a/lib/librte_bpf/bpf_validate.c b/lib/librte_bpf/bpf_validate.c new file mode 100644 index 0000000000..6a1b331810 --- /dev/null +++ b/lib/librte_bpf/bpf_validate.c @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "bpf_impl.h" + +/* + * dummy one for now, need more work. + */ +int +bpf_validate(struct rte_bpf *bpf) +{ + int32_t rc, ofs, stack_sz; + uint32_t i, op, dr; + const struct ebpf_insn *ins; + + rc = 0; + stack_sz = 0; + for (i = 0; i != bpf->prm.nb_ins; i++) { + + ins = bpf->prm.ins + i; + op = ins->code; + dr = ins->dst_reg; + ofs = ins->off; + + if ((BPF_CLASS(op) == BPF_STX || BPF_CLASS(op) == BPF_ST) && + dr == EBPF_REG_10) { + ofs -= sizeof(uint64_t); + stack_sz = RTE_MIN(ofs, stack_sz); + } + } + + if (stack_sz != 0) { + stack_sz = -stack_sz; + if (stack_sz > MAX_BPF_STACK_SIZE) + rc = -ERANGE; + else + bpf->stack_sz = stack_sz; + } + + if (rc != 0) + RTE_BPF_LOG(ERR, "%s(%p) failed, error code: %d;\n", + __func__, bpf, rc); + return rc; +} diff --git a/lib/librte_bpf/meson.build b/lib/librte_bpf/meson.build new file mode 100644 index 0000000000..4fa000f5ac --- /dev/null +++ b/lib/librte_bpf/meson.build @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + +allow_experimental_apis = true +sources = files('bpf.c', + 'bpf_exec.c', + 'bpf_load.c', + 'bpf_validate.c') + +install_headers = files('bpf_def.h', + 'rte_bpf.h') + +deps += ['mbuf', 'net'] diff --git a/lib/librte_bpf/rte_bpf.h b/lib/librte_bpf/rte_bpf.h new file mode 100644 index 0000000000..4b3d970b9c --- /dev/null +++ b/lib/librte_bpf/rte_bpf.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef _RTE_BPF_H_ +#define _RTE_BPF_H_ + +/** + * @file + * + * RTE BPF support. 
+ * librte_bpf provides a framework to load and execute eBPF bytecode + * inside user-space dpdk based applications. + * It supports a basic set of features from eBPF spec + * (https://www.kernel.org/doc/Documentation/networking/filter.txt). + */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Possible types for function/BPF program arguments. + */ +enum rte_bpf_arg_type { + RTE_BPF_ARG_UNDEF, /**< undefined */ + RTE_BPF_ARG_RAW, /**< scalar value */ + RTE_BPF_ARG_PTR = 0x10, /**< pointer to data buffer */ + RTE_BPF_ARG_PTR_MBUF, /**< pointer to rte_mbuf */ + RTE_BPF_ARG_PTR_STACK, +}; + +/** + * function argument information + */ +struct rte_bpf_arg { + enum rte_bpf_arg_type type; + size_t size; /**< for pointer types, size of data it points to */ + size_t buf_size; + /**< for mbuf ptr type, max size of rte_mbuf data buffer */ +}; + +/** + * determine if the argument is a pointer + */ +#define RTE_BPF_ARG_PTR_TYPE(x) ((x) & RTE_BPF_ARG_PTR) + +/** + * Possible types for external symbols. + */ +enum rte_bpf_xtype { + RTE_BPF_XTYPE_FUNC, /**< function */ + RTE_BPF_XTYPE_VAR, /**< variable */ + RTE_BPF_XTYPE_NUM +}; + +/** + * Definition for external symbols available in the BPF program. + */ +struct rte_bpf_xsym { + const char *name; /**< name */ + enum rte_bpf_xtype type; /**< type */ + union { + uint64_t (*func)(uint64_t, uint64_t, uint64_t, + uint64_t, uint64_t); + void *var; + }; /**< value */ +}; + +/** + * Input parameters for loading eBPF code. + */ +struct rte_bpf_prm { + const struct ebpf_insn *ins; /**< array of eBPF instructions */ + uint32_t nb_ins; /**< number of instructions in ins */ + const struct rte_bpf_xsym *xsym; + /**< array of external symbols that eBPF code is allowed to reference */ + uint32_t nb_xsym; /**< number of elements in xsym */ + struct rte_bpf_arg prog_arg; /**< eBPF program input arg description */ +}; + +/** + * Information about eBPF code compiled into native ISA. 
+ */ +struct rte_bpf_jit { + uint64_t (*func)(void *); /**< JIT-ed native code */ + size_t sz; /**< size of JIT-ed code */ +}; + +struct rte_bpf; + +/** + * De-allocate all memory used by this eBPF execution context. + * + * @param bpf + * BPF handle to destroy. + */ +void rte_bpf_destroy(struct rte_bpf *bpf); + +/** + * Create a new eBPF execution context and load given BPF code into it. + * + * @param prm + * Parameters used to create and initialise the BPF execution context. + * @return + * BPF handle that is used in future BPF operations, + * or NULL on error, with error code set in rte_errno. + * Possible rte_errno errors include: + * - EINVAL - invalid parameter passed to function + * - ENOMEM - can't reserve enough memory + */ +struct rte_bpf *rte_bpf_load(const struct rte_bpf_prm *prm); + +/** + * Execute given BPF bytecode. + * + * @param bpf + * handle for the BPF code to execute. + * @param ctx + * pointer to input context. + * @return + * BPF execution return value. + */ +uint64_t rte_bpf_exec(const struct rte_bpf *bpf, void *ctx); + +/** + * Execute given BPF bytecode over a set of input contexts. + * + * @param bpf + * handle for the BPF code to execute. + * @param ctx + * array of pointers to the input contexts. + * @param rc + * array of return values (one per input). + * @param num + * number of elements in ctx[] (and rc[]). + * @return + * number of successfully processed inputs. + */ +uint32_t rte_bpf_exec_burst(const struct rte_bpf *bpf, void *ctx[], + uint64_t rc[], uint32_t num); + +/** + * Provide information about natively compiled code for given BPF handle. + * + * @param bpf + * handle for the BPF code. + * @param jit + * pointer to the rte_bpf_jit structure to be filled with related data. + * @return + * - -EINVAL if the parameters are invalid. + * - Zero if operation completed successfully. 
+ */ +int rte_bpf_get_jit(const struct rte_bpf *bpf, struct rte_bpf_jit *jit); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BPF_H_ */ diff --git a/lib/librte_bpf/rte_bpf_version.map b/lib/librte_bpf/rte_bpf_version.map new file mode 100644 index 0000000000..ea1d621c43 --- /dev/null +++ b/lib/librte_bpf/rte_bpf_version.map @@ -0,0 +1,11 @@ +EXPERIMENTAL { + global: + + rte_bpf_destroy; + rte_bpf_exec; + rte_bpf_exec_burst; + rte_bpf_get_jit; + rte_bpf_load; + + local: *; +}; diff --git a/lib/meson.build b/lib/meson.build index 0d55a647dd..fcc3e8db61 100644 --- a/lib/meson.build +++ b/lib/meson.build @@ -23,7 +23,7 @@ libraries = [ 'compat', # just a header, used for versioning # add pkt framework libs which use other libs from above 'port', 'table', 'pipeline', # flow_classify lib depends on pkt framework table lib - 'flow_classify'] + 'flow_classify', 'bpf'] foreach l:libraries build = true diff --git a/mk/rte.app.mk b/mk/rte.app.mk index 9511f20e9c..15a0121fbf 100644 --- a/mk/rte.app.mk +++ b/mk/rte.app.mk @@ -81,6 +81,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_LATENCY_STATS) += -lrte_latencystats _LDLIBS-$(CONFIG_RTE_LIBRTE_POWER) += -lrte_power _LDLIBS-$(CONFIG_RTE_LIBRTE_EFD) += -lrte_efd +_LDLIBS-$(CONFIG_RTE_LIBRTE_BPF) += -lrte_bpf _LDLIBS-y += --whole-archive