X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_bpf%2Fbpf_jit_arm64.c;h=a5a5d46f0622a099c35404c719a538b4de813527;hb=1d51f154cd93bc8f623985032835d2218e05c893;hp=621bb7f4611f38259ba1589756b64108ef885d47;hpb=6861c01001ac3b1869f5cf1f492809875f753e67;p=dpdk.git diff --git a/lib/librte_bpf/bpf_jit_arm64.c b/lib/librte_bpf/bpf_jit_arm64.c index 621bb7f461..a5a5d46f06 100644 --- a/lib/librte_bpf/bpf_jit_arm64.c +++ b/lib/librte_bpf/bpf_jit_arm64.c @@ -3,17 +3,1449 @@ */ #include +#include #include +#include #include "bpf_impl.h" + +#define A64_REG_MASK(r) ((r) & 0x1f) +#define A64_INVALID_OP_CODE (0xffffffff) + +#define TMP_REG_1 (EBPF_REG_10 + 1) +#define TMP_REG_2 (EBPF_REG_10 + 2) +#define TMP_REG_3 (EBPF_REG_10 + 3) + +#define EBPF_FP (EBPF_REG_10) +#define EBPF_OP_GET(op) (BPF_OP(op) >> 4) + +#define A64_R(x) x +#define A64_FP 29 +#define A64_LR 30 +#define A64_SP 31 +#define A64_ZR 31 + +#define check_imm(n, val) (((val) >= 0) ? !!((val) >> (n)) : !!((~val) >> (n))) +#define mask_imm(n, val) ((val) & ((1 << (n)) - 1)) + +struct ebpf_a64_map { + uint32_t off; /* eBPF to arm64 insn offset mapping for jump */ + uint8_t off_to_b; /* Offset to branch instruction delta */ +}; + +struct a64_jit_ctx { + size_t stack_sz; /* Stack size */ + uint32_t *ins; /* ARM64 instructions. NULL if first pass */ + struct ebpf_a64_map *map; /* eBPF to arm64 insn mapping for jump */ + uint32_t idx; /* Current instruction index */ + uint32_t program_start; /* Program index, Just after prologue */ + uint32_t program_sz; /* Program size. Found in first pass */ + uint8_t foundcall; /* Found EBPF_CALL class code in eBPF pgm */ +}; + +static int +check_immr_imms(bool is64, uint8_t immr, uint8_t imms) +{ + const unsigned int width = is64 ? 64 : 32; + + if (immr >= width || imms >= width) + return 1; + + return 0; +} + +static int +check_mov_hw(bool is64, const uint8_t val) +{ + if (val == 16 || val == 0) + return 0; + else if (is64 && val != 64 && val != 48 && val != 32) + return 1; + + return 0; +} + +static int +check_ls_sz(uint8_t sz) +{ + if (sz == BPF_B || sz == BPF_H || sz == BPF_W || sz == EBPF_DW) + return 0; + + return 1; +} + +static int +check_reg(uint8_t r) +{ + return (r > 31) ? 
1 : 0; +} + +static int +is_first_pass(struct a64_jit_ctx *ctx) +{ + return (ctx->ins == NULL); +} + +static int +check_invalid_args(struct a64_jit_ctx *ctx, uint32_t limit) +{ + uint32_t idx; + + if (is_first_pass(ctx)) + return 0; + + for (idx = 0; idx < limit; idx++) { + if (rte_le_to_cpu_32(ctx->ins[idx]) == A64_INVALID_OP_CODE) { + RTE_BPF_LOG(ERR, + "%s: invalid opcode at %u;\n", __func__, idx); + return -EINVAL; + } + } + return 0; +} + +static int +jump_offset_init(struct a64_jit_ctx *ctx, struct rte_bpf *bpf) +{ + uint32_t i; + + ctx->map = malloc(bpf->prm.nb_ins * sizeof(ctx->map[0])); + if (ctx->map == NULL) + return -ENOMEM; + + /* Fill with fake offsets */ + for (i = 0; i != bpf->prm.nb_ins; i++) { + ctx->map[i].off = INT32_MAX; + ctx->map[i].off_to_b = 0; + } + return 0; +} + +static void +jump_offset_fini(struct a64_jit_ctx *ctx) +{ + free(ctx->map); +} + +static void +jump_offset_update(struct a64_jit_ctx *ctx, uint32_t ebpf_idx) +{ + if (is_first_pass(ctx)) + ctx->map[ebpf_idx].off = ctx->idx; +} + +static void +jump_offset_to_branch_update(struct a64_jit_ctx *ctx, uint32_t ebpf_idx) +{ + if (is_first_pass(ctx)) + ctx->map[ebpf_idx].off_to_b = ctx->idx - ctx->map[ebpf_idx].off; + +} + +static int32_t +jump_offset_get(struct a64_jit_ctx *ctx, uint32_t from, int16_t offset) +{ + int32_t a64_from, a64_to; + + a64_from = ctx->map[from].off + ctx->map[from].off_to_b; + a64_to = ctx->map[from + offset + 1].off; + + if (a64_to == INT32_MAX) + return a64_to; + + return a64_to - a64_from; +} + +enum a64_cond_e { + A64_EQ = 0x0, /* == */ + A64_NE = 0x1, /* != */ + A64_CS = 0x2, /* Unsigned >= */ + A64_CC = 0x3, /* Unsigned < */ + A64_MI = 0x4, /* < 0 */ + A64_PL = 0x5, /* >= 0 */ + A64_VS = 0x6, /* Overflow */ + A64_VC = 0x7, /* No overflow */ + A64_HI = 0x8, /* Unsigned > */ + A64_LS = 0x9, /* Unsigned <= */ + A64_GE = 0xa, /* Signed >= */ + A64_LT = 0xb, /* Signed < */ + A64_GT = 0xc, /* Signed > */ + A64_LE = 0xd, /* Signed <= */ + A64_AL = 0xe, /* Always */ +}; + +static int +check_cond(uint8_t cond) +{ + return (cond >= A64_AL) ? 
1 : 0; +} + +static uint8_t +ebpf_to_a64_cond(uint8_t op) +{ + switch (BPF_OP(op)) { + case BPF_JEQ: + return A64_EQ; + case BPF_JGT: + return A64_HI; + case EBPF_JLT: + return A64_CC; + case BPF_JGE: + return A64_CS; + case EBPF_JLE: + return A64_LS; + case BPF_JSET: + case EBPF_JNE: + return A64_NE; + case EBPF_JSGT: + return A64_GT; + case EBPF_JSLT: + return A64_LT; + case EBPF_JSGE: + return A64_GE; + case EBPF_JSLE: + return A64_LE; + default: + return UINT8_MAX; + } +} + +/* Emit an instruction */ +static inline void +emit_insn(struct a64_jit_ctx *ctx, uint32_t insn, int error) +{ + if (error) + insn = A64_INVALID_OP_CODE; + + if (ctx->ins) + ctx->ins[ctx->idx] = rte_cpu_to_le_32(insn); + + ctx->idx++; +} + +static void +emit_ret(struct a64_jit_ctx *ctx) +{ + emit_insn(ctx, 0xd65f03c0, 0); +} + +static void +emit_add_sub_imm(struct a64_jit_ctx *ctx, bool is64, bool sub, uint8_t rd, + uint8_t rn, int16_t imm12) +{ + uint32_t insn, imm; + + imm = mask_imm(12, imm12); + insn = (!!is64) << 31; + insn |= (!!sub) << 30; + insn |= 0x11000000; + insn |= rd; + insn |= rn << 5; + insn |= imm << 10; + + emit_insn(ctx, insn, + check_reg(rd) || check_reg(rn) || check_imm(12, imm12)); +} + +static void +emit_add_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12) +{ + emit_add_sub_imm(ctx, 1, 0, rd, rn, imm12); +} + +static void +emit_sub_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12) +{ + emit_add_sub_imm(ctx, 1, 1, rd, rn, imm12); +} + +static void +emit_mov(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn) +{ + emit_add_sub_imm(ctx, is64, 0, rd, rn, 0); +} + +static void +emit_mov_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn) +{ + emit_mov(ctx, 1, rd, rn); +} + +static void +emit_ls_pair_64(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2, uint8_t rn, + bool push, bool load, bool pre_index) +{ + uint32_t insn; + + insn = (!!load) << 22; + insn |= (!!pre_index) << 24; + insn |= 0xa8800000; + insn |= rt; + insn |= rn << 5; + insn |= rt2 << 10; + if (push) + insn |= 0x7e << 15; /* 0x7e means -2 with imm7 */ + else + insn |= 0x2 << 15; + + emit_insn(ctx, insn, check_reg(rn) || check_reg(rt) || check_reg(rt2)); + +} + +/* Emit stp rt, rt2, [sp, #-16]! 
*/ +static void +emit_stack_push(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2) +{ + emit_ls_pair_64(ctx, rt, rt2, A64_SP, 1, 0, 1); +} + +/* Emit ldp rt, rt2, [sp, #16] */ +static void +emit_stack_pop(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2) +{ + emit_ls_pair_64(ctx, rt, rt2, A64_SP, 0, 1, 0); +} + +#define A64_MOVN 0 +#define A64_MOVZ 2 +#define A64_MOVK 3 +static void +mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t type, + uint16_t imm16, uint8_t shift) +{ + uint32_t insn; + + insn = (!!is64) << 31; + insn |= type << 29; + insn |= 0x25 << 23; + insn |= (shift/16) << 21; + insn |= imm16 << 5; + insn |= rd; + + emit_insn(ctx, insn, check_reg(rd) || check_mov_hw(is64, shift)); +} + +static void +emit_mov_imm32(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint32_t val) +{ + uint16_t upper = val >> 16; + uint16_t lower = val & 0xffff; + + /* Positive number */ + if ((val & 1UL << 31) == 0) { + mov_imm(ctx, is64, rd, A64_MOVZ, lower, 0); + if (upper) + mov_imm(ctx, is64, rd, A64_MOVK, upper, 16); + } else { /* Negative number */ + if (upper == 0xffff) { + mov_imm(ctx, is64, rd, A64_MOVN, ~lower, 0); + } else { + mov_imm(ctx, is64, rd, A64_MOVN, ~upper, 16); + if (lower != 0xffff) + mov_imm(ctx, is64, rd, A64_MOVK, lower, 0); + } + } +} + +static int +u16_blocks_weight(const uint64_t val, bool one) +{ + return (((val >> 0) & 0xffff) == (one ? 0xffff : 0x0000)) + + (((val >> 16) & 0xffff) == (one ? 0xffff : 0x0000)) + + (((val >> 32) & 0xffff) == (one ? 0xffff : 0x0000)) + + (((val >> 48) & 0xffff) == (one ? 0xffff : 0x0000)); +} + +static void +emit_mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint64_t val) +{ + uint64_t nval = ~val; + int movn, sr; + + if (is64 == 0) + return emit_mov_imm32(ctx, 0, rd, (uint32_t)(val & 0xffffffff)); + + /* Find MOVN or MOVZ first */ + movn = u16_blocks_weight(val, true) > u16_blocks_weight(val, false); + /* Find shift right value */ + sr = movn ? rte_fls_u64(nval) - 1 : rte_fls_u64(val) - 1; + sr = RTE_ALIGN_FLOOR(sr, 16); + sr = RTE_MAX(sr, 0); + + if (movn) + mov_imm(ctx, 1, rd, A64_MOVN, (nval >> sr) & 0xffff, sr); + else + mov_imm(ctx, 1, rd, A64_MOVZ, (val >> sr) & 0xffff, sr); + + sr -= 16; + while (sr >= 0) { + if (((val >> sr) & 0xffff) != (movn ? 
0xffff : 0x0000)) + mov_imm(ctx, 1, rd, A64_MOVK, (val >> sr) & 0xffff, sr); + sr -= 16; + } +} + +static void +emit_ls(struct a64_jit_ctx *ctx, uint8_t sz, uint8_t rt, uint8_t rn, uint8_t rm, + bool load) +{ + uint32_t insn; + + insn = 0x1c1 << 21; + if (load) + insn |= 1 << 22; + if (sz == BPF_B) + insn |= 0 << 30; + else if (sz == BPF_H) + insn |= 1 << 30; + else if (sz == BPF_W) + insn |= 2 << 30; + else if (sz == EBPF_DW) + insn |= 3 << 30; + + insn |= rm << 16; + insn |= 0x1a << 10; /* LSL and S = 0 */ + insn |= rn << 5; + insn |= rt; + + emit_insn(ctx, insn, check_reg(rt) || check_reg(rn) || check_reg(rm) || + check_ls_sz(sz)); +} + +static void +emit_str(struct a64_jit_ctx *ctx, uint8_t sz, uint8_t rt, uint8_t rn, + uint8_t rm) +{ + emit_ls(ctx, sz, rt, rn, rm, 0); +} + +static void +emit_ldr(struct a64_jit_ctx *ctx, uint8_t sz, uint8_t rt, uint8_t rn, + uint8_t rm) +{ + emit_ls(ctx, sz, rt, rn, rm, 1); +} + +#define A64_ADD 0x58 +#define A64_SUB 0x258 +static void +emit_add_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn, + uint8_t rm, uint16_t op) +{ + uint32_t insn; + + insn = (!!is64) << 31; + insn |= op << 21; /* shift == 0 */ + insn |= rm << 16; + insn |= rn << 5; + insn |= rd; + + emit_insn(ctx, insn, check_reg(rd) || check_reg(rm)); +} + +static void +emit_add(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm) +{ + emit_add_sub(ctx, is64, rd, rd, rm, A64_ADD); +} + +static void +emit_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm) +{ + emit_add_sub(ctx, is64, rd, rd, rm, A64_SUB); +} + +static void +emit_neg(struct a64_jit_ctx *ctx, bool is64, uint8_t rd) +{ + emit_add_sub(ctx, is64, rd, A64_ZR, rd, A64_SUB); +} + +static void +emit_mul(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm) +{ + uint32_t insn; + + insn = (!!is64) << 31; + insn |= 0xd8 << 21; + insn |= rm << 16; + insn |= A64_ZR << 10; + insn |= rd << 5; + insn |= rd; + + emit_insn(ctx, insn, check_reg(rd) || check_reg(rm)); +} + +#define A64_UDIV 0x2 +#define A64_LSLV 0x8 +#define A64_LSRV 0x9 +#define A64_ASRV 0xA +static void +emit_data_process_two_src(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, + uint8_t rn, uint8_t rm, uint16_t op) + +{ + uint32_t insn; + + insn = (!!is64) << 31; + insn |= 0xd6 << 21; + insn |= rm << 16; + insn |= op << 10; + insn |= rn << 5; + insn |= rd; + + emit_insn(ctx, insn, check_reg(rd) || check_reg(rm)); +} + +static void +emit_div(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm) +{ + emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_UDIV); +} + +static void +emit_lslv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm) +{ + emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_LSLV); +} + +static void +emit_lsrv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm) +{ + emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_LSRV); +} + +static void +emit_asrv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm) +{ + emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_ASRV); +} + +#define A64_UBFM 0x2 +#define A64_SBFM 0x0 +static void +emit_bitfield(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn, + uint8_t immr, uint8_t imms, uint16_t op) + +{ + uint32_t insn; + + insn = (!!is64) << 31; + if (insn) + insn |= 1 << 22; /* Set N bit when is64 is set */ + insn |= op << 29; + insn |= 0x26 << 23; + insn |= immr << 16; + insn |= imms << 10; + insn |= rn << 5; + insn |= rd; + + emit_insn(ctx, insn, check_reg(rd) || check_reg(rn) || + check_immr_imms(is64, immr, imms)); +} +static void 
+emit_lsl(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm) +{ + const unsigned int width = is64 ? 64 : 32; + uint8_t imms, immr; + + immr = (width - imm) & (width - 1); + imms = width - 1 - imm; + + emit_bitfield(ctx, is64, rd, rd, immr, imms, A64_UBFM); +} + +static void +emit_lsr(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm) +{ + emit_bitfield(ctx, is64, rd, rd, imm, is64 ? 63 : 31, A64_UBFM); +} + +static void +emit_asr(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm) +{ + emit_bitfield(ctx, is64, rd, rd, imm, is64 ? 63 : 31, A64_SBFM); +} + +#define A64_AND 0 +#define A64_OR 1 +#define A64_XOR 2 +static void +emit_logical(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, + uint8_t rm, uint16_t op) +{ + uint32_t insn; + + insn = (!!is64) << 31; + insn |= op << 29; + insn |= 0x50 << 21; + insn |= rm << 16; + insn |= rd << 5; + insn |= rd; + + emit_insn(ctx, insn, check_reg(rd) || check_reg(rm)); +} + +static void +emit_or(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm) +{ + emit_logical(ctx, is64, rd, rm, A64_OR); +} + +static void +emit_and(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm) +{ + emit_logical(ctx, is64, rd, rm, A64_AND); +} + +static void +emit_xor(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm) +{ + emit_logical(ctx, is64, rd, rm, A64_XOR); +} + +static void +emit_msub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn, + uint8_t rm, uint8_t ra) +{ + uint32_t insn; + + insn = (!!is64) << 31; + insn |= 0xd8 << 21; + insn |= rm << 16; + insn |= 0x1 << 15; + insn |= ra << 10; + insn |= rn << 5; + insn |= rd; + + emit_insn(ctx, insn, check_reg(rd) || check_reg(rn) || check_reg(rm) || + check_reg(ra)); +} + +static void +emit_mod(struct a64_jit_ctx *ctx, bool is64, uint8_t tmp, uint8_t rd, + uint8_t rm) +{ + emit_data_process_two_src(ctx, is64, tmp, rd, rm, A64_UDIV); + emit_msub(ctx, is64, rd, tmp, rm, rd); +} + +static void +emit_blr(struct a64_jit_ctx *ctx, uint8_t rn) +{ + uint32_t insn; + + insn = 0xd63f0000; + insn |= rn << 5; + + emit_insn(ctx, insn, check_reg(rn)); +} + +static void +emit_zero_extend(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm) +{ + switch (imm) { + case 16: + /* Zero-extend 16 bits into 64 bits */ + emit_bitfield(ctx, 1, rd, rd, 0, 15, A64_UBFM); + break; + case 32: + /* Zero-extend 32 bits into 64 bits */ + emit_bitfield(ctx, 1, rd, rd, 0, 31, A64_UBFM); + break; + case 64: + break; + default: + /* Generate error */ + emit_insn(ctx, 0, 1); + } +} + +static void +emit_rev(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm) +{ + uint32_t insn; + + insn = 0xdac00000; + insn |= rd << 5; + insn |= rd; + + switch (imm) { + case 16: + insn |= 1 << 10; + emit_insn(ctx, insn, check_reg(rd)); + emit_zero_extend(ctx, rd, 16); + break; + case 32: + insn |= 2 << 10; + emit_insn(ctx, insn, check_reg(rd)); + /* Upper 32 bits already cleared */ + break; + case 64: + insn |= 3 << 10; + emit_insn(ctx, insn, check_reg(rd)); + break; + default: + /* Generate error */ + emit_insn(ctx, insn, 1); + } +} + +static int +is_be(void) +{ +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN + return 1; +#else + return 0; +#endif +} + +static void +emit_be(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm) +{ + if (is_be()) + emit_zero_extend(ctx, rd, imm); + else + emit_rev(ctx, rd, imm); +} + +static void +emit_le(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm) +{ + if (is_be()) + emit_rev(ctx, rd, imm); + else + emit_zero_extend(ctx, rd, imm); +} + +static uint8_t +ebpf_to_a64_reg(struct a64_jit_ctx *ctx, 
uint8_t reg) +{ + const uint32_t ebpf2a64_has_call[] = { + /* Map A64 R7 register as EBPF return register */ + [EBPF_REG_0] = A64_R(7), + /* Map A64 arguments register as EBPF arguments register */ + [EBPF_REG_1] = A64_R(0), + [EBPF_REG_2] = A64_R(1), + [EBPF_REG_3] = A64_R(2), + [EBPF_REG_4] = A64_R(3), + [EBPF_REG_5] = A64_R(4), + /* Map A64 callee save register as EBPF callee save register */ + [EBPF_REG_6] = A64_R(19), + [EBPF_REG_7] = A64_R(20), + [EBPF_REG_8] = A64_R(21), + [EBPF_REG_9] = A64_R(22), + [EBPF_FP] = A64_R(25), + /* Map A64 scratch registers as temporary storage */ + [TMP_REG_1] = A64_R(9), + [TMP_REG_2] = A64_R(10), + [TMP_REG_3] = A64_R(11), + }; + + const uint32_t ebpf2a64_no_call[] = { + /* Map A64 R7 register as EBPF return register */ + [EBPF_REG_0] = A64_R(7), + /* Map A64 arguments register as EBPF arguments register */ + [EBPF_REG_1] = A64_R(0), + [EBPF_REG_2] = A64_R(1), + [EBPF_REG_3] = A64_R(2), + [EBPF_REG_4] = A64_R(3), + [EBPF_REG_5] = A64_R(4), + /* + * EBPF program does not have EBPF_CALL op code, + * Map A64 scratch registers as EBPF callee save registers. + */ + [EBPF_REG_6] = A64_R(9), + [EBPF_REG_7] = A64_R(10), + [EBPF_REG_8] = A64_R(11), + [EBPF_REG_9] = A64_R(12), + /* Map A64 FP register as EBPF FP register */ + [EBPF_FP] = A64_FP, + /* Map remaining A64 scratch registers as temporary storage */ + [TMP_REG_1] = A64_R(13), + [TMP_REG_2] = A64_R(14), + [TMP_REG_3] = A64_R(15), + }; + + if (ctx->foundcall) + return ebpf2a64_has_call[reg]; + else + return ebpf2a64_no_call[reg]; +} + +/* + * Procedure call standard for the arm64 + * ------------------------------------- + * R0..R7 - Parameter/result registers + * R8 - Indirect result location register + * R9..R15 - Scratch registers + * R15 - Platform Register + * R16 - First intra-procedure-call scratch register + * R17 - Second intra-procedure-call temporary register + * R19-R28 - Callee saved registers + * R29 - Frame pointer + * R30 - Link register + * R31 - Stack pointer + */ +static void +emit_prologue_has_call(struct a64_jit_ctx *ctx) +{ + uint8_t r6, r7, r8, r9, fp; + + r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6); + r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7); + r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8); + r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9); + fp = ebpf_to_a64_reg(ctx, EBPF_FP); + + /* + * eBPF prog stack layout + * + * high + * eBPF prologue 0:+-----+ <= original A64_SP + * |FP/LR| + * -16:+-----+ <= current A64_FP + * Callee saved registers | ... | + * EBPF_FP => -64:+-----+ + * | | + * eBPF prog stack | ... | + * | | + * (EBPF_FP - bpf->stack_sz)=> +-----+ + * Pad for A64_SP 16B alignment| PAD | + * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP + * | | + * | ... | Function call stack + * | | + * +-----+ + * low + */ + emit_stack_push(ctx, A64_FP, A64_LR); + emit_mov_64(ctx, A64_FP, A64_SP); + emit_stack_push(ctx, r6, r7); + emit_stack_push(ctx, r8, r9); + /* + * There is no requirement to save A64_R(28) in stack. Doing it here, + * because, A64_SP needs be to 16B aligned and STR vs STP + * takes same number of cycles(typically). 
+ */ + emit_stack_push(ctx, fp, A64_R(28)); + emit_mov_64(ctx, fp, A64_SP); + if (ctx->stack_sz) + emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz); +} + +static void +emit_epilogue_has_call(struct a64_jit_ctx *ctx) +{ + uint8_t r6, r7, r8, r9, fp, r0; + + r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6); + r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7); + r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8); + r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9); + fp = ebpf_to_a64_reg(ctx, EBPF_FP); + r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0); + + if (ctx->stack_sz) + emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz); + emit_stack_pop(ctx, fp, A64_R(28)); + emit_stack_pop(ctx, r8, r9); + emit_stack_pop(ctx, r6, r7); + emit_stack_pop(ctx, A64_FP, A64_LR); + emit_mov_64(ctx, A64_R(0), r0); + emit_ret(ctx); +} + +static void +emit_prologue_no_call(struct a64_jit_ctx *ctx) +{ + /* + * eBPF prog stack layout without EBPF_CALL opcode + * + * high + * eBPF prologue(EBPF_FP) 0:+-----+ <= original A64_SP/current A64_FP + * | | + * | ... | + * eBPF prog stack | | + * | | + * (EBPF_FP - bpf->stack_sz)=> +-----+ + * Pad for A64_SP 16B alignment| PAD | + * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP + * | | + * | ... | Function call stack + * | | + * +-----+ + * low + */ + if (ctx->stack_sz) { + emit_mov_64(ctx, A64_FP, A64_SP); + emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz); + } +} + +static void +emit_epilogue_no_call(struct a64_jit_ctx *ctx) +{ + if (ctx->stack_sz) + emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz); + emit_mov_64(ctx, A64_R(0), ebpf_to_a64_reg(ctx, EBPF_REG_0)); + emit_ret(ctx); +} + +static void +emit_prologue(struct a64_jit_ctx *ctx) +{ + if (ctx->foundcall) + emit_prologue_has_call(ctx); + else + emit_prologue_no_call(ctx); + + ctx->program_start = ctx->idx; +} + +static void +emit_epilogue(struct a64_jit_ctx *ctx) +{ + ctx->program_sz = ctx->idx - ctx->program_start; + + if (ctx->foundcall) + emit_epilogue_has_call(ctx); + else + emit_epilogue_no_call(ctx); +} + +static void +emit_call(struct a64_jit_ctx *ctx, uint8_t tmp, void *func) +{ + uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0); + + emit_mov_imm(ctx, 1, tmp, (uint64_t)func); + emit_blr(ctx, tmp); + emit_mov_64(ctx, r0, A64_R(0)); +} + +static void +emit_cbnz(struct a64_jit_ctx *ctx, bool is64, uint8_t rt, int32_t imm19) +{ + uint32_t insn, imm; + + imm = mask_imm(19, imm19); + insn = (!!is64) << 31; + insn |= 0x35 << 24; + insn |= imm << 5; + insn |= rt; + + emit_insn(ctx, insn, check_reg(rt) || check_imm(19, imm19)); +} + +static void +emit_b(struct a64_jit_ctx *ctx, int32_t imm26) +{ + uint32_t insn, imm; + + imm = mask_imm(26, imm26); + insn = 0x5 << 26; + insn |= imm; + + emit_insn(ctx, insn, check_imm(26, imm26)); +} + +static void +emit_return_zero_if_src_zero(struct a64_jit_ctx *ctx, bool is64, uint8_t src) +{ + uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0); + uint16_t jump_to_epilogue; + + emit_cbnz(ctx, is64, src, 3); + emit_mov_imm(ctx, is64, r0, 0); + jump_to_epilogue = (ctx->program_start + ctx->program_sz) - ctx->idx; + emit_b(ctx, jump_to_epilogue); +} + +static void +emit_stadd(struct a64_jit_ctx *ctx, bool is64, uint8_t rs, uint8_t rn) +{ + uint32_t insn; + + insn = 0xb820001f; + insn |= (!!is64) << 30; + insn |= rs << 16; + insn |= rn << 5; + + emit_insn(ctx, insn, check_reg(rs) || check_reg(rn)); +} + +static void +emit_ldxr(struct a64_jit_ctx *ctx, bool is64, uint8_t rt, uint8_t rn) +{ + uint32_t insn; + + insn = 0x885f7c00; + insn |= (!!is64) << 30; + insn |= rn << 5; + insn |= rt; + + emit_insn(ctx, insn, check_reg(rt) || 
check_reg(rn)); +} + +static void +emit_stxr(struct a64_jit_ctx *ctx, bool is64, uint8_t rs, uint8_t rt, + uint8_t rn) +{ + uint32_t insn; + + insn = 0x88007c00; + insn |= (!!is64) << 30; + insn |= rs << 16; + insn |= rn << 5; + insn |= rt; + + emit_insn(ctx, insn, check_reg(rs) || check_reg(rt) || check_reg(rn)); +} + +static int +has_atomics(void) +{ + int rc = 0; + +#if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS) + rc = 1; +#endif + return rc; +} + +static void +emit_xadd(struct a64_jit_ctx *ctx, uint8_t op, uint8_t tmp1, uint8_t tmp2, + uint8_t tmp3, uint8_t dst, int16_t off, uint8_t src) +{ + bool is64 = (BPF_SIZE(op) == EBPF_DW); + uint8_t rn; + + if (off) { + emit_mov_imm(ctx, 1, tmp1, off); + emit_add(ctx, 1, tmp1, dst); + rn = tmp1; + } else { + rn = dst; + } + + if (has_atomics()) { + emit_stadd(ctx, is64, src, rn); + } else { + emit_ldxr(ctx, is64, tmp2, rn); + emit_add(ctx, is64, tmp2, src); + emit_stxr(ctx, is64, tmp3, tmp2, rn); + emit_cbnz(ctx, is64, tmp3, -3); + } +} + +#define A64_CMP 0x6b00000f +#define A64_TST 0x6a00000f +static void +emit_cmp_tst(struct a64_jit_ctx *ctx, bool is64, uint8_t rn, uint8_t rm, + uint32_t opc) +{ + uint32_t insn; + + insn = opc; + insn |= (!!is64) << 31; + insn |= rm << 16; + insn |= rn << 5; + + emit_insn(ctx, insn, check_reg(rn) || check_reg(rm)); +} + +static void +emit_cmp(struct a64_jit_ctx *ctx, bool is64, uint8_t rn, uint8_t rm) +{ + emit_cmp_tst(ctx, is64, rn, rm, A64_CMP); +} + +static void +emit_tst(struct a64_jit_ctx *ctx, bool is64, uint8_t rn, uint8_t rm) +{ + emit_cmp_tst(ctx, is64, rn, rm, A64_TST); +} + +static void +emit_b_cond(struct a64_jit_ctx *ctx, uint8_t cond, int32_t imm19) +{ + uint32_t insn, imm; + + imm = mask_imm(19, imm19); + insn = 0x15 << 26; + insn |= imm << 5; + insn |= cond; + + emit_insn(ctx, insn, check_cond(cond) || check_imm(19, imm19)); +} + +static void +emit_branch(struct a64_jit_ctx *ctx, uint8_t op, uint32_t i, int16_t off) +{ + jump_offset_to_branch_update(ctx, i); + emit_b_cond(ctx, ebpf_to_a64_cond(op), jump_offset_get(ctx, i, off)); +} + +static void +check_program_has_call(struct a64_jit_ctx *ctx, struct rte_bpf *bpf) +{ + const struct ebpf_insn *ins; + uint8_t op; + uint32_t i; + + for (i = 0; i != bpf->prm.nb_ins; i++) { + ins = bpf->prm.ins + i; + op = ins->code; + + switch (op) { + /* Call imm */ + case (BPF_JMP | EBPF_CALL): + ctx->foundcall = 1; + return; + } + } +} + +/* + * Walk through eBPF code and translate them to arm64 one. 
+ */ +static int +emit(struct a64_jit_ctx *ctx, struct rte_bpf *bpf) +{ + uint8_t op, dst, src, tmp1, tmp2, tmp3; + const struct ebpf_insn *ins; + uint64_t u64; + int16_t off; + int32_t imm; + uint32_t i; + bool is64; + int rc; + + /* Reset context fields */ + ctx->idx = 0; + /* arm64 SP must be aligned to 16 */ + ctx->stack_sz = RTE_ALIGN_MUL_CEIL(bpf->stack_sz, 16); + tmp1 = ebpf_to_a64_reg(ctx, TMP_REG_1); + tmp2 = ebpf_to_a64_reg(ctx, TMP_REG_2); + tmp3 = ebpf_to_a64_reg(ctx, TMP_REG_3); + + emit_prologue(ctx); + + for (i = 0; i != bpf->prm.nb_ins; i++) { + + jump_offset_update(ctx, i); + ins = bpf->prm.ins + i; + op = ins->code; + off = ins->off; + imm = ins->imm; + + dst = ebpf_to_a64_reg(ctx, ins->dst_reg); + src = ebpf_to_a64_reg(ctx, ins->src_reg); + is64 = (BPF_CLASS(op) == EBPF_ALU64); + + switch (op) { + /* dst = src */ + case (BPF_ALU | EBPF_MOV | BPF_X): + case (EBPF_ALU64 | EBPF_MOV | BPF_X): + emit_mov(ctx, is64, dst, src); + break; + /* dst = imm */ + case (BPF_ALU | EBPF_MOV | BPF_K): + case (EBPF_ALU64 | EBPF_MOV | BPF_K): + emit_mov_imm(ctx, is64, dst, imm); + break; + /* dst += src */ + case (BPF_ALU | BPF_ADD | BPF_X): + case (EBPF_ALU64 | BPF_ADD | BPF_X): + emit_add(ctx, is64, dst, src); + break; + /* dst += imm */ + case (BPF_ALU | BPF_ADD | BPF_K): + case (EBPF_ALU64 | BPF_ADD | BPF_K): + emit_mov_imm(ctx, is64, tmp1, imm); + emit_add(ctx, is64, dst, tmp1); + break; + /* dst -= src */ + case (BPF_ALU | BPF_SUB | BPF_X): + case (EBPF_ALU64 | BPF_SUB | BPF_X): + emit_sub(ctx, is64, dst, src); + break; + /* dst -= imm */ + case (BPF_ALU | BPF_SUB | BPF_K): + case (EBPF_ALU64 | BPF_SUB | BPF_K): + emit_mov_imm(ctx, is64, tmp1, imm); + emit_sub(ctx, is64, dst, tmp1); + break; + /* dst *= src */ + case (BPF_ALU | BPF_MUL | BPF_X): + case (EBPF_ALU64 | BPF_MUL | BPF_X): + emit_mul(ctx, is64, dst, src); + break; + /* dst *= imm */ + case (BPF_ALU | BPF_MUL | BPF_K): + case (EBPF_ALU64 | BPF_MUL | BPF_K): + emit_mov_imm(ctx, is64, tmp1, imm); + emit_mul(ctx, is64, dst, tmp1); + break; + /* dst /= src */ + case (BPF_ALU | BPF_DIV | BPF_X): + case (EBPF_ALU64 | BPF_DIV | BPF_X): + emit_return_zero_if_src_zero(ctx, is64, src); + emit_div(ctx, is64, dst, src); + break; + /* dst /= imm */ + case (BPF_ALU | BPF_DIV | BPF_K): + case (EBPF_ALU64 | BPF_DIV | BPF_K): + emit_mov_imm(ctx, is64, tmp1, imm); + emit_div(ctx, is64, dst, tmp1); + break; + /* dst %= src */ + case (BPF_ALU | BPF_MOD | BPF_X): + case (EBPF_ALU64 | BPF_MOD | BPF_X): + emit_return_zero_if_src_zero(ctx, is64, src); + emit_mod(ctx, is64, tmp1, dst, src); + break; + /* dst %= imm */ + case (BPF_ALU | BPF_MOD | BPF_K): + case (EBPF_ALU64 | BPF_MOD | BPF_K): + emit_mov_imm(ctx, is64, tmp1, imm); + emit_mod(ctx, is64, tmp2, dst, tmp1); + break; + /* dst |= src */ + case (BPF_ALU | BPF_OR | BPF_X): + case (EBPF_ALU64 | BPF_OR | BPF_X): + emit_or(ctx, is64, dst, src); + break; + /* dst |= imm */ + case (BPF_ALU | BPF_OR | BPF_K): + case (EBPF_ALU64 | BPF_OR | BPF_K): + emit_mov_imm(ctx, is64, tmp1, imm); + emit_or(ctx, is64, dst, tmp1); + break; + /* dst &= src */ + case (BPF_ALU | BPF_AND | BPF_X): + case (EBPF_ALU64 | BPF_AND | BPF_X): + emit_and(ctx, is64, dst, src); + break; + /* dst &= imm */ + case (BPF_ALU | BPF_AND | BPF_K): + case (EBPF_ALU64 | BPF_AND | BPF_K): + emit_mov_imm(ctx, is64, tmp1, imm); + emit_and(ctx, is64, dst, tmp1); + break; + /* dst ^= src */ + case (BPF_ALU | BPF_XOR | BPF_X): + case (EBPF_ALU64 | BPF_XOR | BPF_X): + emit_xor(ctx, is64, dst, src); + break; + /* dst ^= imm */ + case (BPF_ALU 
| BPF_XOR | BPF_K): + case (EBPF_ALU64 | BPF_XOR | BPF_K): + emit_mov_imm(ctx, is64, tmp1, imm); + emit_xor(ctx, is64, dst, tmp1); + break; + /* dst = -dst */ + case (BPF_ALU | BPF_NEG): + case (EBPF_ALU64 | BPF_NEG): + emit_neg(ctx, is64, dst); + break; + /* dst <<= src */ + case BPF_ALU | BPF_LSH | BPF_X: + case EBPF_ALU64 | BPF_LSH | BPF_X: + emit_lslv(ctx, is64, dst, src); + break; + /* dst <<= imm */ + case BPF_ALU | BPF_LSH | BPF_K: + case EBPF_ALU64 | BPF_LSH | BPF_K: + emit_lsl(ctx, is64, dst, imm); + break; + /* dst >>= src */ + case BPF_ALU | BPF_RSH | BPF_X: + case EBPF_ALU64 | BPF_RSH | BPF_X: + emit_lsrv(ctx, is64, dst, src); + break; + /* dst >>= imm */ + case BPF_ALU | BPF_RSH | BPF_K: + case EBPF_ALU64 | BPF_RSH | BPF_K: + emit_lsr(ctx, is64, dst, imm); + break; + /* dst >>= src (arithmetic) */ + case BPF_ALU | EBPF_ARSH | BPF_X: + case EBPF_ALU64 | EBPF_ARSH | BPF_X: + emit_asrv(ctx, is64, dst, src); + break; + /* dst >>= imm (arithmetic) */ + case BPF_ALU | EBPF_ARSH | BPF_K: + case EBPF_ALU64 | EBPF_ARSH | BPF_K: + emit_asr(ctx, is64, dst, imm); + break; + /* dst = be##imm(dst) */ + case (BPF_ALU | EBPF_END | EBPF_TO_BE): + emit_be(ctx, dst, imm); + break; + /* dst = le##imm(dst) */ + case (BPF_ALU | EBPF_END | EBPF_TO_LE): + emit_le(ctx, dst, imm); + break; + /* dst = *(size *) (src + off) */ + case (BPF_LDX | BPF_MEM | BPF_B): + case (BPF_LDX | BPF_MEM | BPF_H): + case (BPF_LDX | BPF_MEM | BPF_W): + case (BPF_LDX | BPF_MEM | EBPF_DW): + emit_mov_imm(ctx, 1, tmp1, off); + emit_ldr(ctx, BPF_SIZE(op), dst, src, tmp1); + break; + /* dst = imm64 */ + case (BPF_LD | BPF_IMM | EBPF_DW): + u64 = ((uint64_t)ins[1].imm << 32) | (uint32_t)imm; + emit_mov_imm(ctx, 1, dst, u64); + i++; + break; + /* *(size *)(dst + off) = src */ + case (BPF_STX | BPF_MEM | BPF_B): + case (BPF_STX | BPF_MEM | BPF_H): + case (BPF_STX | BPF_MEM | BPF_W): + case (BPF_STX | BPF_MEM | EBPF_DW): + emit_mov_imm(ctx, 1, tmp1, off); + emit_str(ctx, BPF_SIZE(op), src, dst, tmp1); + break; + /* *(size *)(dst + off) = imm */ + case (BPF_ST | BPF_MEM | BPF_B): + case (BPF_ST | BPF_MEM | BPF_H): + case (BPF_ST | BPF_MEM | BPF_W): + case (BPF_ST | BPF_MEM | EBPF_DW): + emit_mov_imm(ctx, 1, tmp1, imm); + emit_mov_imm(ctx, 1, tmp2, off); + emit_str(ctx, BPF_SIZE(op), tmp1, dst, tmp2); + break; + /* STX XADD: lock *(size *)(dst + off) += src */ + case (BPF_STX | EBPF_XADD | BPF_W): + case (BPF_STX | EBPF_XADD | EBPF_DW): + emit_xadd(ctx, op, tmp1, tmp2, tmp3, dst, off, src); + break; + /* PC += off */ + case (BPF_JMP | BPF_JA): + emit_b(ctx, jump_offset_get(ctx, i, off)); + break; + /* PC += off if dst COND imm */ + case (BPF_JMP | BPF_JEQ | BPF_K): + case (BPF_JMP | EBPF_JNE | BPF_K): + case (BPF_JMP | BPF_JGT | BPF_K): + case (BPF_JMP | EBPF_JLT | BPF_K): + case (BPF_JMP | BPF_JGE | BPF_K): + case (BPF_JMP | EBPF_JLE | BPF_K): + case (BPF_JMP | EBPF_JSGT | BPF_K): + case (BPF_JMP | EBPF_JSLT | BPF_K): + case (BPF_JMP | EBPF_JSGE | BPF_K): + case (BPF_JMP | EBPF_JSLE | BPF_K): + emit_mov_imm(ctx, 1, tmp1, imm); + emit_cmp(ctx, 1, dst, tmp1); + emit_branch(ctx, op, i, off); + break; + case (BPF_JMP | BPF_JSET | BPF_K): + emit_mov_imm(ctx, 1, tmp1, imm); + emit_tst(ctx, 1, dst, tmp1); + emit_branch(ctx, op, i, off); + break; + /* PC += off if dst COND src */ + case (BPF_JMP | BPF_JEQ | BPF_X): + case (BPF_JMP | EBPF_JNE | BPF_X): + case (BPF_JMP | BPF_JGT | BPF_X): + case (BPF_JMP | EBPF_JLT | BPF_X): + case (BPF_JMP | BPF_JGE | BPF_X): + case (BPF_JMP | EBPF_JLE | BPF_X): + case (BPF_JMP | EBPF_JSGT | BPF_X): + case 
(BPF_JMP | EBPF_JSLT | BPF_X): + case (BPF_JMP | EBPF_JSGE | BPF_X): + case (BPF_JMP | EBPF_JSLE | BPF_X): + emit_cmp(ctx, 1, dst, src); + emit_branch(ctx, op, i, off); + break; + case (BPF_JMP | BPF_JSET | BPF_X): + emit_tst(ctx, 1, dst, src); + emit_branch(ctx, op, i, off); + break; + /* Call imm */ + case (BPF_JMP | EBPF_CALL): + emit_call(ctx, tmp1, bpf->prm.xsym[ins->imm].func.val); + break; + /* Return r0 */ + case (BPF_JMP | EBPF_EXIT): + emit_epilogue(ctx); + break; + default: + RTE_BPF_LOG(ERR, + "%s(%p): invalid opcode %#x at pc: %u;\n", + __func__, bpf, ins->code, i); + return -EINVAL; + } + } + rc = check_invalid_args(ctx, ctx->idx); + + return rc; +} + /* * Produce a native ISA version of the given BPF code. */ int bpf_jit_arm64(struct rte_bpf *bpf) { - RTE_SET_USED(bpf); + struct a64_jit_ctx ctx; + size_t size; + int rc; + + /* Init JIT context */ + memset(&ctx, 0, sizeof(ctx)); + + /* Initialize the memory for eBPF to a64 insn offset map for jump */ + rc = jump_offset_init(&ctx, bpf); + if (rc) + goto error; + + /* Find eBPF program has call class or not */ + check_program_has_call(&ctx, bpf); + + /* First pass to calculate total code size and valid jump offsets */ + rc = emit(&ctx, bpf); + if (rc) + goto finish; + + size = ctx.idx * sizeof(uint32_t); + /* Allocate JIT program memory */ + ctx.ins = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ctx.ins == MAP_FAILED) { + rc = -ENOMEM; + goto finish; + } + + /* Second pass to generate code */ + rc = emit(&ctx, bpf); + if (rc) + goto munmap; + + rc = mprotect(ctx.ins, size, PROT_READ | PROT_EXEC) != 0; + if (rc) { + rc = -errno; + goto munmap; + } + + /* Flush the icache */ + __builtin___clear_cache((char *)ctx.ins, (char *)(ctx.ins + ctx.idx)); + + bpf->jit.func = (void *)ctx.ins; + bpf->jit.sz = size; + + goto finish; - return -ENOTSUP; +munmap: + munmap(ctx.ins, size); +finish: + jump_offset_fini(&ctx); +error: + return rc; }
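
The standalone sketches below are editorial illustrations appended after the patch; none of them belong to the commit itself.

emit_add_sub_imm() packs the A64 "ADD/SUB (immediate)" fields by hand: sf at bit 31, the sub flag at bit 30, imm12 at bits 21:10, Rn at bits 9:5 and Rd at bits 4:0. The throwaway program below rebuilds one word the same way and compares it with the encoding an assembler produces for "add x0, x0, #1" (0x91000400); the expected constant is an editor-supplied cross-check, not something taken from the patch.

/* Host-side cross-check of the ADD (immediate, 64-bit) bit packing
 * used by emit_add_sub_imm(). Editorial illustration only.
 */
#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t
a64_add_sub_imm(int is64, int sub, uint8_t rd, uint8_t rn, uint16_t imm12)
{
	uint32_t insn = 0x11000000;

	insn |= (uint32_t)(!!is64) << 31;	/* sf: 64-bit operation */
	insn |= (uint32_t)(!!sub) << 30;	/* op: 0 = ADD, 1 = SUB */
	insn |= (uint32_t)(imm12 & 0xfff) << 10;
	insn |= (uint32_t)(rn & 0x1f) << 5;
	insn |= rd & 0x1f;

	return insn;
}

int
main(void)
{
	uint32_t w = a64_add_sub_imm(1, 0, 0, 0, 1);

	/* "add x0, x0, #1" assembles to 0x91000400 */
	printf("0x%08" PRIx32 "\n", w);
	assert(w == 0x91000400u);
	return 0;
}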
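
For readers unfamiliar with how AArch64 materializes 64-bit constants, the sketch below mirrors the MOVN-vs-MOVZ selection used by emit_mov_imm(): pick MOVN when more 16-bit blocks are all-ones than all-zeros, start from the most significant non-trivial block, then patch the remaining blocks with MOVK. It is a host-side illustration only; fls64() stands in for rte_fls_u64() and printf() stands in for instruction emission.

/* Illustrative model of the MOVZ/MOVN/MOVK selection in emit_mov_imm(). */
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>

static int
fls64(uint64_t v)	/* position of the most significant set bit, 1..64; 0 if v == 0 */
{
	return v ? 64 - __builtin_clzll(v) : 0;
}

static int
u16_blocks_weight(uint64_t val, bool one)
{
	int i, w = 0;

	for (i = 0; i < 64; i += 16)
		w += (((val >> i) & 0xffff) == (one ? 0xffff : 0x0000));
	return w;
}

static void
show_mov_imm64(uint64_t val)
{
	uint64_t nval = ~val;
	bool movn;
	int sr;

	/* Prefer MOVN when more 16-bit blocks are all-ones than all-zeros */
	movn = u16_blocks_weight(val, true) > u16_blocks_weight(val, false);
	sr = (movn ? fls64(nval) : fls64(val)) - 1;
	if (sr < 0)
		sr = 0;
	sr &= ~15;	/* round down to a 16-bit block boundary */

	printf("0x%016" PRIx64 ":\n", val);
	printf("  %s x0, #0x%04" PRIx64 ", lsl #%d\n",
	       movn ? "movn" : "movz",
	       ((movn ? nval : val) >> sr) & 0xffff, sr);

	for (sr -= 16; sr >= 0; sr -= 16) {
		uint64_t blk = (val >> sr) & 0xffff;

		/* Blocks already produced by the MOVN/MOVZ need no MOVK */
		if (blk != (movn ? 0xffffULL : 0x0000ULL))
			printf("  movk x0, #0x%04" PRIx64 ", lsl #%d\n", blk, sr);
	}
}

int
main(void)
{
	show_mov_imm64(0x00000000deadbeefULL);
	show_mov_imm64(0xffffffffffff1234ULL);
	return 0;
}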
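
emit_be()/emit_le() implement the eBPF endianness conversions: on a little-endian arm64 host the be16/be32/be64 forms become REV16/REV32/REV64 (with the 16-bit case zero-extended afterwards), while the matching le forms reduce to plain zero-extensions. The small C model below states the intended net result of the 16-bit big-endian case; the helper name is mine, not the patch's.

/* Semantic model of BPF_ALU | EBPF_END | EBPF_TO_BE with imm == 16 on a
 * little-endian host: byte-swap the low 16 bits and clear everything above,
 * which is the net effect of REV16 followed by a 16-bit zero-extension.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t
bpf_be16(uint64_t dst)
{
	return __builtin_bswap16((uint16_t)dst);	/* zero-extended to 64 bits */
}

int
main(void)
{
	/* prints 0xddcc: low 16 bits swapped, upper bits cleared */
	printf("0x%llx\n", (unsigned long long)bpf_be16(0x11223344aabbccddULL));
	return 0;
}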
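
The branch handling relies on a two-pass scheme: the first pass records, for every eBPF instruction, the index of the first A64 word emitted for it (off) plus the distance from there to the conditional branch within that group (off_to_b); the second pass then turns an eBPF "PC += off" into an A64 word delta relative to the branch instruction itself, which is exactly what B.cond's imm19 expects. The self-contained sketch below reproduces that arithmetic on a hand-filled map purely to show the calculation; the map contents are invented.

/* Toy reproduction of jump_offset_get(): converts an eBPF jump
 * (instruction 'from', relative offset 'off') into an A64
 * instruction-count delta, given a per-eBPF-insn offset map.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_map {
	uint32_t off;		/* first A64 word emitted for this eBPF insn */
	uint8_t off_to_b;	/* words before the branch itself (e.g. mov_imm + cmp) */
};

static int32_t
toy_jump_offset_get(const struct toy_map *map, uint32_t from, int16_t off)
{
	int32_t a64_from = map[from].off + map[from].off_to_b;
	int32_t a64_to = map[from + off + 1].off;	/* eBPF jumps are relative to the next insn */

	return a64_to - a64_from;
}

int
main(void)
{
	/* Invented layout: eBPF insn 0 expands to 2 A64 words, insn 1 (a JEQ
	 * with immediate) to mov_imm + cmp + b.cond (branch is the 3rd word),
	 * insns 2 and 3 to one word each.
	 */
	const struct toy_map map[] = {
		{ .off = 0, .off_to_b = 0 },
		{ .off = 2, .off_to_b = 2 },
		{ .off = 5, .off_to_b = 0 },
		{ .off = 6, .off_to_b = 0 },
	};

	/* "if (dst == imm) PC += 1" at eBPF insn 1 skips eBPF insn 2, so the
	 * b.cond at A64 word 4 must reach word 6: delta +2 words.
	 */
	printf("b.cond delta = %+d words\n", toy_jump_offset_get(map, 1, 1));
	return 0;
}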
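
The BPF_STX | EBPF_XADD lowering picks between a single LSE STADD and an LDXR/ADD/STXR retry loop, depending on whether the build advertises the large-system-extension atomics (__ARM_FEATURE_ATOMICS / RTE_ARM_FEATURE_ATOMICS). Functionally both sequences implement a relaxed atomic fetch-add; the hedged C11 equivalent below conveys only the semantics, not the generated code.

/* Semantic model of the XADD lowering: *(uint64_t *)(dst + off) += src,
 * performed atomically with relaxed ordering. Illustration only.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static void
xadd64(_Atomic uint64_t *addr, uint64_t src)
{
	/* With LSE this is one "stadd"; without it the JIT emits a
	 * ldxr/add/stxr/cbnz loop that retries until the store-exclusive
	 * succeeds. Either way the visible effect is a relaxed fetch-add.
	 */
	atomic_fetch_add_explicit(addr, src, memory_order_relaxed);
}

int
main(void)
{
	_Atomic uint64_t counter = 40;

	xadd64(&counter, 2);
	printf("%llu\n", (unsigned long long)atomic_load(&counter));
	return 0;
}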
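
Finally, the driver in bpf_jit_arm64() follows a common JIT buffer lifecycle: size the program with a first pass, map anonymous writable memory, emit into it on the second pass, flip the mapping to read+execute, and flush the instruction cache before publishing the function pointer. The fragment below is a minimal, generic sketch of that lifecycle under the assumption of a POSIX mmap/mprotect environment; it is not the patch's code and trims most error handling.

/* Minimal sketch of a JIT buffer lifecycle as used by bpf_jit_arm64():
 * RW anonymous mapping -> copy/emit code -> RX protection -> icache flush.
 */
#define _DEFAULT_SOURCE
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>

static void *
publish_code(const uint32_t *words, size_t n_words)
{
	size_t size = n_words * sizeof(uint32_t);
	uint32_t *buf;

	buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return NULL;

	memcpy(buf, words, size);	/* the real JIT emits directly into buf */

	if (mprotect(buf, size, PROT_READ | PROT_EXEC) != 0) {
		munmap(buf, size);
		return NULL;
	}

	/* On arm64 the I-cache is not coherent with newly written code,
	 * so it must be invalidated before the buffer is executed.
	 */
	__builtin___clear_cache((char *)buf, (char *)(buf + n_words));

	return buf;
}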