/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2019 Marvell International Ltd.
 */

#include <errno.h>
#include <stdbool.h>
#include <string.h>
#include <sys/mman.h>

#include <rte_common.h>

#include "bpf_impl.h"
#define A64_REG_MASK(r)         ((r) & 0x1f)
#define A64_INVALID_OP_CODE     (0xffffffff)

#define TMP_REG_1               (EBPF_REG_10 + 1)
#define TMP_REG_2               (EBPF_REG_10 + 2)
#define TMP_REG_3               (EBPF_REG_10 + 3)

#define EBPF_FP                 (EBPF_REG_10)
#define EBPF_OP_GET(op)         (BPF_OP(op) >> 4)

/* A64 register numbers used below; x29/x30/SP as per the A64 PCS */
#define A64_R(x)                x
#define A64_FP                  29
#define A64_LR                  30
#define A64_SP                  31

#define check_imm(n, val) (((val) >= 0) ? !!((val) >> (n)) : !!((~val) >> (n)))
#define mask_imm(n, val) ((val) & ((1 << (n)) - 1))
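/*
 * Editor's example: check_imm(12, 4095) == 0 (fits), check_imm(12, 4096) == 1
 * (needs a 13th bit); negatives are tested via one's complement, so
 * check_imm(12, -2048) == 0. mask_imm(12, val) then keeps only the low
 * 12 bits that are actually encoded in the instruction word.
 */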
struct ebpf_a64_map {
        uint32_t off; /* eBPF to arm64 insn offset mapping for jump */
        uint8_t off_to_b; /* Offset to branch instruction delta */
};
struct a64_jit_ctx {
        size_t stack_sz;          /* Stack size */
        uint32_t *ins;            /* ARM64 instructions. NULL if first pass */
        struct ebpf_a64_map *map; /* eBPF to arm64 insn mapping for jump */
        uint32_t idx;             /* Current instruction index */
        uint32_t program_start;   /* Program index, just after prologue */
        uint32_t program_sz;      /* Program size. Found in first pass */
        uint8_t foundcall;        /* Found EBPF_CALL class code in eBPF program */
};
static int
check_reg(uint8_t r)
{
        return (r > 31) ? 1 : 0;
}
static int
is_first_pass(struct a64_jit_ctx *ctx)
{
        return (ctx->ins == NULL);
}
static int
check_invalid_args(struct a64_jit_ctx *ctx, uint32_t limit)
{
        uint32_t idx;

        /* Nothing to validate before code has been generated */
        if (is_first_pass(ctx))
                return 0;

        for (idx = 0; idx < limit; idx++) {
                if (rte_le_to_cpu_32(ctx->ins[idx]) == A64_INVALID_OP_CODE) {
                        RTE_BPF_LOG(ERR,
                                "%s: invalid opcode at %u;\n", __func__, idx);
                        return -EINVAL;
                }
        }
        return 0;
}
/* Emit an instruction */
static void
emit_insn(struct a64_jit_ctx *ctx, uint32_t insn, int error)
{
        /* Operand errors poison the slot; check_invalid_args() reports them */
        if (error)
                insn = A64_INVALID_OP_CODE;

        /* On the first pass ctx->ins is NULL and only the index advances */
        if (ctx->ins)
                ctx->ins[ctx->idx] = rte_cpu_to_le_32(insn);

        ctx->idx++;
}
/* Emit 'ret' (0xd65f03c0 returns to the address in the link register x30) */
static void
emit_ret(struct a64_jit_ctx *ctx)
{
        emit_insn(ctx, 0xd65f03c0, 0);
}
static void
emit_add_sub_imm(struct a64_jit_ctx *ctx, bool is64, bool sub, uint8_t rd,
                 uint8_t rn, int16_t imm12)
{
        uint32_t insn, imm;

        imm = mask_imm(12, imm12);
        insn = (!!is64) << 31;  /* sf: 0 = 32-bit, 1 = 64-bit operation */
        insn |= (!!sub) << 30;  /* op: 0 = add, 1 = sub */
        insn |= 0x11000000;     /* add/sub (immediate) base opcode */
        insn |= rd;             /* destination register, bits [4:0] */
        insn |= rn << 5;        /* source register, bits [9:5] */
        insn |= imm << 10;      /* immediate, bits [21:10] */

        emit_insn(ctx, insn,
                  check_reg(rd) || check_reg(rn) || check_imm(12, imm12));
}
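/*
 * Editor's example: emit_add_sub_imm(ctx, 1, 0, 2, 1, 16) assembles
 * "add x2, x1, #16": 0x80000000 (sf) | 0x11000000 (base) | 2 (rd) |
 * 1 << 5 (rn) | 16 << 10 (imm12) == 0x91004022.
 */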
static void
emit_add_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12)
{
        emit_add_sub_imm(ctx, 1, 0, rd, rn, imm12);
}

static void
emit_sub_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12)
{
        emit_add_sub_imm(ctx, 1, 1, rd, rn, imm12);
}

/* 'mov' is emitted as 'add rd, rn, #0', which is also valid for SP */
static void
emit_mov(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn)
{
        emit_add_sub_imm(ctx, is64, 0, rd, rn, 0);
}

static void
emit_mov_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn)
{
        emit_mov(ctx, 1, rd, rn);
}
static void
emit_ls_pair_64(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2, uint8_t rn,
                bool push, bool load, bool pre_index)
{
        uint32_t insn;

        insn = (!!load) << 22;
        insn |= (!!pre_index) << 24;
        insn |= 0xa8800000;     /* ldp/stp base opcode, 64-bit registers */
        insn |= rt;
        insn |= rn << 5;
        insn |= rt2 << 10;
        if (push)
                insn |= 0x7e << 15; /* 0x7e means -2 with imm7 (-16 bytes) */
        else
                insn |= 0x2 << 15; /* +2 with imm7, i.e. +16 bytes */

        emit_insn(ctx, insn, check_reg(rn) || check_reg(rt) || check_reg(rt2));
}
/* Emit stp rt, rt2, [sp, #-16]! */
static void
emit_stack_push(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2)
{
        emit_ls_pair_64(ctx, rt, rt2, A64_SP, 1, 0, 1);
}

/* Emit ldp rt, rt2, [sp], #16 (post-index form) */
static void
emit_stack_pop(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2)
{
        emit_ls_pair_64(ctx, rt, rt2, A64_SP, 0, 1, 0);
}
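/*
 * Editor's example: emit_stack_push(ctx, 19, 20) reaches
 * emit_ls_pair_64(ctx, 19, 20, A64_SP, 1, 0, 1) and yields
 * 0xa9800000 | 0x7e << 15 | 20 << 10 | 31 << 5 | 19 == 0xa9bf53f3,
 * the familiar prologue instruction "stp x19, x20, [sp, #-16]!".
 */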
static uint8_t
ebpf_to_a64_reg(struct a64_jit_ctx *ctx, uint8_t reg)
{
        const uint32_t ebpf2a64_has_call[] = {
                /* Use A64 R7 as the eBPF return register */
                [EBPF_REG_0] = A64_R(7),
                /* Use A64 argument registers as eBPF argument registers */
                [EBPF_REG_1] = A64_R(0),
                [EBPF_REG_2] = A64_R(1),
                [EBPF_REG_3] = A64_R(2),
                [EBPF_REG_4] = A64_R(3),
                [EBPF_REG_5] = A64_R(4),
                /* Use A64 callee-saved registers as eBPF callee-saved ones */
                [EBPF_REG_6] = A64_R(19),
                [EBPF_REG_7] = A64_R(20),
                [EBPF_REG_8] = A64_R(21),
                [EBPF_REG_9] = A64_R(22),
                [EBPF_FP] = A64_R(25),
                /* Use A64 scratch registers as temporary storage */
                [TMP_REG_1] = A64_R(9),
                [TMP_REG_2] = A64_R(10),
                [TMP_REG_3] = A64_R(11),
        };

        const uint32_t ebpf2a64_no_call[] = {
                /* Use A64 R7 as the eBPF return register */
                [EBPF_REG_0] = A64_R(7),
                /* Use A64 argument registers as eBPF argument registers */
                [EBPF_REG_1] = A64_R(0),
                [EBPF_REG_2] = A64_R(1),
                [EBPF_REG_3] = A64_R(2),
                [EBPF_REG_4] = A64_R(3),
                [EBPF_REG_5] = A64_R(4),
                /*
                 * The eBPF program has no EBPF_CALL opcode, so A64
                 * scratch registers can serve as eBPF callee-saved
                 * registers without being spilled in the prologue.
                 */
                [EBPF_REG_6] = A64_R(9),
                [EBPF_REG_7] = A64_R(10),
                [EBPF_REG_8] = A64_R(11),
                [EBPF_REG_9] = A64_R(12),
                /* Use the A64 FP register as the eBPF FP register */
                [EBPF_FP] = A64_FP,
                /* Use remaining A64 scratch registers as temporary storage */
                [TMP_REG_1] = A64_R(13),
                [TMP_REG_2] = A64_R(14),
                [TMP_REG_3] = A64_R(15),
        };

        if (ctx->foundcall)
                return ebpf2a64_has_call[reg];
        else
                return ebpf2a64_no_call[reg];
}
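/*
 * Editor's note: keeping two maps pays off in the prologue. When the
 * program never calls out, its "callee-saved" eBPF registers live in
 * A64 scratch registers, so emit_prologue_no_call() below avoids every
 * stp/ldp spill that emit_prologue_has_call() must emit.
 */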
/*
 * Procedure call standard for arm64 (AAPCS64)
 * -------------------------------------------
 * R0..R7 - Parameter/result registers
 * R8 - Indirect result location register
 * R9..R15 - Scratch registers
 * R16 - First intra-procedure-call scratch register
 * R17 - Second intra-procedure-call temporary register
 * R18 - Platform register
 * R19..R28 - Callee-saved registers
 * R29 - Frame pointer
 * R30 - Link register
 * R31 - Stack pointer
 */
static void
emit_prologue_has_call(struct a64_jit_ctx *ctx)
{
        uint8_t r6, r7, r8, r9, fp;

        r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
        r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7);
        r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8);
        r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9);
        fp = ebpf_to_a64_reg(ctx, EBPF_FP);

        /*
         * eBPF prog stack layout
         *
         *                               high
         * eBPF prologue             0:+-----+ <= original A64_SP
         *                             |FP/LR|
         *                         -16:+-----+ <= current A64_FP
         * Callee saved registers      | ... |
         * EBPF_FP =>              -64:+-----+
         *                             |     |
         * eBPF prog stack             | ... |
         *                             |     |
         * (EBPF_FP - bpf->stack_sz)=> +-----+
         * Pad for A64_SP 16B alignment| PAD |
         * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP
         *                             |     |
         *                             | ... | Function call stack
         *                             |     |
         *                             +-----+
         *                               low
         */
        emit_stack_push(ctx, A64_FP, A64_LR);
        emit_mov_64(ctx, A64_FP, A64_SP);
        emit_stack_push(ctx, r6, r7);
        emit_stack_push(ctx, r8, r9);
        /*
         * There is no requirement to save A64_R(28) on the stack. It is
         * pushed here anyway because A64_SP must stay 16B aligned and
         * STR vs STP typically costs the same number of cycles.
         */
        emit_stack_push(ctx, fp, A64_R(28));
        emit_mov_64(ctx, fp, A64_SP);
        if (ctx->stack_sz)
                emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
}
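/*
 * Editor's sketch of the code the prologue above generates, with the
 * has-call register map (r6..r9 -> x19..x22, eBPF FP -> x25):
 *
 *      stp x29, x30, [sp, #-16]!
 *      mov x29, sp
 *      stp x19, x20, [sp, #-16]!
 *      stp x21, x22, [sp, #-16]!
 *      stp x25, x28, [sp, #-16]!
 *      mov x25, sp
 *      sub sp, sp, #stack_sz       ; only when stack_sz != 0
 */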
static void
emit_epilogue_has_call(struct a64_jit_ctx *ctx)
{
        uint8_t r6, r7, r8, r9, fp, r0;

        r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
        r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7);
        r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8);
        r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9);
        fp = ebpf_to_a64_reg(ctx, EBPF_FP);
        r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);

        if (ctx->stack_sz)
                emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
        emit_stack_pop(ctx, fp, A64_R(28));
        emit_stack_pop(ctx, r8, r9);
        emit_stack_pop(ctx, r6, r7);
        emit_stack_pop(ctx, A64_FP, A64_LR);
        emit_mov_64(ctx, A64_R(0), r0);
        emit_ret(ctx);
}
static void
emit_prologue_no_call(struct a64_jit_ctx *ctx)
{
        /*
         * eBPF prog stack layout without EBPF_CALL opcode
         *
         *                               high
         * eBPF prologue(EBPF_FP)    0:+-----+ <= original A64_SP/current A64_FP
         *                             |     |
         * eBPF prog stack             | ... |
         *                             |     |
         * (EBPF_FP - bpf->stack_sz)=> +-----+
         * Pad for A64_SP 16B alignment| PAD |
         * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP
         *                             |     |
         *                             | ... | Function call stack
         *                             |     |
         *                             +-----+
         *                               low
         */
        if (ctx->stack_sz) {
                emit_mov_64(ctx, A64_FP, A64_SP);
                emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
        }
}
static void
emit_epilogue_no_call(struct a64_jit_ctx *ctx)
{
        if (ctx->stack_sz)
                emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
        emit_mov_64(ctx, A64_R(0), ebpf_to_a64_reg(ctx, EBPF_REG_0));
        emit_ret(ctx);
}
static void
emit_prologue(struct a64_jit_ctx *ctx)
{
        if (ctx->foundcall)
                emit_prologue_has_call(ctx);
        else
                emit_prologue_no_call(ctx);

        ctx->program_start = ctx->idx;
}

static void
emit_epilogue(struct a64_jit_ctx *ctx)
{
        ctx->program_sz = ctx->idx - ctx->program_start;

        if (ctx->foundcall)
                emit_epilogue_has_call(ctx);
        else
                emit_epilogue_no_call(ctx);
}
static void
check_program_has_call(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
{
        const struct ebpf_insn *ins;
        uint8_t op;
        uint32_t i;

        for (i = 0; i != bpf->prm.nb_ins; i++) {
                ins = bpf->prm.ins + i;
                op = ins->code;

                switch (op) {
                /* Call imm */
                case (BPF_JMP | EBPF_CALL):
                        ctx->foundcall = 1;
                        return;
                }
        }
}
/*
 * Walk through the eBPF code and translate it to arm64 instructions.
 */
static int
emit(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
{
        uint8_t op;
        const struct ebpf_insn *ins;
        uint32_t i;
        int rc;

        /* Reset context fields */
        ctx->idx = 0;
        /* arm64 SP must be aligned to 16 */
        ctx->stack_sz = RTE_ALIGN_MUL_CEIL(bpf->stack_sz, 16);

        emit_prologue(ctx);

        for (i = 0; i != bpf->prm.nb_ins; i++) {
                ins = bpf->prm.ins + i;
                op = ins->code;

                switch (op) {
                /* ... translation of the other opcode classes elided ... */

                /* Return r0 */
                case (BPF_JMP | EBPF_EXIT):
                        emit_epilogue(ctx);
                        break;
                default:
                        RTE_BPF_LOG(ERR,
                                "%s(%p): invalid opcode %#x at pc: %u;\n",
                                __func__, bpf, ins->code, i);
                        return -EINVAL;
                }
        }
        rc = check_invalid_args(ctx, ctx->idx);

        return rc;
}
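/*
 * Editor's note: emit() is run twice by bpf_jit_arm64() below. On the
 * first pass ctx->ins is NULL, so emit_insn() only advances ctx->idx to
 * size the program; on the second pass the words are actually stored.
 * Bad operands poison their slot with A64_INVALID_OP_CODE, which
 * check_invalid_args() turns into -EINVAL after the pass completes.
 */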
/*
 * Produce a native ISA version of the given BPF code.
 */
int
bpf_jit_arm64(struct rte_bpf *bpf)
{
        struct a64_jit_ctx ctx;
        size_t size;
        int rc;

        /* Init JIT context */
        memset(&ctx, 0, sizeof(ctx));

        /* Check whether the eBPF program contains a call class opcode */
        check_program_has_call(&ctx, bpf);

        /* First pass to calculate total code size and valid jump offsets */
        rc = emit(&ctx, bpf);
        if (rc)
                goto finish;

        size = ctx.idx * sizeof(uint32_t);
        /* Allocate JIT program memory */
        ctx.ins = mmap(NULL, size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (ctx.ins == MAP_FAILED) {
                rc = -ENOMEM;
                goto finish;
        }

        /* Second pass to generate code */
        rc = emit(&ctx, bpf);
        if (rc)
                goto munmap;

        rc = mprotect(ctx.ins, size, PROT_READ | PROT_EXEC) != 0;
        if (rc) {
                rc = -errno;
                goto munmap;
        }

        /* Flush the icache */
        __builtin___clear_cache((char *)ctx.ins, (char *)(ctx.ins + ctx.idx));

        bpf->jit.func = (void *)ctx.ins;
        bpf->jit.sz = size;

        goto finish;

munmap:
        munmap(ctx.ins, size);
finish:
        return rc;
}
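/*
 * Editor's usage sketch (caller side, via the public rte_bpf API; assumes
 * the program was loaded with rte_bpf_load(); 'pkt_data' is a placeholder
 * for the caller's input buffer):
 *
 *      struct rte_bpf_jit jit;
 *      rte_bpf_get_jit(bpf, &jit);
 *      if (jit.func != NULL)
 *              rc = jit.func(pkt_data);        // run the jitted code
 */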