1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(C) 2019 Marvell International Ltd.
8 #include <rte_common.h>
/* AArch64 register numbers are 5 bits wide (x0..x30 plus sp/zr). */
#define A64_REG_MASK(r)		((r) & 0x1f)
/* Sentinel stored when an operand fails an encoding check; a later pass
 * (check_invalid_args) scans the buffer for it and reports the error.
 */
#define A64_INVALID_OP_CODE	(0xffffffff)

/* Virtual register ids past EBPF_REG_10, used as JIT scratch registers. */
#define TMP_REG_1		(EBPF_REG_10 + 1)
#define TMP_REG_2		(EBPF_REG_10 + 2)
#define TMP_REG_3		(EBPF_REG_10 + 3)

/* eBPF frame pointer is register 10. */
#define EBPF_FP			(EBPF_REG_10)
/* Upper nibble of the BPF op field. */
#define EBPF_OP_GET(op)		(BPF_OP(op) >> 4)

/* Non-zero when @val cannot be represented as a signed @n-bit immediate.
 * Fix: parenthesize @val inside ~ so expression arguments expand safely.
 */
#define check_imm(n, val) (((val) >= 0) ? !!((val) >> (n)) : !!((~(val)) >> (n)))
/* Keep only the low @n bits of @val. */
#define mask_imm(n, val) ((val) & ((1 << (n)) - 1))
/* NOTE(review): struct headers are elided from this extract. The first two
 * fields belong to the eBPF->arm64 offset map entry (struct ebpf_a64_map,
 * presumably); the rest to struct a64_jit_ctx — confirm against upstream.
 */
32 uint32_t off; /* eBPF to arm64 insn offset mapping for jump */
33 uint8_t off_to_b; /* Offset to branch instruction delta */
37 size_t stack_sz; /* Stack size, 16B-aligned copy of bpf->stack_sz */
38 uint32_t *ins; /* ARM64 instructions. NULL if first (sizing) pass */
39 struct ebpf_a64_map *map; /* eBPF to arm64 insn mapping for jump */
40 uint32_t idx; /* Current instruction index */
41 uint32_t program_start; /* Program index, just after prologue */
42 uint32_t program_sz; /* Program size. Found in first pass */
43 uint8_t foundcall; /* Found EBPF_CALL class code in eBPF pgm */
/* Validate the "hw" shift of a MOVZ/MOVN/MOVK: 0/16 are always legal,
 * 32/48 only for 64-bit moves. (Return statements elided in this extract;
 * NOTE(review): `val != 64` in the 64-bit arm looks suspicious — 64 is not
 * a valid hw shift either — confirm against upstream.)
 */
47 check_mov_hw(bool is64, const uint8_t val)
49 if (val == 16 || val == 0)
51 else if (is64 && val != 64 && val != 48 && val != 32)
/* check_reg() fragment: non-zero when r is not a valid A64 register id. */
60 return (r > 31) ? 1 : 0;
/* True during the sizing pass: no instruction buffer allocated yet. */
64 is_first_pass(struct a64_jit_ctx *ctx)
66 return (ctx->ins == NULL);
/* Scan the first @limit emitted instructions for the A64_INVALID_OP_CODE
 * sentinel left by failed encodings. Skipped on the sizing pass
 * (no buffer to scan).
 */
70 check_invalid_args(struct a64_jit_ctx *ctx, uint32_t limit)
74 if (is_first_pass(ctx))
77 for (idx = 0; idx < limit; idx++) {
78 if (rte_le_to_cpu_32(ctx->ins[idx]) == A64_INVALID_OP_CODE) {
80 "%s: invalid opcode at %u;\n", __func__, idx);
87 /* Emit one instruction: store @insn little-endian at ctx->idx (second
 * pass only); a non-zero @error substitutes the invalid-opcode sentinel.
 */
89 emit_insn(struct a64_jit_ctx *ctx, uint32_t insn, int error)
92 insn = A64_INVALID_OP_CODE;
95 ctx->ins[ctx->idx] = rte_cpu_to_le_32(insn);
/* Emit "ret" — 0xd65f03c0 is the A64 RET encoding (return via x30/LR). */
101 emit_ret(struct a64_jit_ctx *ctx)
103 emit_insn(ctx, 0xd65f03c0, 0);
/* ADD/SUB (immediate): bit 31 (sf) selects 64-bit, bit 30 selects SUB.
 * imm12 is masked to 12 bits and range-checked with check_imm().
 * (Opcode base and rd/rn/imm field assembly elided in this extract.)
 */
107 emit_add_sub_imm(struct a64_jit_ctx *ctx, bool is64, bool sub, uint8_t rd,
108 uint8_t rn, int16_t imm12)
112 imm = mask_imm(12, imm12);
113 insn = (!!is64) << 31;
114 insn |= (!!sub) << 30;
121 check_reg(rd) || check_reg(rn) || check_imm(12, imm12));
/* rd = rn + imm12 (64-bit). */
125 emit_add_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12)
127 emit_add_sub_imm(ctx, 1, 0, rd, rn, imm12);
/* rd = rn - imm12 (64-bit). */
131 emit_sub_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12)
133 emit_add_sub_imm(ctx, 1, 1, rd, rn, imm12);
/* Register move expressed as "add rd, rn, #0". */
137 emit_mov(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn)
139 emit_add_sub_imm(ctx, is64, 0, rd, rn, 0);
/* 64-bit register move. */
143 emit_mov_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn)
145 emit_mov(ctx, 1, rd, rn);
/* STP/LDP of a 64-bit register pair at [rn], pre- or post-indexed.
 * imm7 is scaled by 8, so -2 encodes a -16B push and +2 a +16B pop.
 * (Base opcode and rt/rt2/rn field assembly elided in this extract.)
 */
149 emit_ls_pair_64(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2, uint8_t rn,
150 bool push, bool load, bool pre_index)
154 insn = (!!load) << 22;
155 insn |= (!!pre_index) << 24;
161 insn |= 0x7e << 15; /* 0x7e means -2 with imm7 */
165 emit_insn(ctx, insn, check_reg(rn) || check_reg(rt) || check_reg(rt2));
169 /* Emit stp rt, rt2, [sp, #-16]!  (pre-indexed 16B push) */
171 emit_stack_push(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2)
173 emit_ls_pair_64(ctx, rt, rt2, A64_SP, 1, 0, 1);
176 /* Emit ldp rt, rt2, [sp], #16  (post-indexed pop: pre_index == 0) */
178 emit_stack_pop(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2)
180 emit_ls_pair_64(ctx, rt, rt2, A64_SP, 0, 1, 0);
/* Wide-immediate move (MOVZ/MOVN/MOVK, selected by @type): place imm16
 * at half-word position shift/16 (the "hw" field, bits 22:21).
 */
187 mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t type,
188 uint16_t imm16, uint8_t shift)
192 insn = (!!is64) << 31;
195 insn |= (shift/16) << 21;
199 emit_insn(ctx, insn, check_reg(rd) || check_mov_hw(is64, shift));
/* Load a 32-bit immediate in at most two instructions:
 * non-negative: MOVZ(lower) then MOVK(upper) if needed;
 * negative: MOVN of the inverted half-words (single MOVN when the
 * upper half is all ones), plus a MOVK for the lower half otherwise.
 */
203 emit_mov_imm32(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint32_t val)
205 uint16_t upper = val >> 16;
206 uint16_t lower = val & 0xffff;
208 /* Positive number: sign bit clear */
209 if ((val & 1UL << 31) == 0) {
210 mov_imm(ctx, is64, rd, A64_MOVZ, lower, 0);
212 mov_imm(ctx, is64, rd, A64_MOVK, upper, 16);
213 } else { /* Negative number */
214 if (upper == 0xffff) {
215 mov_imm(ctx, is64, rd, A64_MOVN, ~lower, 0);
217 mov_imm(ctx, is64, rd, A64_MOVN, ~upper, 16);
219 mov_imm(ctx, is64, rd, A64_MOVK, lower, 0);
/*
 * Count how many of the four 16-bit half-words of @val are all-ones
 * (@one true) or all-zeroes (@one false).  Used by emit_mov_imm() to
 * decide whether MOVN or MOVZ needs fewer follow-up MOVK instructions.
 */
static uint8_t
u16_blocks_weight(const uint64_t val, bool one)
{
	const uint16_t pattern = one ? 0xffff : 0x0000;
	uint8_t weight = 0;
	unsigned int shift;

	for (shift = 0; shift < 64; shift += 16)
		weight += (((val >> shift) & 0xffff) == pattern);

	return weight;
}
/* Load an arbitrary 64-bit immediate: 32-bit values go through
 * emit_mov_imm32(); otherwise pick MOVN vs MOVZ by which matches more
 * half-words, start at the highest significant half-word (rte_fls_u64,
 * floor-aligned to 16), and patch remaining half-words with MOVK.
 * (The MOVK loop over lower half-words is elided in this extract.)
 */
234 emit_mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint64_t val)
236 uint64_t nval = ~val;
240 return emit_mov_imm32(ctx, 0, rd, (uint32_t)(val & 0xffffffff));
242 /* Find MOVN or MOVZ first */
243 movn = u16_blocks_weight(val, true) > u16_blocks_weight(val, false);
244 /* Find shift right value */
245 sr = movn ? rte_fls_u64(nval) - 1 : rte_fls_u64(val) - 1;
246 sr = RTE_ALIGN_FLOOR(sr, 16);
250 mov_imm(ctx, 1, rd, A64_MOVN, (nval >> sr) & 0xffff, sr);
252 mov_imm(ctx, 1, rd, A64_MOVZ, (val >> sr) & 0xffff, sr);
256 if (((val >> sr) & 0xffff) != (movn ? 0xffff : 0x0000))
257 mov_imm(ctx, 1, rd, A64_MOVK, (val >> sr) & 0xffff, sr);
/* Shifted-register ADD/SUB opcode values for the op field below.
 * (The A64_ADD define is elided in this extract.)
 */
263 #define A64_SUB 0x258
/* rd = rn (+/-) rm with zero shift amount; @op selects ADD or SUB. */
265 emit_add_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
266 uint8_t rm, uint16_t op)
270 insn = (!!is64) << 31;
271 insn |= op << 21; /* shift == 0 */
276 emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
/* rd += rm. */
280 emit_add(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
282 emit_add_sub(ctx, is64, rd, rd, rm, A64_ADD);
/* rd -= rm. */
286 emit_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
288 emit_add_sub(ctx, is64, rd, rd, rm, A64_SUB);
/* rd *= rm, encoded as MADD with the zero register in the addend (ra)
 * field at bits 14:10 — MADD rd, rd, rm, zr is plain MUL.
 * (Base opcode and rd/rn/rm fields elided in this extract.)
 */
292 emit_mul(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
296 insn = (!!is64) << 31;
299 insn |= A64_ZR << 10;
303 emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
/* Data-processing (2 source) group: rd = rn <op> rm, with @op selecting
 * the operation (e.g. A64_UDIV). Field assembly elided in this extract.
 */
308 emit_data_process_two_src(struct a64_jit_ctx *ctx, bool is64, uint8_t rd,
309 uint8_t rn, uint8_t rm, uint16_t op)
314 insn = (!!is64) << 31;
321 emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
/* rd /= rm (unsigned divide; caller must guard rm == 0 first). */
325 emit_div(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
327 emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_UDIV);
/* MSUB: rd = ra - rn * rm.  (Field assembly elided in this extract.) */
331 emit_msub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
332 uint8_t rm, uint8_t ra)
336 insn = (!!is64) << 31;
344 emit_insn(ctx, insn, check_reg(rd) || check_reg(rn) || check_reg(rm) ||
/* rd %= rm via: tmp = rd / rm; rd = rd - tmp * rm (udiv + msub).
 * Caller must guard rm == 0.
 */
349 emit_mod(struct a64_jit_ctx *ctx, bool is64, uint8_t tmp, uint8_t rd,
352 emit_data_process_two_src(ctx, is64, tmp, rd, rm, A64_UDIV);
353 emit_msub(ctx, is64, rd, tmp, rm, rd);
/* Map an eBPF register id (incl. the TMP_REG_* virtual ids) to a physical
 * A64 register. Two maps exist: when the program contains EBPF_CALL,
 * eBPF callee-saved registers must live in A64 callee-saved registers;
 * otherwise scratch registers suffice and no save/restore is needed.
 * (The foundcall selection test between the maps is elided below.)
 */
357 ebpf_to_a64_reg(struct a64_jit_ctx *ctx, uint8_t reg)
359 const uint32_t ebpf2a64_has_call[] = {
360 /* Map A64 R7 register as EBPF return register */
361 [EBPF_REG_0] = A64_R(7),
362 /* Map A64 arguments register as EBPF arguments register */
363 [EBPF_REG_1] = A64_R(0),
364 [EBPF_REG_2] = A64_R(1),
365 [EBPF_REG_3] = A64_R(2),
366 [EBPF_REG_4] = A64_R(3),
367 [EBPF_REG_5] = A64_R(4),
368 /* Map A64 callee save register as EBPF callee save register */
369 [EBPF_REG_6] = A64_R(19),
370 [EBPF_REG_7] = A64_R(20),
371 [EBPF_REG_8] = A64_R(21),
372 [EBPF_REG_9] = A64_R(22),
373 [EBPF_FP] = A64_R(25),
374 /* Map A64 scratch registers as temporary storage */
375 [TMP_REG_1] = A64_R(9),
376 [TMP_REG_2] = A64_R(10),
377 [TMP_REG_3] = A64_R(11),
380 const uint32_t ebpf2a64_no_call[] = {
381 /* Map A64 R7 register as EBPF return register */
382 [EBPF_REG_0] = A64_R(7),
383 /* Map A64 arguments register as EBPF arguments register */
384 [EBPF_REG_1] = A64_R(0),
385 [EBPF_REG_2] = A64_R(1),
386 [EBPF_REG_3] = A64_R(2),
387 [EBPF_REG_4] = A64_R(3),
388 [EBPF_REG_5] = A64_R(4),
390 * EBPF program does not have EBPF_CALL op code,
391 * Map A64 scratch registers as EBPF callee save registers.
393 [EBPF_REG_6] = A64_R(9),
394 [EBPF_REG_7] = A64_R(10),
395 [EBPF_REG_8] = A64_R(11),
396 [EBPF_REG_9] = A64_R(12),
397 /* Map A64 FP register as EBPF FP register */
399 /* Map remaining A64 scratch registers as temporary storage */
400 [TMP_REG_1] = A64_R(13),
401 [TMP_REG_2] = A64_R(14),
402 [TMP_REG_3] = A64_R(15),
406 return ebpf2a64_has_call[reg];
408 return ebpf2a64_no_call[reg];
412 * Procedure call standard for the arm64
413 * -------------------------------------
414 * R0..R7 - Parameter/result registers
415 * R8 - Indirect result location register
416 * R9..R15 - Scratch registers
417 * R16 - First intra-procedure-call scratch register
418 * R17 - Second intra-procedure-call temporary register
419 * R18 - Platform Register
420 * R19-R28 - Callee saved registers
421 * R29 - Frame pointer
422 * R30 - Link register
423 * R31 - Stack pointer
/* Prologue for programs containing EBPF_CALL: save A64_FP/LR and the
 * A64 callee-saved registers backing eBPF r6-r9 and FP, then carve out
 * the (16B-aligned) eBPF stack below them.
 */
426 emit_prologue_has_call(struct a64_jit_ctx *ctx)
428 uint8_t r6, r7, r8, r9, fp;
430 r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
431 r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7);
432 r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8);
433 r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9);
434 fp = ebpf_to_a64_reg(ctx, EBPF_FP);
437 * eBPF prog stack layout
440 * eBPF prologue 0:+-----+ <= original A64_SP
442 * -16:+-----+ <= current A64_FP
443 * Callee saved registers | ... |
444 * EBPF_FP => -64:+-----+
446 * eBPF prog stack | ... |
448 * (EBPF_FP - bpf->stack_sz)=> +-----+
449 * Pad for A64_SP 16B alignment| PAD |
450 * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP
452 * | ... | Function call stack
457 emit_stack_push(ctx, A64_FP, A64_LR);
458 emit_mov_64(ctx, A64_FP, A64_SP);
459 emit_stack_push(ctx, r6, r7);
460 emit_stack_push(ctx, r8, r9);
462 * There is no requirement to save A64_R(28) in stack. Doing it here,
463 * because A64_SP needs to be 16B aligned and STR vs STP
464 * takes the same number of cycles (typically).
466 emit_stack_push(ctx, fp, A64_R(28));
467 emit_mov_64(ctx, fp, A64_SP);
469 emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
/* Epilogue mirroring emit_prologue_has_call(): release the eBPF stack,
 * pop the saved pairs in reverse order, and move the eBPF return
 * register (r0 mapping) into A64 x0.
 */
473 emit_epilogue_has_call(struct a64_jit_ctx *ctx)
475 uint8_t r6, r7, r8, r9, fp, r0;
477 r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
478 r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7);
479 r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8);
480 r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9);
481 fp = ebpf_to_a64_reg(ctx, EBPF_FP);
482 r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
485 emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
486 emit_stack_pop(ctx, fp, A64_R(28));
487 emit_stack_pop(ctx, r8, r9);
488 emit_stack_pop(ctx, r6, r7);
489 emit_stack_pop(ctx, A64_FP, A64_LR);
490 emit_mov_64(ctx, A64_R(0), r0);
/* Prologue for programs without EBPF_CALL: nothing to save (eBPF regs
 * live in A64 scratch registers); just establish FP and the eBPF stack.
 */
495 emit_prologue_no_call(struct a64_jit_ctx *ctx)
498 * eBPF prog stack layout without EBPF_CALL opcode
501 * eBPF prologue(EBPF_FP) 0:+-----+ <= original A64_SP/current A64_FP
504 * eBPF prog stack | |
506 * (EBPF_FP - bpf->stack_sz)=> +-----+
507 * Pad for A64_SP 16B alignment| PAD |
508 * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP
510 * | ... | Function call stack
516 emit_mov_64(ctx, A64_FP, A64_SP);
517 emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
/* Epilogue for call-free programs: drop the eBPF stack and move the
 * eBPF return register into A64 x0.
 */
522 emit_epilogue_no_call(struct a64_jit_ctx *ctx)
525 emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
526 emit_mov_64(ctx, A64_R(0), ebpf_to_a64_reg(ctx, EBPF_REG_0));
/* Dispatch to the call/no-call prologue and record where the program
 * body starts (used to compute epilogue jump targets).
 */
531 emit_prologue(struct a64_jit_ctx *ctx)
534 emit_prologue_has_call(ctx);
536 emit_prologue_no_call(ctx);
538 ctx->program_start = ctx->idx;
/* Record the body size (valid after the first pass) and emit the
 * matching epilogue variant.
 */
542 emit_epilogue(struct a64_jit_ctx *ctx)
544 ctx->program_sz = ctx->idx - ctx->program_start;
547 emit_epilogue_has_call(ctx);
549 emit_epilogue_no_call(ctx);
/* CBNZ rt, #imm19 — branch if rt != 0; imm19 is in instructions
 * (scaled by 4 bytes by the CPU), masked and range-checked here.
 */
553 emit_cbnz(struct a64_jit_ctx *ctx, bool is64, uint8_t rt, int32_t imm19)
557 imm = mask_imm(19, imm19);
558 insn = (!!is64) << 31;
563 emit_insn(ctx, insn, check_reg(rt) || check_imm(19, imm19));
/* Unconditional branch B #imm26 (offset in instructions). */
567 emit_b(struct a64_jit_ctx *ctx, int32_t imm26)
571 imm = mask_imm(26, imm26);
575 emit_insn(ctx, insn, check_imm(26, imm26));
/* Guard for div/mod: eBPF defines x/0 and x%0 as returning 0 from the
 * program. If @src is non-zero, CBNZ skips the next two instructions
 * (mov r0, #0 and the branch — offset 3 counts the CBNZ itself);
 * otherwise zero r0 and jump to the epilogue, whose position is known
 * from program_start + program_sz computed on the first pass.
 */
579 emit_return_zero_if_src_zero(struct a64_jit_ctx *ctx, bool is64, uint8_t src)
581 uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
582 uint16_t jump_to_epilogue;
584 emit_cbnz(ctx, is64, src, 3);
585 emit_mov_imm(ctx, is64, r0, 0);
586 jump_to_epilogue = (ctx->program_start + ctx->program_sz) - ctx->idx;
587 emit_b(ctx, jump_to_epilogue);
/* Pre-scan the eBPF program for the EBPF_CALL opcode; the result
 * (recorded in ctx — presumably ctx->foundcall, confirm upstream)
 * selects the register map and prologue/epilogue variant.
 */
591 check_program_has_call(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
593 const struct ebpf_insn *ins;
597 for (i = 0; i != bpf->prm.nb_ins; i++) {
598 ins = bpf->prm.ins + i;
603 case (BPF_JMP | EBPF_CALL):
611 * Walk through the eBPF code and translate it to arm64 instructions.
612 * Runs twice: a sizing pass (ctx->ins == NULL) and a code-gen pass.
614 emit(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
616 uint8_t op, dst, src, tmp1, tmp2;
617 const struct ebpf_insn *ins;
623 /* Reset context fields */
625 /* arm64 SP must be aligned to 16 */
626 ctx->stack_sz = RTE_ALIGN_MUL_CEIL(bpf->stack_sz, 16);
627 tmp1 = ebpf_to_a64_reg(ctx, TMP_REG_1);
628 tmp2 = ebpf_to_a64_reg(ctx, TMP_REG_2);
632 for (i = 0; i != bpf->prm.nb_ins; i++) {
634 ins = bpf->prm.ins + i;
638 dst = ebpf_to_a64_reg(ctx, ins->dst_reg);
639 src = ebpf_to_a64_reg(ctx, ins->src_reg);
640 is64 = (BPF_CLASS(op) == EBPF_ALU64);
/* BPF_X forms operate register-on-register; BPF_K forms materialize
 * the immediate into a temporary first, then reuse the register form.
 */
644 case (BPF_ALU | EBPF_MOV | BPF_X):
645 case (EBPF_ALU64 | EBPF_MOV | BPF_X):
646 emit_mov(ctx, is64, dst, src);
649 case (BPF_ALU | EBPF_MOV | BPF_K):
650 case (EBPF_ALU64 | EBPF_MOV | BPF_K):
651 emit_mov_imm(ctx, is64, dst, imm);
654 case (BPF_ALU | BPF_ADD | BPF_X):
655 case (EBPF_ALU64 | BPF_ADD | BPF_X):
656 emit_add(ctx, is64, dst, src);
659 case (BPF_ALU | BPF_ADD | BPF_K):
660 case (EBPF_ALU64 | BPF_ADD | BPF_K):
661 emit_mov_imm(ctx, is64, tmp1, imm);
662 emit_add(ctx, is64, dst, tmp1);
665 case (BPF_ALU | BPF_SUB | BPF_X):
666 case (EBPF_ALU64 | BPF_SUB | BPF_X):
667 emit_sub(ctx, is64, dst, src);
670 case (BPF_ALU | BPF_SUB | BPF_K):
671 case (EBPF_ALU64 | BPF_SUB | BPF_K):
672 emit_mov_imm(ctx, is64, tmp1, imm);
673 emit_sub(ctx, is64, dst, tmp1);
676 case (BPF_ALU | BPF_MUL | BPF_X):
677 case (EBPF_ALU64 | BPF_MUL | BPF_X):
678 emit_mul(ctx, is64, dst, src);
681 case (BPF_ALU | BPF_MUL | BPF_K):
682 case (EBPF_ALU64 | BPF_MUL | BPF_K):
683 emit_mov_imm(ctx, is64, tmp1, imm);
684 emit_mul(ctx, is64, dst, tmp1);
/* Division/modulo by a register must guard the zero case first. */
687 case (BPF_ALU | BPF_DIV | BPF_X):
688 case (EBPF_ALU64 | BPF_DIV | BPF_X):
689 emit_return_zero_if_src_zero(ctx, is64, src);
690 emit_div(ctx, is64, dst, src);
693 case (BPF_ALU | BPF_DIV | BPF_K):
694 case (EBPF_ALU64 | BPF_DIV | BPF_K):
695 emit_mov_imm(ctx, is64, tmp1, imm);
696 emit_div(ctx, is64, dst, tmp1);
699 case (BPF_ALU | BPF_MOD | BPF_X):
700 case (EBPF_ALU64 | BPF_MOD | BPF_X):
701 emit_return_zero_if_src_zero(ctx, is64, src);
702 emit_mod(ctx, is64, tmp1, dst, src);
705 case (BPF_ALU | BPF_MOD | BPF_K):
706 case (EBPF_ALU64 | BPF_MOD | BPF_K):
707 emit_mov_imm(ctx, is64, tmp1, imm);
708 emit_mod(ctx, is64, tmp2, dst, tmp1);
711 case (BPF_JMP | EBPF_EXIT):
716 "%s(%p): invalid opcode %#x at pc: %u;\n",
717 __func__, bpf, ins->code, i);
/* Final sweep for invalid-opcode sentinels emitted above. */
721 rc = check_invalid_args(ctx, ctx->idx);
727 * Produce a native ISA version of the given BPF code.
728 * Two-pass JIT: pass one sizes the output, then the buffer is mmap'd,
729 * pass two emits into it, and the pages are sealed read+exec.
730 bpf_jit_arm64(struct rte_bpf *bpf)
732 struct a64_jit_ctx ctx;
736 /* Init JIT context */
737 memset(&ctx, 0, sizeof(ctx));
739 /* Determine whether the program contains the EBPF_CALL class */
740 check_program_has_call(&ctx, bpf);
742 /* First pass to calculate total code size and valid jump offsets */
743 rc = emit(&ctx, bpf);
747 size = ctx.idx * sizeof(uint32_t);
748 /* Allocate JIT program memory */
749 ctx.ins = mmap(NULL, size, PROT_READ | PROT_WRITE,
750 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
751 if (ctx.ins == MAP_FAILED) {
756 /* Second pass to generate code */
757 rc = emit(&ctx, bpf);
761 rc = mprotect(ctx.ins, size, PROT_READ | PROT_EXEC) != 0;
767 /* Flush the icache so the core fetches the freshly written code */
768 __builtin___clear_cache(ctx.ins, ctx.ins + ctx.idx);
770 bpf->jit.func = (void *)ctx.ins;
/* Error path: release the mapping (elided labels in this extract). */
776 munmap(ctx.ins, size);