lib/bpf/bpf_jit_arm64.c

   1 /* SPDX-License-Identifier: BSD-3-Clause
   2  * Copyright(C) 2019 Marvell International Ltd.
   3  */
   4
   5 #include <errno.h>
   6 #include <stdbool.h>
   7
   8 #include <rte_common.h>
   9 #include <rte_byteorder.h>
  10
  11 #include "bpf_impl.h"
  12
  13 #define A64_REG_MASK(r)         ((r) & 0x1f)
  14 #define A64_INVALID_OP_CODE     (0xffffffff)
  15
  16 #define TMP_REG_1               (EBPF_REG_10 + 1)
  17 #define TMP_REG_2               (EBPF_REG_10 + 2)
  18 #define TMP_REG_3               (EBPF_REG_10 + 3)
  19
  20 #define EBPF_FP                 (EBPF_REG_10)
  21 #define EBPF_OP_GET(op)         (BPF_OP(op) >> 4)
  22
  23 #define A64_R(x)                x
  24 #define A64_FP                  29
  25 #define A64_LR                  30
  26 #define A64_SP                  31
  27 #define A64_ZR                  31
  28
  29 #define check_imm(n, val) (((val) >= 0) ? !!((val) >> (n)) : !!((~val) >> (n)))
  30 #define mask_imm(n, val) ((val) & ((1 << (n)) - 1))
  31
  32 struct ebpf_a64_map {
  33         uint32_t off; /* eBPF to arm64 insn offset mapping for jump */
  34         uint8_t off_to_b; /* Offset to branch instruction delta */
  35 };
  36
  37 struct a64_jit_ctx {
  38         size_t stack_sz;          /* Stack size */
  39         uint32_t *ins;            /* ARM64 instructions. NULL if first pass */
  40         struct ebpf_a64_map *map; /* eBPF to arm64 insn mapping for jump */
  41         uint32_t idx;             /* Current instruction index */
  42         uint32_t program_start;   /* Program index, Just after prologue */
  43         uint32_t program_sz;      /* Program size. Found in first pass */
  44         uint8_t foundcall;        /* Found EBPF_CALL class code in eBPF pgm */
  45 };
  46
  47 static int
  48 check_immr_imms(bool is64, uint8_t immr, uint8_t imms)
  49 {
  50         const unsigned int width = is64 ? 64 : 32;
  51
  52         if (immr >= width || imms >= width)
  53                 return 1;
  54
  55         return 0;
  56 }
  57
  58 static int
  59 check_mov_hw(bool is64, const uint8_t val)
  60 {
  61         if (val == 16 || val == 0)
  62                 return 0;
  63         else if (is64 && val != 64 && val != 48 && val != 32)
  64                 return 1;
  65
  66         return 0;
  67 }
  68
  69 static int
  70 check_ls_sz(uint8_t sz)
  71 {
  72         if (sz == BPF_B || sz == BPF_H || sz == BPF_W || sz == EBPF_DW)
  73                 return 0;
  74
  75         return 1;
  76 }
  77
  78 static int
  79 check_reg(uint8_t r)
  80 {
  81         return (r > 31) ? 1 : 0;
  82 }
  83
  84 static int
  85 is_first_pass(struct a64_jit_ctx *ctx)
  86 {
  87         return (ctx->ins == NULL);
  88 }
  89
  90 static int
  91 check_invalid_args(struct a64_jit_ctx *ctx, uint32_t limit)
  92 {
  93         uint32_t idx;
  94
  95         if (is_first_pass(ctx))
  96                 return 0;
  97
  98         for (idx = 0; idx < limit; idx++) {
  99                 if (rte_le_to_cpu_32(ctx->ins[idx]) == A64_INVALID_OP_CODE) {
 100                         RTE_BPF_LOG(ERR,
 101                                 "%s: invalid opcode at %u;\n", __func__, idx);
 102                         return -EINVAL;
 103                 }
 104         }
 105         return 0;
 106 }
 107
 108 static int
 109 jump_offset_init(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
 110 {
 111         uint32_t i;
 112
 113         ctx->map = malloc(bpf->prm.nb_ins * sizeof(ctx->map[0]));
 114         if (ctx->map == NULL)
 115                 return -ENOMEM;
 116
 117         /* Fill with fake offsets */
 118         for (i = 0; i != bpf->prm.nb_ins; i++) {
 119                 ctx->map[i].off = INT32_MAX;
 120                 ctx->map[i].off_to_b = 0;
 121         }
 122         return 0;
 123 }
 124
 125 static void
 126 jump_offset_fini(struct a64_jit_ctx *ctx)
 127 {
 128         free(ctx->map);
 129 }
 130
 131 static void
 132 jump_offset_update(struct a64_jit_ctx *ctx, uint32_t ebpf_idx)
 133 {
 134         if (is_first_pass(ctx))
 135                 ctx->map[ebpf_idx].off = ctx->idx;
 136 }
 137
 138 static void
 139 jump_offset_to_branch_update(struct a64_jit_ctx *ctx, uint32_t ebpf_idx)
 140 {
 141         if (is_first_pass(ctx))
 142                 ctx->map[ebpf_idx].off_to_b = ctx->idx - ctx->map[ebpf_idx].off;
 143
 144 }
 145
 146 static int32_t
 147 jump_offset_get(struct a64_jit_ctx *ctx, uint32_t from, int16_t offset)
 148 {
 149         int32_t a64_from, a64_to;
 150
 151         a64_from = ctx->map[from].off +  ctx->map[from].off_to_b;
 152         a64_to = ctx->map[from + offset + 1].off;
 153
 154         if (a64_to == INT32_MAX)
 155                 return a64_to;
 156
 157         return a64_to - a64_from;
 158 }
 159
 160 enum a64_cond_e {
 161         A64_EQ = 0x0, /* == */
 162         A64_NE = 0x1, /* != */
 163         A64_CS = 0x2, /* Unsigned >= */
 164         A64_CC = 0x3, /* Unsigned < */
 165         A64_MI = 0x4, /* < 0 */
 166         A64_PL = 0x5, /* >= 0 */
 167         A64_VS = 0x6, /* Overflow */
 168         A64_VC = 0x7, /* No overflow */
 169         A64_HI = 0x8, /* Unsigned > */
 170         A64_LS = 0x9, /* Unsigned <= */
 171         A64_GE = 0xa, /* Signed >= */
 172         A64_LT = 0xb, /* Signed < */
 173         A64_GT = 0xc, /* Signed > */
 174         A64_LE = 0xd, /* Signed <= */
 175         A64_AL = 0xe, /* Always */
 176 };
 177
 178 static int
 179 check_cond(uint8_t cond)
 180 {
 181         return (cond >= A64_AL) ? 1 : 0;
 182 }
 183
 184 static uint8_t
 185 ebpf_to_a64_cond(uint8_t op)
 186 {
 187         switch (BPF_OP(op)) {
 188         case BPF_JEQ:
 189                 return A64_EQ;
 190         case BPF_JGT:
 191                 return A64_HI;
 192         case EBPF_JLT:
 193                 return A64_CC;
 194         case BPF_JGE:
 195                 return A64_CS;
 196         case EBPF_JLE:
 197                 return A64_LS;
 198         case BPF_JSET:
 199         case EBPF_JNE:
 200                 return A64_NE;
 201         case EBPF_JSGT:
 202                 return A64_GT;
 203         case EBPF_JSLT:
 204                 return A64_LT;
 205         case EBPF_JSGE:
 206                 return A64_GE;
 207         case EBPF_JSLE:
 208                 return A64_LE;
 209         default:
 210                 return UINT8_MAX;
 211         }
 212 }
 213
 214 /* Emit an instruction */
 215 static inline void
 216 emit_insn(struct a64_jit_ctx *ctx, uint32_t insn, int error)
 217 {
 218         if (error)
 219                 insn = A64_INVALID_OP_CODE;
 220
 221         if (ctx->ins)
 222                 ctx->ins[ctx->idx] = rte_cpu_to_le_32(insn);
 223
 224         ctx->idx++;
 225 }
 226
 227 static void
 228 emit_ret(struct a64_jit_ctx *ctx)
 229 {
 230         emit_insn(ctx, 0xd65f03c0, 0);
 231 }
 232
 233 static void
 234 emit_add_sub_imm(struct a64_jit_ctx *ctx, bool is64, bool sub, uint8_t rd,
 235                  uint8_t rn, int16_t imm12)
 236 {
 237         uint32_t insn, imm;
 238
 239         imm = mask_imm(12, imm12);
 240         insn = (!!is64) << 31;
 241         insn |= (!!sub) << 30;
 242         insn |= 0x11000000;
 243         insn |= rd;
 244         insn |= rn << 5;
 245         insn |= imm << 10;
 246
 247         emit_insn(ctx, insn,
 248                   check_reg(rd) || check_reg(rn) || check_imm(12, imm12));
 249 }
 250
 251 static void
 252 emit_add_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12)
 253 {
 254         emit_add_sub_imm(ctx, 1, 0, rd, rn, imm12);
 255 }
 256
 257 static void
 258 emit_sub_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12)
 259 {
 260         emit_add_sub_imm(ctx, 1, 1, rd, rn, imm12);
 261 }
 262
 263 static void
 264 emit_mov(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn)
 265 {
 266         emit_add_sub_imm(ctx, is64, 0, rd, rn, 0);
 267 }
 268
 269 static void
 270 emit_mov_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn)
 271 {
 272         emit_mov(ctx, 1, rd, rn);
 273 }
 274
 275 static void
 276 emit_ls_pair_64(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2, uint8_t rn,
 277                 bool push, bool load, bool pre_index)
 278 {
 279         uint32_t insn;
 280
 281         insn = (!!load) << 22;
 282         insn |= (!!pre_index) << 24;
 283         insn |= 0xa8800000;
 284         insn |= rt;
 285         insn |= rn << 5;
 286         insn |= rt2 << 10;
 287         if (push)
 288                 insn |= 0x7e << 15; /* 0x7e means -2 with imm7 */
 289         else
 290                 insn |= 0x2 << 15;
 291
 292         emit_insn(ctx, insn, check_reg(rn) || check_reg(rt) || check_reg(rt2));
 293
 294 }
 295
 296 /* Emit stp rt, rt2, [sp, #-16]! */
 297 static void
 298 emit_stack_push(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2)
 299 {
 300         emit_ls_pair_64(ctx, rt, rt2, A64_SP, 1, 0, 1);
 301 }
 302
 303 /* Emit ldp rt, rt2, [sp, #16] */
 304 static void
 305 emit_stack_pop(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2)
 306 {
 307         emit_ls_pair_64(ctx, rt, rt2, A64_SP, 0, 1, 0);
 308 }
 309
 310 #define A64_MOVN 0
 311 #define A64_MOVZ 2
 312 #define A64_MOVK 3
 313 static void
 314 mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t type,
 315         uint16_t imm16, uint8_t shift)
 316 {
 317         uint32_t insn;
 318
 319         insn = (!!is64) << 31;
 320         insn |= type << 29;
 321         insn |= 0x25 << 23;
 322         insn |= (shift/16) << 21;
 323         insn |= imm16 << 5;
 324         insn |= rd;
 325
 326         emit_insn(ctx, insn, check_reg(rd) || check_mov_hw(is64, shift));
 327 }
 328
 329 static void
 330 emit_mov_imm32(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint32_t val)
 331 {
 332         uint16_t upper = val >> 16;
 333         uint16_t lower = val & 0xffff;
 334
 335         /* Positive number */
 336         if ((val & 1UL << 31) == 0) {
 337                 mov_imm(ctx, is64, rd, A64_MOVZ, lower, 0);
 338                 if (upper)
 339                         mov_imm(ctx, is64, rd, A64_MOVK, upper, 16);
 340         } else { /* Negative number */
 341                 if (upper == 0xffff) {
 342                         mov_imm(ctx, is64, rd, A64_MOVN, ~lower, 0);
 343                 } else {
 344                         mov_imm(ctx, is64, rd, A64_MOVN, ~upper, 16);
 345                         if (lower != 0xffff)
 346                                 mov_imm(ctx, is64, rd, A64_MOVK, lower, 0);
 347                 }
 348         }
 349 }
 350
 351 static int
 352 u16_blocks_weight(const uint64_t val, bool one)
 353 {
 354         return (((val >>  0) & 0xffff) == (one ? 0xffff : 0x0000)) +
 355                (((val >> 16) & 0xffff) == (one ? 0xffff : 0x0000)) +
 356                (((val >> 32) & 0xffff) == (one ? 0xffff : 0x0000)) +
 357                (((val >> 48) & 0xffff) == (one ? 0xffff : 0x0000));
 358 }
 359
 360 static void
 361 emit_mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint64_t val)
 362 {
 363         uint64_t nval = ~val;
 364         int movn, sr;
 365
 366         if (is64 == 0)
 367                 return emit_mov_imm32(ctx, 0, rd, (uint32_t)(val & 0xffffffff));
 368
 369         /* Find MOVN or MOVZ first */
 370         movn = u16_blocks_weight(val, true) > u16_blocks_weight(val, false);
 371         /* Find shift right value */
 372         sr = movn ? rte_fls_u64(nval) - 1 : rte_fls_u64(val) - 1;
 373         sr = RTE_ALIGN_FLOOR(sr, 16);
 374         sr = RTE_MAX(sr, 0);
 375
 376         if (movn)
 377                 mov_imm(ctx, 1, rd, A64_MOVN, (nval >> sr) & 0xffff, sr);
 378         else
 379                 mov_imm(ctx, 1, rd, A64_MOVZ, (val >> sr) & 0xffff, sr);
 380
 381         sr -= 16;
 382         while (sr >= 0) {
 383                 if (((val >> sr) & 0xffff) != (movn ? 0xffff : 0x0000))
 384                         mov_imm(ctx, 1, rd, A64_MOVK, (val >> sr) & 0xffff, sr);
 385                 sr -= 16;
 386         }
 387 }
 388
 389 static void
 390 emit_ls(struct a64_jit_ctx *ctx, uint8_t sz, uint8_t rt, uint8_t rn, uint8_t rm,
 391         bool load)
 392 {
 393         uint32_t insn;
 394
 395         insn = 0x1c1 << 21;
 396         if (load)
 397                 insn |= 1 << 22;
 398         if (sz == BPF_B)
 399                 insn |= 0 << 30;
 400         else if (sz == BPF_H)
 401                 insn |= 1 << 30;
 402         else if (sz == BPF_W)
 403                 insn |= 2 << 30;
 404         else if (sz == EBPF_DW)
 405                 insn |= 3 << 30;
 406
 407         insn |= rm << 16;
 408         insn |= 0x1a << 10; /* LSL and S = 0 */
 409         insn |= rn << 5;
 410         insn |= rt;
 411
 412         emit_insn(ctx, insn, check_reg(rt) || check_reg(rn) || check_reg(rm) ||
 413                   check_ls_sz(sz));
 414 }
 415
 416 static void
 417 emit_str(struct a64_jit_ctx *ctx, uint8_t sz, uint8_t rt, uint8_t rn,
 418          uint8_t rm)
 419 {
 420         emit_ls(ctx, sz, rt, rn, rm, 0);
 421 }
 422
 423 static void
 424 emit_ldr(struct a64_jit_ctx *ctx, uint8_t sz, uint8_t rt, uint8_t rn,
 425          uint8_t rm)
 426 {
 427         emit_ls(ctx, sz, rt, rn, rm, 1);
 428 }
 429
 430 #define A64_ADD 0x58
 431 #define A64_SUB 0x258
 432 static void
 433 emit_add_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
 434              uint8_t rm, uint16_t op)
 435 {
 436         uint32_t insn;
 437
 438         insn = (!!is64) << 31;
 439         insn |= op << 21; /* shift == 0 */
 440         insn |= rm << 16;
 441         insn |= rn << 5;
 442         insn |= rd;
 443
 444         emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
 445 }
 446
 447 static void
 448 emit_add(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 449 {
 450         emit_add_sub(ctx, is64, rd, rd, rm, A64_ADD);
 451 }
 452
 453 static void
 454 emit_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 455 {
 456         emit_add_sub(ctx, is64, rd, rd, rm, A64_SUB);
 457 }
 458
 459 static void
 460 emit_neg(struct a64_jit_ctx *ctx, bool is64, uint8_t rd)
 461 {
 462         emit_add_sub(ctx, is64, rd, A64_ZR, rd, A64_SUB);
 463 }
 464
 465 static void
 466 emit_mul(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 467 {
 468         uint32_t insn;
 469
 470         insn = (!!is64) << 31;
 471         insn |= 0xd8 << 21;
 472         insn |= rm << 16;
 473         insn |= A64_ZR << 10;
 474         insn |= rd << 5;
 475         insn |= rd;
 476
 477         emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
 478 }
 479
 480 #define A64_UDIV 0x2
 481 #define A64_LSLV 0x8
 482 #define A64_LSRV 0x9
 483 #define A64_ASRV 0xA
 484 static void
 485 emit_data_process_two_src(struct a64_jit_ctx *ctx, bool is64, uint8_t rd,
 486                           uint8_t rn, uint8_t rm, uint16_t op)
 487
 488 {
 489         uint32_t insn;
 490
 491         insn = (!!is64) << 31;
 492         insn |= 0xd6 << 21;
 493         insn |= rm << 16;
 494         insn |= op << 10;
 495         insn |= rn << 5;
 496         insn |= rd;
 497
 498         emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
 499 }
 500
 501 static void
 502 emit_div(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 503 {
 504         emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_UDIV);
 505 }
 506
 507 static void
 508 emit_lslv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 509 {
 510         emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_LSLV);
 511 }
 512
 513 static void
 514 emit_lsrv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 515 {
 516         emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_LSRV);
 517 }
 518
 519 static void
 520 emit_asrv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 521 {
 522         emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_ASRV);
 523 }
 524
 525 #define A64_UBFM 0x2
 526 #define A64_SBFM 0x0
 527 static void
 528 emit_bitfield(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
 529               uint8_t immr, uint8_t imms, uint16_t op)
 530
 531 {
 532         uint32_t insn;
 533
 534         insn = (!!is64) << 31;
 535         if (insn)
 536                 insn |= 1 << 22; /* Set N bit when is64 is set */
 537         insn |= op << 29;
 538         insn |= 0x26 << 23;
 539         insn |= immr << 16;
 540         insn |= imms << 10;
 541         insn |= rn << 5;
 542         insn |= rd;
 543
 544         emit_insn(ctx, insn, check_reg(rd) || check_reg(rn) ||
 545                   check_immr_imms(is64, immr, imms));
 546 }
 547 static void
 548 emit_lsl(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
 549 {
 550         const unsigned int width = is64 ? 64 : 32;
 551         uint8_t imms, immr;
 552
 553         immr = (width - imm) & (width - 1);
 554         imms = width - 1 - imm;
 555
 556         emit_bitfield(ctx, is64, rd, rd, immr, imms, A64_UBFM);
 557 }
 558
 559 static void
 560 emit_lsr(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
 561 {
 562         emit_bitfield(ctx, is64, rd, rd, imm, is64 ? 63 : 31, A64_UBFM);
 563 }
 564
 565 static void
 566 emit_asr(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
 567 {
 568         emit_bitfield(ctx, is64, rd, rd, imm, is64 ? 63 : 31, A64_SBFM);
 569 }
 570
 571 #define A64_AND 0
 572 #define A64_OR 1
 573 #define A64_XOR 2
 574 static void
 575 emit_logical(struct a64_jit_ctx *ctx, bool is64, uint8_t rd,
 576              uint8_t rm, uint16_t op)
 577 {
 578         uint32_t insn;
 579
 580         insn = (!!is64) << 31;
 581         insn |= op << 29;
 582         insn |= 0x50 << 21;
 583         insn |= rm << 16;
 584         insn |= rd << 5;
 585         insn |= rd;
 586
 587         emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
 588 }
 589
 590 static void
 591 emit_or(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 592 {
 593         emit_logical(ctx, is64, rd, rm, A64_OR);
 594 }
 595
 596 static void
 597 emit_and(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 598 {
 599         emit_logical(ctx, is64, rd, rm, A64_AND);
 600 }
 601
 602 static void
 603 emit_xor(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 604 {
 605         emit_logical(ctx, is64, rd, rm, A64_XOR);
 606 }
 607
 608 static void
 609 emit_msub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
 610           uint8_t rm, uint8_t ra)
 611 {
 612         uint32_t insn;
 613
 614         insn = (!!is64) << 31;
 615         insn |= 0xd8 << 21;
 616         insn |= rm << 16;
 617         insn |= 0x1 << 15;
 618         insn |= ra << 10;
 619         insn |= rn << 5;
 620         insn |= rd;
 621
 622         emit_insn(ctx, insn, check_reg(rd) || check_reg(rn) || check_reg(rm) ||
 623                   check_reg(ra));
 624 }
 625
 626 static void
 627 emit_mod(struct a64_jit_ctx *ctx, bool is64, uint8_t tmp, uint8_t rd,
 628          uint8_t rm)
 629 {
 630         emit_data_process_two_src(ctx, is64, tmp, rd, rm, A64_UDIV);
 631         emit_msub(ctx, is64, rd, tmp, rm, rd);
 632 }
 633
 634 static void
 635 emit_blr(struct a64_jit_ctx *ctx, uint8_t rn)
 636 {
 637         uint32_t insn;
 638
 639         insn = 0xd63f0000;
 640         insn |= rn << 5;
 641
 642         emit_insn(ctx, insn, check_reg(rn));
 643 }
 644
 645 static void
 646 emit_zero_extend(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
 647 {
 648         switch (imm) {
 649         case 16:
 650                 /* Zero-extend 16 bits into 64 bits */
 651                 emit_bitfield(ctx, 1, rd, rd, 0, 15, A64_UBFM);
 652                 break;
 653         case 32:
 654                 /* Zero-extend 32 bits into 64 bits */
 655                 emit_bitfield(ctx, 1, rd, rd, 0, 31, A64_UBFM);
 656                 break;
 657         case 64:
 658                 break;
 659         default:
 660                 /* Generate error */
 661                 emit_insn(ctx, 0, 1);
 662         }
 663 }
 664
 665 static void
 666 emit_rev(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
 667 {
 668         uint32_t insn;
 669
 670         insn = 0xdac00000;
 671         insn |= rd << 5;
 672         insn |= rd;
 673
 674         switch (imm) {
 675         case 16:
 676                 insn |= 1 << 10;
 677                 emit_insn(ctx, insn, check_reg(rd));
 678                 emit_zero_extend(ctx, rd, 16);
 679                 break;
 680         case 32:
 681                 insn |= 2 << 10;
 682                 emit_insn(ctx, insn, check_reg(rd));
 683                 /* Upper 32 bits already cleared */
 684                 break;
 685         case 64:
 686                 insn |= 3 << 10;
 687                 emit_insn(ctx, insn, check_reg(rd));
 688                 break;
 689         default:
 690                 /* Generate error */
 691                 emit_insn(ctx, insn, 1);
 692         }
 693 }
 694
 695 static int
 696 is_be(void)
 697 {
 698 #if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
 699         return 1;
 700 #else
 701         return 0;
 702 #endif
 703 }
 704
 705 static void
 706 emit_be(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
 707 {
 708         if (is_be())
 709                 emit_zero_extend(ctx, rd, imm);
 710         else
 711                 emit_rev(ctx, rd, imm);
 712 }
 713
 714 static void
 715 emit_le(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
 716 {
 717         if (is_be())
 718                 emit_rev(ctx, rd, imm);
 719         else
 720                 emit_zero_extend(ctx, rd, imm);
 721 }
 722
 723 static uint8_t
 724 ebpf_to_a64_reg(struct a64_jit_ctx *ctx, uint8_t reg)
 725 {
 726         const uint32_t ebpf2a64_has_call[] = {
 727                 /* Map A64 R7 register as EBPF return register */
 728                 [EBPF_REG_0] = A64_R(7),
 729                 /* Map A64 arguments register as EBPF arguments register */
 730                 [EBPF_REG_1] = A64_R(0),
 731                 [EBPF_REG_2] = A64_R(1),
 732                 [EBPF_REG_3] = A64_R(2),
 733                 [EBPF_REG_4] = A64_R(3),
 734                 [EBPF_REG_5] = A64_R(4),
 735                 /* Map A64 callee save register as EBPF callee save register */
 736                 [EBPF_REG_6] = A64_R(19),
 737                 [EBPF_REG_7] = A64_R(20),
 738                 [EBPF_REG_8] = A64_R(21),
 739                 [EBPF_REG_9] = A64_R(22),
 740                 [EBPF_FP]    = A64_R(25),
 741                 /* Map A64 scratch registers as temporary storage */
 742                 [TMP_REG_1] = A64_R(9),
 743                 [TMP_REG_2] = A64_R(10),
 744                 [TMP_REG_3] = A64_R(11),
 745         };
 746
 747         const uint32_t ebpf2a64_no_call[] = {
 748                 /* Map A64 R7 register as EBPF return register */
 749                 [EBPF_REG_0] = A64_R(7),
 750                 /* Map A64 arguments register as EBPF arguments register */
 751                 [EBPF_REG_1] = A64_R(0),
 752                 [EBPF_REG_2] = A64_R(1),
 753                 [EBPF_REG_3] = A64_R(2),
 754                 [EBPF_REG_4] = A64_R(3),
 755                 [EBPF_REG_5] = A64_R(4),
 756                 /*
 757                  * EBPF program does not have EBPF_CALL op code,
 758                  * Map A64 scratch registers as EBPF callee save registers.
 759                  */
 760                 [EBPF_REG_6] = A64_R(9),
 761                 [EBPF_REG_7] = A64_R(10),
 762                 [EBPF_REG_8] = A64_R(11),
 763                 [EBPF_REG_9] = A64_R(12),
 764                 /* Map A64 FP register as EBPF FP register */
 765                 [EBPF_FP]    = A64_FP,
 766                 /* Map remaining A64 scratch registers as temporary storage */
 767                 [TMP_REG_1] = A64_R(13),
 768                 [TMP_REG_2] = A64_R(14),
 769                 [TMP_REG_3] = A64_R(15),
 770         };
 771
 772         if (ctx->foundcall)
 773                 return ebpf2a64_has_call[reg];
 774         else
 775                 return ebpf2a64_no_call[reg];
 776 }
 777
 778 /*
 779  * Procedure call standard for the arm64
 780  * -------------------------------------
 781  * R0..R7  - Parameter/result registers
 782  * R8      - Indirect result location register
 783  * R9..R15 - Scratch registers
 784  * R15     - Platform Register
 785  * R16     - First intra-procedure-call scratch register
 786  * R17     - Second intra-procedure-call temporary register
 787  * R19-R28 - Callee saved registers
 788  * R29     - Frame pointer
 789  * R30     - Link register
 790  * R31     - Stack pointer
 791  */
 792 static void
 793 emit_prologue_has_call(struct a64_jit_ctx *ctx)
 794 {
 795         uint8_t r6, r7, r8, r9, fp;
 796
 797         r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
 798         r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7);
 799         r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8);
 800         r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9);
 801         fp = ebpf_to_a64_reg(ctx, EBPF_FP);
 802
 803         /*
 804          * eBPF prog stack layout
 805          *
 806          *                               high
 807          *       eBPF prologue       0:+-----+ <= original A64_SP
 808          *                             |FP/LR|
 809          *                         -16:+-----+ <= current A64_FP
 810          *    Callee saved registers   | ... |
 811          *             EBPF_FP =>  -64:+-----+
 812          *                             |     |
 813          *       eBPF prog stack       | ... |
 814          *                             |     |
 815          * (EBPF_FP - bpf->stack_sz)=> +-----+
 816          * Pad for A64_SP 16B alignment| PAD |
 817          * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP
 818          *                             |     |
 819          *                             | ... | Function call stack
 820          *                             |     |
 821          *                             +-----+
 822          *                              low
 823          */
 824         emit_stack_push(ctx, A64_FP, A64_LR);
 825         emit_mov_64(ctx, A64_FP, A64_SP);
 826         emit_stack_push(ctx, r6, r7);
 827         emit_stack_push(ctx, r8, r9);
 828         /*
 829          * There is no requirement to save A64_R(28) in stack. Doing it here,
 830          * because, A64_SP needs be to 16B aligned and STR vs STP
 831          * takes same number of cycles(typically).
 832          */
 833         emit_stack_push(ctx, fp, A64_R(28));
 834         emit_mov_64(ctx, fp, A64_SP);
 835         if (ctx->stack_sz)
 836                 emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
 837 }
 838
 839 static void
 840 emit_epilogue_has_call(struct a64_jit_ctx *ctx)
 841 {
 842         uint8_t r6, r7, r8, r9, fp, r0;
 843
 844         r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
 845         r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7);
 846         r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8);
 847         r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9);
 848         fp = ebpf_to_a64_reg(ctx, EBPF_FP);
 849         r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
 850
 851         if (ctx->stack_sz)
 852                 emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
 853         emit_stack_pop(ctx, fp, A64_R(28));
 854         emit_stack_pop(ctx, r8, r9);
 855         emit_stack_pop(ctx, r6, r7);
 856         emit_stack_pop(ctx, A64_FP, A64_LR);
 857         emit_mov_64(ctx, A64_R(0), r0);
 858         emit_ret(ctx);
 859 }
 860
 861 static void
 862 emit_prologue_no_call(struct a64_jit_ctx *ctx)
 863 {
 864         /*
 865          * eBPF prog stack layout without EBPF_CALL opcode
 866          *
 867          *                               high
 868          *    eBPF prologue(EBPF_FP) 0:+-----+ <= original A64_SP/current A64_FP
 869          *                             |     |
 870          *                             | ... |
 871          *            eBPF prog stack  |     |
 872          *                             |     |
 873          * (EBPF_FP - bpf->stack_sz)=> +-----+
 874          * Pad for A64_SP 16B alignment| PAD |
 875          * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP
 876          *                             |     |
 877          *                             | ... | Function call stack
 878          *                             |     |
 879          *                             +-----+
 880          *                              low
 881          */
 882         if (ctx->stack_sz) {
 883                 emit_mov_64(ctx, A64_FP, A64_SP);
 884                 emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
 885         }
 886 }
 887
 888 static void
 889 emit_epilogue_no_call(struct a64_jit_ctx *ctx)
 890 {
 891         if (ctx->stack_sz)
 892                 emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
 893         emit_mov_64(ctx, A64_R(0), ebpf_to_a64_reg(ctx, EBPF_REG_0));
 894         emit_ret(ctx);
 895 }
 896
 897 static void
 898 emit_prologue(struct a64_jit_ctx *ctx)
 899 {
 900         if (ctx->foundcall)
 901                 emit_prologue_has_call(ctx);
 902         else
 903                 emit_prologue_no_call(ctx);
 904
 905         ctx->program_start = ctx->idx;
 906 }
 907
 908 static void
 909 emit_epilogue(struct a64_jit_ctx *ctx)
 910 {
 911         ctx->program_sz = ctx->idx - ctx->program_start;
 912
 913         if (ctx->foundcall)
 914                 emit_epilogue_has_call(ctx);
 915         else
 916                 emit_epilogue_no_call(ctx);
 917 }
 918
 919 static void
 920 emit_call(struct a64_jit_ctx *ctx, uint8_t tmp, void *func)
 921 {
 922         uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
 923
 924         emit_mov_imm(ctx, 1, tmp, (uint64_t)func);
 925         emit_blr(ctx, tmp);
 926         emit_mov_64(ctx, r0, A64_R(0));
 927 }
 928
 929 static void
 930 emit_cbnz(struct a64_jit_ctx *ctx, bool is64, uint8_t rt, int32_t imm19)
 931 {
 932         uint32_t insn, imm;
 933
 934         imm = mask_imm(19, imm19);
 935         insn = (!!is64) << 31;
 936         insn |= 0x35 << 24;
 937         insn |= imm << 5;
 938         insn |= rt;
 939
 940         emit_insn(ctx, insn, check_reg(rt) || check_imm(19, imm19));
 941 }
 942
 943 static void
 944 emit_b(struct a64_jit_ctx *ctx, int32_t imm26)
 945 {
 946         uint32_t insn, imm;
 947
 948         imm = mask_imm(26, imm26);
 949         insn = 0x5 << 26;
 950         insn |= imm;
 951
 952         emit_insn(ctx, insn, check_imm(26, imm26));
 953 }
 954
 955 static void
 956 emit_return_zero_if_src_zero(struct a64_jit_ctx *ctx, bool is64, uint8_t src)
 957 {
 958         uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
 959         uint16_t jump_to_epilogue;
 960
 961         emit_cbnz(ctx, is64, src, 3);
 962         emit_mov_imm(ctx, is64, r0, 0);
 963         jump_to_epilogue = (ctx->program_start + ctx->program_sz) - ctx->idx;
 964         emit_b(ctx, jump_to_epilogue);
 965 }
 966
 967 static void
 968 emit_stadd(struct a64_jit_ctx *ctx, bool is64, uint8_t rs, uint8_t rn)
 969 {
 970         uint32_t insn;
 971
 972         insn = 0xb820001f;
 973         insn |= (!!is64) << 30;
 974         insn |= rs << 16;
 975         insn |= rn << 5;
 976
 977         emit_insn(ctx, insn, check_reg(rs) || check_reg(rn));
 978 }
 979
 980 static void
 981 emit_ldxr(struct a64_jit_ctx *ctx, bool is64, uint8_t rt, uint8_t rn)
 982 {
 983         uint32_t insn;
 984
 985         insn = 0x885f7c00;
 986         insn |= (!!is64) << 30;
 987         insn |= rn << 5;
 988         insn |= rt;
 989
 990         emit_insn(ctx, insn, check_reg(rt) || check_reg(rn));
 991 }
 992
 993 static void
 994 emit_stxr(struct a64_jit_ctx *ctx, bool is64, uint8_t rs, uint8_t rt,
 995           uint8_t rn)
 996 {
 997         uint32_t insn;
 998
 999         insn = 0x88007c00;
1000         insn |= (!!is64) << 30;
1001         insn |= rs << 16;
1002         insn |= rn << 5;
1003         insn |= rt;
1004
1005         emit_insn(ctx, insn, check_reg(rs) || check_reg(rt) || check_reg(rn));
1006 }
1007
1008 static int
1009 has_atomics(void)
1010 {
1011         int rc = 0;
1012
1013 #if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS)
1014         rc = 1;
1015 #endif
1016         return rc;
1017 }
1018
1019 static void
1020 emit_xadd(struct a64_jit_ctx *ctx, uint8_t op, uint8_t tmp1, uint8_t tmp2,
1021           uint8_t tmp3, uint8_t dst, int16_t off, uint8_t src)
1022 {
1023         bool is64 = (BPF_SIZE(op) == EBPF_DW);
1024         uint8_t rn;
1025
1026         if (off) {
1027                 emit_mov_imm(ctx, 1, tmp1, off);
1028                 emit_add(ctx, 1, tmp1, dst);
1029                 rn = tmp1;
1030         } else {
1031                 rn = dst;
1032         }
1033
1034         if (has_atomics()) {
1035                 emit_stadd(ctx, is64, src, rn);
1036         } else {
1037                 emit_ldxr(ctx, is64, tmp2, rn);
1038                 emit_add(ctx, is64, tmp2, src);
1039                 emit_stxr(ctx, is64, tmp3, tmp2, rn);
1040                 emit_cbnz(ctx, is64, tmp3, -3);
1041         }
1042 }
1043
1044 #define A64_CMP 0x6b00000f
1045 #define A64_TST 0x6a00000f
1046 static void
1047 emit_cmp_tst(struct a64_jit_ctx *ctx, bool is64, uint8_t rn, uint8_t rm,
1048              uint32_t opc)
1049 {
1050         uint32_t insn;
1051
1052         insn = opc;
1053         insn |= (!!is64) << 31;
1054         insn |= rm << 16;
1055         insn |= rn << 5;
1056
1057         emit_insn(ctx, insn, check_reg(rn) || check_reg(rm));
1058 }
1059
1060 static void
1061 emit_cmp(struct a64_jit_ctx *ctx, bool is64, uint8_t rn, uint8_t rm)
1062 {
1063         emit_cmp_tst(ctx, is64, rn, rm, A64_CMP);
1064 }
1065
1066 static void
1067 emit_tst(struct a64_jit_ctx *ctx, bool is64, uint8_t rn, uint8_t rm)
1068 {
1069         emit_cmp_tst(ctx, is64, rn, rm, A64_TST);
1070 }
1071
1072 static void
1073 emit_b_cond(struct a64_jit_ctx *ctx, uint8_t cond, int32_t imm19)
1074 {
1075         uint32_t insn, imm;
1076
1077         imm = mask_imm(19, imm19);
1078         insn = 0x15 << 26;
1079         insn |= imm << 5;
1080         insn |= cond;
1081
1082         emit_insn(ctx, insn, check_cond(cond) || check_imm(19, imm19));
1083 }
1084
1085 static void
1086 emit_branch(struct a64_jit_ctx *ctx, uint8_t op, uint32_t i, int16_t off)
1087 {
1088         jump_offset_to_branch_update(ctx, i);
1089         emit_b_cond(ctx, ebpf_to_a64_cond(op), jump_offset_get(ctx, i, off));
1090 }
1091
1092 static void
1093 check_program_has_call(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
1094 {
1095         const struct ebpf_insn *ins;
1096         uint8_t op;
1097         uint32_t i;
1098
1099         for (i = 0; i != bpf->prm.nb_ins; i++) {
1100                 ins = bpf->prm.ins + i;
1101                 op = ins->code;
1102
1103                 switch (op) {
1104                 /* Call imm */
1105                 case (BPF_JMP | EBPF_CALL):
1106                         ctx->foundcall = 1;
1107                         return;
1108                 }
1109         }
1110 }
1111
1112 /*
1113  * Walk through eBPF code and translate them to arm64 one.
1114  */
1115 static int
1116 emit(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
1117 {
1118         uint8_t op, dst, src, tmp1, tmp2, tmp3;
1119         const struct ebpf_insn *ins;
1120         uint64_t u64;
1121         int16_t off;
1122         int32_t imm;
1123         uint32_t i;
1124         bool is64;
1125         int rc;
1126
1127         /* Reset context fields */
1128         ctx->idx = 0;
1129         /* arm64 SP must be aligned to 16 */
1130         ctx->stack_sz = RTE_ALIGN_MUL_CEIL(bpf->stack_sz, 16);
1131         tmp1 = ebpf_to_a64_reg(ctx, TMP_REG_1);
1132         tmp2 = ebpf_to_a64_reg(ctx, TMP_REG_2);
1133         tmp3 = ebpf_to_a64_reg(ctx, TMP_REG_3);
1134
1135         emit_prologue(ctx);
1136
1137         for (i = 0; i != bpf->prm.nb_ins; i++) {
1138
1139                 jump_offset_update(ctx, i);
1140                 ins = bpf->prm.ins + i;
1141                 op = ins->code;
1142                 off = ins->off;
1143                 imm = ins->imm;
1144
1145                 dst = ebpf_to_a64_reg(ctx, ins->dst_reg);
1146                 src = ebpf_to_a64_reg(ctx, ins->src_reg);
1147                 is64 = (BPF_CLASS(op) == EBPF_ALU64);
1148
1149                 switch (op) {
1150                 /* dst = src */
1151                 case (BPF_ALU | EBPF_MOV | BPF_X):
1152                 case (EBPF_ALU64 | EBPF_MOV | BPF_X):
1153                         emit_mov(ctx, is64, dst, src);
1154                         break;
1155                 /* dst = imm */
1156                 case (BPF_ALU | EBPF_MOV | BPF_K):
1157                 case (EBPF_ALU64 | EBPF_MOV | BPF_K):
1158                         emit_mov_imm(ctx, is64, dst, imm);
1159                         break;
1160                 /* dst += src */
1161                 case (BPF_ALU | BPF_ADD | BPF_X):
1162                 case (EBPF_ALU64 | BPF_ADD | BPF_X):
1163                         emit_add(ctx, is64, dst, src);
1164                         break;
1165                 /* dst += imm */
1166                 case (BPF_ALU | BPF_ADD | BPF_K):
1167                 case (EBPF_ALU64 | BPF_ADD | BPF_K):
1168                         emit_mov_imm(ctx, is64, tmp1, imm);
1169                         emit_add(ctx, is64, dst, tmp1);
1170                         break;
1171                 /* dst -= src */
1172                 case (BPF_ALU | BPF_SUB | BPF_X):
1173                 case (EBPF_ALU64 | BPF_SUB | BPF_X):
1174                         emit_sub(ctx, is64, dst, src);
1175                         break;
1176                 /* dst -= imm */
1177                 case (BPF_ALU | BPF_SUB | BPF_K):
1178                 case (EBPF_ALU64 | BPF_SUB | BPF_K):
1179                         emit_mov_imm(ctx, is64, tmp1, imm);
1180                         emit_sub(ctx, is64, dst, tmp1);
1181                         break;
1182                 /* dst *= src */
1183                 case (BPF_ALU | BPF_MUL | BPF_X):
1184                 case (EBPF_ALU64 | BPF_MUL | BPF_X):
1185                         emit_mul(ctx, is64, dst, src);
1186                         break;
1187                 /* dst *= imm */
1188                 case (BPF_ALU | BPF_MUL | BPF_K):
1189                 case (EBPF_ALU64 | BPF_MUL | BPF_K):
1190                         emit_mov_imm(ctx, is64, tmp1, imm);
1191                         emit_mul(ctx, is64, dst, tmp1);
1192                         break;
1193                 /* dst /= src */
1194                 case (BPF_ALU | BPF_DIV | BPF_X):
1195                 case (EBPF_ALU64 | BPF_DIV | BPF_X):
1196                         emit_return_zero_if_src_zero(ctx, is64, src);
1197                         emit_div(ctx, is64, dst, src);
1198                         break;
1199                 /* dst /= imm */
1200                 case (BPF_ALU | BPF_DIV | BPF_K):
1201                 case (EBPF_ALU64 | BPF_DIV | BPF_K):
1202                         emit_mov_imm(ctx, is64, tmp1, imm);
1203                         emit_div(ctx, is64, dst, tmp1);
1204                         break;
1205                 /* dst %= src */
1206                 case (BPF_ALU | BPF_MOD | BPF_X):
1207                 case (EBPF_ALU64 | BPF_MOD | BPF_X):
1208                         emit_return_zero_if_src_zero(ctx, is64, src);
1209                         emit_mod(ctx, is64, tmp1, dst, src);
1210                         break;
1211                 /* dst %= imm */
1212                 case (BPF_ALU | BPF_MOD | BPF_K):
1213                 case (EBPF_ALU64 | BPF_MOD | BPF_K):
1214                         emit_mov_imm(ctx, is64, tmp1, imm);
1215                         emit_mod(ctx, is64, tmp2, dst, tmp1);
1216                         break;
1217                 /* dst |= src */
1218                 case (BPF_ALU | BPF_OR | BPF_X):
1219                 case (EBPF_ALU64 | BPF_OR | BPF_X):
1220                         emit_or(ctx, is64, dst, src);
1221                         break;
1222                 /* dst |= imm */
1223                 case (BPF_ALU | BPF_OR | BPF_K):
1224                 case (EBPF_ALU64 | BPF_OR | BPF_K):
1225                         emit_mov_imm(ctx, is64, tmp1, imm);
1226                         emit_or(ctx, is64, dst, tmp1);
1227                         break;
1228                 /* dst &= src */
1229                 case (BPF_ALU | BPF_AND | BPF_X):
1230                 case (EBPF_ALU64 | BPF_AND | BPF_X):
1231                         emit_and(ctx, is64, dst, src);
1232                         break;
1233                 /* dst &= imm */
1234                 case (BPF_ALU | BPF_AND | BPF_K):
1235                 case (EBPF_ALU64 | BPF_AND | BPF_K):
1236                         emit_mov_imm(ctx, is64, tmp1, imm);
1237                         emit_and(ctx, is64, dst, tmp1);
1238                         break;
1239                 /* dst ^= src */
1240                 case (BPF_ALU | BPF_XOR | BPF_X):
1241                 case (EBPF_ALU64 | BPF_XOR | BPF_X):
1242                         emit_xor(ctx, is64, dst, src);
1243                         break;
1244                 /* dst ^= imm */
1245                 case (BPF_ALU | BPF_XOR | BPF_K):
1246                 case (EBPF_ALU64 | BPF_XOR | BPF_K):
1247                         emit_mov_imm(ctx, is64, tmp1, imm);
1248                         emit_xor(ctx, is64, dst, tmp1);
1249                         break;
1250                 /* dst = -dst */
1251                 case (BPF_ALU | BPF_NEG):
1252                 case (EBPF_ALU64 | BPF_NEG):
1253                         emit_neg(ctx, is64, dst);
1254                         break;
1255                 /* dst <<= src */
1256                 case BPF_ALU | BPF_LSH | BPF_X:
1257                 case EBPF_ALU64 | BPF_LSH | BPF_X:
1258                         emit_lslv(ctx, is64, dst, src);
1259                         break;
1260                 /* dst <<= imm */
1261                 case BPF_ALU | BPF_LSH | BPF_K:
1262                 case EBPF_ALU64 | BPF_LSH | BPF_K:
1263                         emit_lsl(ctx, is64, dst, imm);
1264                         break;
1265                 /* dst >>= src */
1266                 case BPF_ALU | BPF_RSH | BPF_X:
1267                 case EBPF_ALU64 | BPF_RSH | BPF_X:
1268                         emit_lsrv(ctx, is64, dst, src);
1269                         break;
1270                 /* dst >>= imm */
1271                 case BPF_ALU | BPF_RSH | BPF_K:
1272                 case EBPF_ALU64 | BPF_RSH | BPF_K:
1273                         emit_lsr(ctx, is64, dst, imm);
1274                         break;
1275                 /* dst >>= src (arithmetic) */
1276                 case BPF_ALU | EBPF_ARSH | BPF_X:
1277                 case EBPF_ALU64 | EBPF_ARSH | BPF_X:
1278                         emit_asrv(ctx, is64, dst, src);
1279                         break;
1280                 /* dst >>= imm (arithmetic) */
1281                 case BPF_ALU | EBPF_ARSH | BPF_K:
1282                 case EBPF_ALU64 | EBPF_ARSH | BPF_K:
1283                         emit_asr(ctx, is64, dst, imm);
1284                         break;
1285                 /* dst = be##imm(dst) */
1286                 case (BPF_ALU | EBPF_END | EBPF_TO_BE):
1287                         emit_be(ctx, dst, imm);
1288                         break;
1289                 /* dst = le##imm(dst) */
1290                 case (BPF_ALU | EBPF_END | EBPF_TO_LE):
1291                         emit_le(ctx, dst, imm);
1292                         break;
1293                 /* dst = *(size *) (src + off) */
1294                 case (BPF_LDX | BPF_MEM | BPF_B):
1295                 case (BPF_LDX | BPF_MEM | BPF_H):
1296                 case (BPF_LDX | BPF_MEM | BPF_W):
1297                 case (BPF_LDX | BPF_MEM | EBPF_DW):
1298                         emit_mov_imm(ctx, 1, tmp1, off);
1299                         emit_ldr(ctx, BPF_SIZE(op), dst, src, tmp1);
1300                         break;
1301                 /* dst = imm64 */
1302                 case (BPF_LD | BPF_IMM | EBPF_DW):
1303                         u64 = ((uint64_t)ins[1].imm << 32) | (uint32_t)imm;
1304                         emit_mov_imm(ctx, 1, dst, u64);
1305                         i++;
1306                         break;
1307                 /* *(size *)(dst + off) = src */
1308                 case (BPF_STX | BPF_MEM | BPF_B):
1309                 case (BPF_STX | BPF_MEM | BPF_H):
1310                 case (BPF_STX | BPF_MEM | BPF_W):
1311                 case (BPF_STX | BPF_MEM | EBPF_DW):
1312                         emit_mov_imm(ctx, 1, tmp1, off);
1313                         emit_str(ctx, BPF_SIZE(op), src, dst, tmp1);
1314                         break;
1315                 /* *(size *)(dst + off) = imm */
1316                 case (BPF_ST | BPF_MEM | BPF_B):
1317                 case (BPF_ST | BPF_MEM | BPF_H):
1318                 case (BPF_ST | BPF_MEM | BPF_W):
1319                 case (BPF_ST | BPF_MEM | EBPF_DW):
1320                         emit_mov_imm(ctx, 1, tmp1, imm);
1321                         emit_mov_imm(ctx, 1, tmp2, off);
1322                         emit_str(ctx, BPF_SIZE(op), tmp1, dst, tmp2);
1323                         break;
1324                 /* STX XADD: lock *(size *)(dst + off) += src */
1325                 case (BPF_STX | EBPF_XADD | BPF_W):
1326                 case (BPF_STX | EBPF_XADD | EBPF_DW):
1327                         emit_xadd(ctx, op, tmp1, tmp2, tmp3, dst, off, src);
1328                         break;
1329                 /* PC += off */
1330                 case (BPF_JMP | BPF_JA):
1331                         emit_b(ctx, jump_offset_get(ctx, i, off));
1332                         break;
1333                 /* PC += off if dst COND imm */
1334                 case (BPF_JMP | BPF_JEQ | BPF_K):
1335                 case (BPF_JMP | EBPF_JNE | BPF_K):
1336                 case (BPF_JMP | BPF_JGT | BPF_K):
1337                 case (BPF_JMP | EBPF_JLT | BPF_K):
1338                 case (BPF_JMP | BPF_JGE | BPF_K):
1339                 case (BPF_JMP | EBPF_JLE | BPF_K):
1340                 case (BPF_JMP | EBPF_JSGT | BPF_K):
1341                 case (BPF_JMP | EBPF_JSLT | BPF_K):
1342                 case (BPF_JMP | EBPF_JSGE | BPF_K):
1343                 case (BPF_JMP | EBPF_JSLE | BPF_K):
1344                         emit_mov_imm(ctx, 1, tmp1, imm);
1345                         emit_cmp(ctx, 1, dst, tmp1);
1346                         emit_branch(ctx, op, i, off);
1347                         break;
1348                 case (BPF_JMP | BPF_JSET | BPF_K):
1349                         emit_mov_imm(ctx, 1, tmp1, imm);
1350                         emit_tst(ctx, 1, dst, tmp1);
1351                         emit_branch(ctx, op, i, off);
1352                         break;
1353                 /* PC += off if dst COND src */
1354                 case (BPF_JMP | BPF_JEQ | BPF_X):
1355                 case (BPF_JMP | EBPF_JNE | BPF_X):
1356                 case (BPF_JMP | BPF_JGT | BPF_X):
1357                 case (BPF_JMP | EBPF_JLT | BPF_X):
1358                 case (BPF_JMP | BPF_JGE | BPF_X):
1359                 case (BPF_JMP | EBPF_JLE | BPF_X):
1360                 case (BPF_JMP | EBPF_JSGT | BPF_X):
1361                 case (BPF_JMP | EBPF_JSLT | BPF_X):
1362                 case (BPF_JMP | EBPF_JSGE | BPF_X):
1363                 case (BPF_JMP | EBPF_JSLE | BPF_X):
1364                         emit_cmp(ctx, 1, dst, src);
1365                         emit_branch(ctx, op, i, off);
1366                         break;
1367                 case (BPF_JMP | BPF_JSET | BPF_X):
1368                         emit_tst(ctx, 1, dst, src);
1369                         emit_branch(ctx, op, i, off);
1370                         break;
1371                 /* Call imm */
1372                 case (BPF_JMP | EBPF_CALL):
1373                         emit_call(ctx, tmp1, bpf->prm.xsym[ins->imm].func.val);
1374                         break;
1375                 /* Return r0 */
1376                 case (BPF_JMP | EBPF_EXIT):
1377                         emit_epilogue(ctx);
1378                         break;
1379                 default:
1380                         RTE_BPF_LOG(ERR,
1381                                 "%s(%p): invalid opcode %#x at pc: %u;\n",
1382                                 __func__, bpf, ins->code, i);
1383                         return -EINVAL;
1384                 }
1385         }
1386         rc = check_invalid_args(ctx, ctx->idx);
1387
1388         return rc;
1389 }
1390
1391 /*
1392  * Produce a native ISA version of the given BPF code.
1393  */
1394 int
1395 bpf_jit_arm64(struct rte_bpf *bpf)
1396 {
1397         struct a64_jit_ctx ctx;
1398         size_t size;
1399         int rc;
1400
1401         /* Init JIT context */
1402         memset(&ctx, 0, sizeof(ctx));
1403
1404         /* Initialize the memory for eBPF to a64 insn offset map for jump */
1405         rc = jump_offset_init(&ctx, bpf);
1406         if (rc)
1407                 goto error;
1408
1409         /* Find eBPF program has call class or not */
1410         check_program_has_call(&ctx, bpf);
1411
1412         /* First pass to calculate total code size and valid jump offsets */
1413         rc = emit(&ctx, bpf);
1414         if (rc)
1415                 goto finish;
1416
1417         size = ctx.idx * sizeof(uint32_t);
1418         /* Allocate JIT program memory */
1419         ctx.ins = mmap(NULL, size, PROT_READ | PROT_WRITE,
1420                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1421         if (ctx.ins == MAP_FAILED) {
1422                 rc = -ENOMEM;
1423                 goto finish;
1424         }
1425
1426         /* Second pass to generate code */
1427         rc = emit(&ctx, bpf);
1428         if (rc)
1429                 goto munmap;
1430
1431         rc = mprotect(ctx.ins, size, PROT_READ | PROT_EXEC) != 0;
1432         if (rc) {
1433                 rc = -errno;
1434                 goto munmap;
1435         }
1436
1437         /* Flush the icache */
1438         __builtin___clear_cache((char *)ctx.ins, (char *)(ctx.ins + ctx.idx));
1439
1440         bpf->jit.func = (void *)ctx.ins;
1441         bpf->jit.sz = size;
1442
1443         goto finish;
1444
1445 munmap:
1446         munmap(ctx.ins, size);
1447 finish:
1448         jump_offset_fini(&ctx);
1449 error:
1450         return rc;
1451 }