lib/librte_bpf/bpf_jit_arm64.c

   1 /* SPDX-License-Identifier: BSD-3-Clause
   2  * Copyright(C) 2019 Marvell International Ltd.
   3  */
   4
   5 #include <errno.h>
   6 #include <stdbool.h>
   7
   8 #include <rte_common.h>
   9 #include <rte_byteorder.h>
  10
  11 #include "bpf_impl.h"
  12
  13 #define A64_REG_MASK(r)         ((r) & 0x1f)
  14 #define A64_INVALID_OP_CODE     (0xffffffff)
  15
  16 #define TMP_REG_1               (EBPF_REG_10 + 1)
  17 #define TMP_REG_2               (EBPF_REG_10 + 2)
  18 #define TMP_REG_3               (EBPF_REG_10 + 3)
  19
  20 #define EBPF_FP                 (EBPF_REG_10)
  21 #define EBPF_OP_GET(op)         (BPF_OP(op) >> 4)
  22
  23 #define A64_R(x)                x
  24 #define A64_FP                  29
  25 #define A64_LR                  30
  26 #define A64_SP                  31
  27 #define A64_ZR                  31
  28
  29 #define check_imm(n, val) (((val) >= 0) ? !!((val) >> (n)) : !!((~val) >> (n)))
  30 #define mask_imm(n, val) ((val) & ((1 << (n)) - 1))
  31
/*
 * Bookkeeping record relating eBPF instructions to the generated arm64
 * code. NOTE(review): its users are not in this part of the file; the
 * field descriptions below are the original author's.
 */
struct ebpf_a64_map {
        uint32_t off; /* eBPF to arm64 insn offset mapping for jump */
        uint8_t off_to_b; /* Offset to branch instruction delta */
};
  36
/*
 * State carried through code generation. The emitters are run with
 * ins == NULL to count instructions only (the "first pass"), and with a
 * real buffer to store them.
 */
struct a64_jit_ctx {
        size_t stack_sz;          /* Stack size */
        uint32_t *ins;            /* ARM64 instructions. NULL if first pass */
        struct ebpf_a64_map *map; /* eBPF to arm64 insn mapping for jump */
        uint32_t idx;             /* Current instruction index */
        uint32_t program_start;   /* Program index, Just after prologue */
        uint32_t program_sz;      /* Program size. Found in first pass */
        uint8_t foundcall;        /* Found EBPF_CALL class code in eBPF pgm */
};
  46
  47 static int
  48 check_immr_imms(bool is64, uint8_t immr, uint8_t imms)
  49 {
  50         const unsigned int width = is64 ? 64 : 32;
  51
  52         if (immr >= width || imms >= width)
  53                 return 1;
  54
  55         return 0;
  56 }
  57
  58 static int
  59 check_mov_hw(bool is64, const uint8_t val)
  60 {
  61         if (val == 16 || val == 0)
  62                 return 0;
  63         else if (is64 && val != 64 && val != 48 && val != 32)
  64                 return 1;
  65
  66         return 0;
  67 }
  68
  69 static int
  70 check_reg(uint8_t r)
  71 {
  72         return (r > 31) ? 1 : 0;
  73 }
  74
  75 static int
  76 is_first_pass(struct a64_jit_ctx *ctx)
  77 {
  78         return (ctx->ins == NULL);
  79 }
  80
  81 static int
  82 check_invalid_args(struct a64_jit_ctx *ctx, uint32_t limit)
  83 {
  84         uint32_t idx;
  85
  86         if (is_first_pass(ctx))
  87                 return 0;
  88
  89         for (idx = 0; idx < limit; idx++) {
  90                 if (rte_le_to_cpu_32(ctx->ins[idx]) == A64_INVALID_OP_CODE) {
  91                         RTE_BPF_LOG(ERR,
  92                                 "%s: invalid opcode at %u;\n", __func__, idx);
  93                         return -EINVAL;
  94                 }
  95         }
  96         return 0;
  97 }
  98
  99 /* Emit an instruction */
 100 static inline void
 101 emit_insn(struct a64_jit_ctx *ctx, uint32_t insn, int error)
 102 {
 103         if (error)
 104                 insn = A64_INVALID_OP_CODE;
 105
 106         if (ctx->ins)
 107                 ctx->ins[ctx->idx] = rte_cpu_to_le_32(insn);
 108
 109         ctx->idx++;
 110 }
 111
 112 static void
 113 emit_ret(struct a64_jit_ctx *ctx)
 114 {
 115         emit_insn(ctx, 0xd65f03c0, 0);
 116 }
 117
 118 static void
 119 emit_add_sub_imm(struct a64_jit_ctx *ctx, bool is64, bool sub, uint8_t rd,
 120                  uint8_t rn, int16_t imm12)
 121 {
 122         uint32_t insn, imm;
 123
 124         imm = mask_imm(12, imm12);
 125         insn = (!!is64) << 31;
 126         insn |= (!!sub) << 30;
 127         insn |= 0x11000000;
 128         insn |= rd;
 129         insn |= rn << 5;
 130         insn |= imm << 10;
 131
 132         emit_insn(ctx, insn,
 133                   check_reg(rd) || check_reg(rn) || check_imm(12, imm12));
 134 }
 135
 136 static void
 137 emit_add_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12)
 138 {
 139         emit_add_sub_imm(ctx, 1, 0, rd, rn, imm12);
 140 }
 141
 142 static void
 143 emit_sub_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12)
 144 {
 145         emit_add_sub_imm(ctx, 1, 1, rd, rn, imm12);
 146 }
 147
 148 static void
 149 emit_mov(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn)
 150 {
 151         emit_add_sub_imm(ctx, is64, 0, rd, rn, 0);
 152 }
 153
 154 static void
 155 emit_mov_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn)
 156 {
 157         emit_mov(ctx, 1, rd, rn);
 158 }
 159
 160 static void
 161 emit_ls_pair_64(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2, uint8_t rn,
 162                 bool push, bool load, bool pre_index)
 163 {
 164         uint32_t insn;
 165
 166         insn = (!!load) << 22;
 167         insn |= (!!pre_index) << 24;
 168         insn |= 0xa8800000;
 169         insn |= rt;
 170         insn |= rn << 5;
 171         insn |= rt2 << 10;
 172         if (push)
 173                 insn |= 0x7e << 15; /* 0x7e means -2 with imm7 */
 174         else
 175                 insn |= 0x2 << 15;
 176
 177         emit_insn(ctx, insn, check_reg(rn) || check_reg(rt) || check_reg(rt2));
 178
 179 }
 180
 181 /* Emit stp rt, rt2, [sp, #-16]! */
 182 static void
 183 emit_stack_push(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2)
 184 {
 185         emit_ls_pair_64(ctx, rt, rt2, A64_SP, 1, 0, 1);
 186 }
 187
 188 /* Emit ldp rt, rt2, [sp, #16] */
 189 static void
 190 emit_stack_pop(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2)
 191 {
 192         emit_ls_pair_64(ctx, rt, rt2, A64_SP, 0, 1, 0);
 193 }
 194
 195 #define A64_MOVN 0
 196 #define A64_MOVZ 2
 197 #define A64_MOVK 3
 198 static void
 199 mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t type,
 200         uint16_t imm16, uint8_t shift)
 201 {
 202         uint32_t insn;
 203
 204         insn = (!!is64) << 31;
 205         insn |= type << 29;
 206         insn |= 0x25 << 23;
 207         insn |= (shift/16) << 21;
 208         insn |= imm16 << 5;
 209         insn |= rd;
 210
 211         emit_insn(ctx, insn, check_reg(rd) || check_mov_hw(is64, shift));
 212 }
 213
 214 static void
 215 emit_mov_imm32(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint32_t val)
 216 {
 217         uint16_t upper = val >> 16;
 218         uint16_t lower = val & 0xffff;
 219
 220         /* Positive number */
 221         if ((val & 1UL << 31) == 0) {
 222                 mov_imm(ctx, is64, rd, A64_MOVZ, lower, 0);
 223                 if (upper)
 224                         mov_imm(ctx, is64, rd, A64_MOVK, upper, 16);
 225         } else { /* Negative number */
 226                 if (upper == 0xffff) {
 227                         mov_imm(ctx, is64, rd, A64_MOVN, ~lower, 0);
 228                 } else {
 229                         mov_imm(ctx, is64, rd, A64_MOVN, ~upper, 16);
 230                         if (lower != 0xffff)
 231                                 mov_imm(ctx, is64, rd, A64_MOVK, lower, 0);
 232                 }
 233         }
 234 }
 235
 236 static int
 237 u16_blocks_weight(const uint64_t val, bool one)
 238 {
 239         return (((val >>  0) & 0xffff) == (one ? 0xffff : 0x0000)) +
 240                (((val >> 16) & 0xffff) == (one ? 0xffff : 0x0000)) +
 241                (((val >> 32) & 0xffff) == (one ? 0xffff : 0x0000)) +
 242                (((val >> 48) & 0xffff) == (one ? 0xffff : 0x0000));
 243 }
 244
 245 static void
 246 emit_mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint64_t val)
 247 {
 248         uint64_t nval = ~val;
 249         int movn, sr;
 250
 251         if (is64 == 0)
 252                 return emit_mov_imm32(ctx, 0, rd, (uint32_t)(val & 0xffffffff));
 253
 254         /* Find MOVN or MOVZ first */
 255         movn = u16_blocks_weight(val, true) > u16_blocks_weight(val, false);
 256         /* Find shift right value */
 257         sr = movn ? rte_fls_u64(nval) - 1 : rte_fls_u64(val) - 1;
 258         sr = RTE_ALIGN_FLOOR(sr, 16);
 259         sr = RTE_MAX(sr, 0);
 260
 261         if (movn)
 262                 mov_imm(ctx, 1, rd, A64_MOVN, (nval >> sr) & 0xffff, sr);
 263         else
 264                 mov_imm(ctx, 1, rd, A64_MOVZ, (val >> sr) & 0xffff, sr);
 265
 266         sr -= 16;
 267         while (sr >= 0) {
 268                 if (((val >> sr) & 0xffff) != (movn ? 0xffff : 0x0000))
 269                         mov_imm(ctx, 1, rd, A64_MOVK, (val >> sr) & 0xffff, sr);
 270                 sr -= 16;
 271         }
 272 }
 273
 274 #define A64_ADD 0x58
 275 #define A64_SUB 0x258
 276 static void
 277 emit_add_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
 278              uint8_t rm, uint16_t op)
 279 {
 280         uint32_t insn;
 281
 282         insn = (!!is64) << 31;
 283         insn |= op << 21; /* shift == 0 */
 284         insn |= rm << 16;
 285         insn |= rn << 5;
 286         insn |= rd;
 287
 288         emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
 289 }
 290
 291 static void
 292 emit_add(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 293 {
 294         emit_add_sub(ctx, is64, rd, rd, rm, A64_ADD);
 295 }
 296
 297 static void
 298 emit_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 299 {
 300         emit_add_sub(ctx, is64, rd, rd, rm, A64_SUB);
 301 }
 302
 303 static void
 304 emit_neg(struct a64_jit_ctx *ctx, bool is64, uint8_t rd)
 305 {
 306         emit_add_sub(ctx, is64, rd, A64_ZR, rd, A64_SUB);
 307 }
 308
 309 static void
 310 emit_mul(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 311 {
 312         uint32_t insn;
 313
 314         insn = (!!is64) << 31;
 315         insn |= 0xd8 << 21;
 316         insn |= rm << 16;
 317         insn |= A64_ZR << 10;
 318         insn |= rd << 5;
 319         insn |= rd;
 320
 321         emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
 322 }
 323
 324 #define A64_UDIV 0x2
 325 #define A64_LSLV 0x8
 326 #define A64_LSRV 0x9
 327 #define A64_ASRV 0xA
 328 static void
 329 emit_data_process_two_src(struct a64_jit_ctx *ctx, bool is64, uint8_t rd,
 330                           uint8_t rn, uint8_t rm, uint16_t op)
 331
 332 {
 333         uint32_t insn;
 334
 335         insn = (!!is64) << 31;
 336         insn |= 0xd6 << 21;
 337         insn |= rm << 16;
 338         insn |= op << 10;
 339         insn |= rn << 5;
 340         insn |= rd;
 341
 342         emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
 343 }
 344
 345 static void
 346 emit_div(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 347 {
 348         emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_UDIV);
 349 }
 350
 351 static void
 352 emit_lslv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 353 {
 354         emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_LSLV);
 355 }
 356
 357 static void
 358 emit_lsrv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 359 {
 360         emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_LSRV);
 361 }
 362
 363 static void
 364 emit_asrv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 365 {
 366         emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_ASRV);
 367 }
 368
 369 #define A64_UBFM 0x2
 370 #define A64_SBFM 0x0
 371 static void
 372 emit_bitfield(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
 373               uint8_t immr, uint8_t imms, uint16_t op)
 374
 375 {
 376         uint32_t insn;
 377
 378         insn = (!!is64) << 31;
 379         if (insn)
 380                 insn |= 1 << 22; /* Set N bit when is64 is set */
 381         insn |= op << 29;
 382         insn |= 0x26 << 23;
 383         insn |= immr << 16;
 384         insn |= imms << 10;
 385         insn |= rn << 5;
 386         insn |= rd;
 387
 388         emit_insn(ctx, insn, check_reg(rd) || check_reg(rn) ||
 389                   check_immr_imms(is64, immr, imms));
 390 }
 391 static void
 392 emit_lsl(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
 393 {
 394         const unsigned int width = is64 ? 64 : 32;
 395         uint8_t imms, immr;
 396
 397         immr = (width - imm) & (width - 1);
 398         imms = width - 1 - imm;
 399
 400         emit_bitfield(ctx, is64, rd, rd, immr, imms, A64_UBFM);
 401 }
 402
 403 static void
 404 emit_lsr(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
 405 {
 406         emit_bitfield(ctx, is64, rd, rd, imm, is64 ? 63 : 31, A64_UBFM);
 407 }
 408
 409 static void
 410 emit_asr(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
 411 {
 412         emit_bitfield(ctx, is64, rd, rd, imm, is64 ? 63 : 31, A64_SBFM);
 413 }
 414
 415 #define A64_AND 0
 416 #define A64_OR 1
 417 #define A64_XOR 2
 418 static void
 419 emit_logical(struct a64_jit_ctx *ctx, bool is64, uint8_t rd,
 420              uint8_t rm, uint16_t op)
 421 {
 422         uint32_t insn;
 423
 424         insn = (!!is64) << 31;
 425         insn |= op << 29;
 426         insn |= 0x50 << 21;
 427         insn |= rm << 16;
 428         insn |= rd << 5;
 429         insn |= rd;
 430
 431         emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
 432 }
 433
 434 static void
 435 emit_or(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 436 {
 437         emit_logical(ctx, is64, rd, rm, A64_OR);
 438 }
 439
 440 static void
 441 emit_and(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 442 {
 443         emit_logical(ctx, is64, rd, rm, A64_AND);
 444 }
 445
 446 static void
 447 emit_xor(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 448 {
 449         emit_logical(ctx, is64, rd, rm, A64_XOR);
 450 }
 451
 452 static void
 453 emit_msub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
 454           uint8_t rm, uint8_t ra)
 455 {
 456         uint32_t insn;
 457
 458         insn = (!!is64) << 31;
 459         insn |= 0xd8 << 21;
 460         insn |= rm << 16;
 461         insn |= 0x1 << 15;
 462         insn |= ra << 10;
 463         insn |= rn << 5;
 464         insn |= rd;
 465
 466         emit_insn(ctx, insn, check_reg(rd) || check_reg(rn) || check_reg(rm) ||
 467                   check_reg(ra));
 468 }
 469
 470 static void
 471 emit_mod(struct a64_jit_ctx *ctx, bool is64, uint8_t tmp, uint8_t rd,
 472          uint8_t rm)
 473 {
 474         emit_data_process_two_src(ctx, is64, tmp, rd, rm, A64_UDIV);
 475         emit_msub(ctx, is64, rd, tmp, rm, rd);
 476 }
 477
 478 static void
 479 emit_zero_extend(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
 480 {
 481         switch (imm) {
 482         case 16:
 483                 /* Zero-extend 16 bits into 64 bits */
 484                 emit_bitfield(ctx, 1, rd, rd, 0, 15, A64_UBFM);
 485                 break;
 486         case 32:
 487                 /* Zero-extend 32 bits into 64 bits */
 488                 emit_bitfield(ctx, 1, rd, rd, 0, 31, A64_UBFM);
 489                 break;
 490         case 64:
 491                 break;
 492         default:
 493                 /* Generate error */
 494                 emit_insn(ctx, 0, 1);
 495         }
 496 }
 497
 498 static void
 499 emit_rev(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
 500 {
 501         uint32_t insn;
 502
 503         insn = 0xdac00000;
 504         insn |= rd << 5;
 505         insn |= rd;
 506
 507         switch (imm) {
 508         case 16:
 509                 insn |= 1 << 10;
 510                 emit_insn(ctx, insn, check_reg(rd));
 511                 emit_zero_extend(ctx, rd, 16);
 512                 break;
 513         case 32:
 514                 insn |= 2 << 10;
 515                 emit_insn(ctx, insn, check_reg(rd));
 516                 /* Upper 32 bits already cleared */
 517                 break;
 518         case 64:
 519                 insn |= 3 << 10;
 520                 emit_insn(ctx, insn, check_reg(rd));
 521                 break;
 522         default:
 523                 /* Generate error */
 524                 emit_insn(ctx, insn, 1);
 525         }
 526 }
 527
 528 static int
 529 is_be(void)
 530 {
 531 #if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
 532         return 1;
 533 #else
 534         return 0;
 535 #endif
 536 }
 537
 538 static void
 539 emit_be(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
 540 {
 541         if (is_be())
 542                 emit_zero_extend(ctx, rd, imm);
 543         else
 544                 emit_rev(ctx, rd, imm);
 545 }
 546
 547 static void
 548 emit_le(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
 549 {
 550         if (is_be())
 551                 emit_rev(ctx, rd, imm);
 552         else
 553                 emit_zero_extend(ctx, rd, imm);
 554 }
 555
 556 static uint8_t
 557 ebpf_to_a64_reg(struct a64_jit_ctx *ctx, uint8_t reg)
 558 {
 559         const uint32_t ebpf2a64_has_call[] = {
 560                 /* Map A64 R7 register as EBPF return register */
 561                 [EBPF_REG_0] = A64_R(7),
 562                 /* Map A64 arguments register as EBPF arguments register */
 563                 [EBPF_REG_1] = A64_R(0),
 564                 [EBPF_REG_2] = A64_R(1),
 565                 [EBPF_REG_3] = A64_R(2),
 566                 [EBPF_REG_4] = A64_R(3),
 567                 [EBPF_REG_5] = A64_R(4),
 568                 /* Map A64 callee save register as EBPF callee save register */
 569                 [EBPF_REG_6] = A64_R(19),
 570                 [EBPF_REG_7] = A64_R(20),
 571                 [EBPF_REG_8] = A64_R(21),
 572                 [EBPF_REG_9] = A64_R(22),
 573                 [EBPF_FP]    = A64_R(25),
 574                 /* Map A64 scratch registers as temporary storage */
 575                 [TMP_REG_1] = A64_R(9),
 576                 [TMP_REG_2] = A64_R(10),
 577                 [TMP_REG_3] = A64_R(11),
 578         };
 579
 580         const uint32_t ebpf2a64_no_call[] = {
 581                 /* Map A64 R7 register as EBPF return register */
 582                 [EBPF_REG_0] = A64_R(7),
 583                 /* Map A64 arguments register as EBPF arguments register */
 584                 [EBPF_REG_1] = A64_R(0),
 585                 [EBPF_REG_2] = A64_R(1),
 586                 [EBPF_REG_3] = A64_R(2),
 587                 [EBPF_REG_4] = A64_R(3),
 588                 [EBPF_REG_5] = A64_R(4),
 589                 /*
 590                  * EBPF program does not have EBPF_CALL op code,
 591                  * Map A64 scratch registers as EBPF callee save registers.
 592                  */
 593                 [EBPF_REG_6] = A64_R(9),
 594                 [EBPF_REG_7] = A64_R(10),
 595                 [EBPF_REG_8] = A64_R(11),
 596                 [EBPF_REG_9] = A64_R(12),
 597                 /* Map A64 FP register as EBPF FP register */
 598                 [EBPF_FP]    = A64_FP,
 599                 /* Map remaining A64 scratch registers as temporary storage */
 600                 [TMP_REG_1] = A64_R(13),
 601                 [TMP_REG_2] = A64_R(14),
 602                 [TMP_REG_3] = A64_R(15),
 603         };
 604
 605         if (ctx->foundcall)
 606                 return ebpf2a64_has_call[reg];
 607         else
 608                 return ebpf2a64_no_call[reg];
 609 }
 610
 611 /*
 612  * Procedure call standard for the arm64
 613  * -------------------------------------
 614  * R0..R7  - Parameter/result registers
 615  * R8      - Indirect result location register
 616  * R9..R15 - Scratch registers
 617  * R15     - Platform Register
 618  * R16     - First intra-procedure-call scratch register
 619  * R17     - Second intra-procedure-call temporary register
 620  * R19-R28 - Callee saved registers
 621  * R29     - Frame pointer
 622  * R30     - Link register
 623  * R31     - Stack pointer
 624  */
 625 static void
 626 emit_prologue_has_call(struct a64_jit_ctx *ctx)
 627 {
 628         uint8_t r6, r7, r8, r9, fp;
 629
 630         r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
 631         r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7);
 632         r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8);
 633         r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9);
 634         fp = ebpf_to_a64_reg(ctx, EBPF_FP);
 635
 636         /*
 637          * eBPF prog stack layout
 638          *
 639          *                               high
 640          *       eBPF prologue       0:+-----+ <= original A64_SP
 641          *                             |FP/LR|
 642          *                         -16:+-----+ <= current A64_FP
 643          *    Callee saved registers   | ... |
 644          *             EBPF_FP =>  -64:+-----+
 645          *                             |     |
 646          *       eBPF prog stack       | ... |
 647          *                             |     |
 648          * (EBPF_FP - bpf->stack_sz)=> +-----+
 649          * Pad for A64_SP 16B alignment| PAD |
 650          * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP
 651          *                             |     |
 652          *                             | ... | Function call stack
 653          *                             |     |
 654          *                             +-----+
 655          *                              low
 656          */
 657         emit_stack_push(ctx, A64_FP, A64_LR);
 658         emit_mov_64(ctx, A64_FP, A64_SP);
 659         emit_stack_push(ctx, r6, r7);
 660         emit_stack_push(ctx, r8, r9);
 661         /*
 662          * There is no requirement to save A64_R(28) in stack. Doing it here,
 663          * because, A64_SP needs be to 16B aligned and STR vs STP
 664          * takes same number of cycles(typically).
 665          */
 666         emit_stack_push(ctx, fp, A64_R(28));
 667         emit_mov_64(ctx, fp, A64_SP);
 668         if (ctx->stack_sz)
 669                 emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
 670 }
 671
 672 static void
 673 emit_epilogue_has_call(struct a64_jit_ctx *ctx)
 674 {
 675         uint8_t r6, r7, r8, r9, fp, r0;
 676
 677         r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
 678         r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7);
 679         r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8);
 680         r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9);
 681         fp = ebpf_to_a64_reg(ctx, EBPF_FP);
 682         r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
 683
 684         if (ctx->stack_sz)
 685                 emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
 686         emit_stack_pop(ctx, fp, A64_R(28));
 687         emit_stack_pop(ctx, r8, r9);
 688         emit_stack_pop(ctx, r6, r7);
 689         emit_stack_pop(ctx, A64_FP, A64_LR);
 690         emit_mov_64(ctx, A64_R(0), r0);
 691         emit_ret(ctx);
 692 }
 693
 694 static void
 695 emit_prologue_no_call(struct a64_jit_ctx *ctx)
 696 {
 697         /*
 698          * eBPF prog stack layout without EBPF_CALL opcode
 699          *
 700          *                               high
 701          *    eBPF prologue(EBPF_FP) 0:+-----+ <= original A64_SP/current A64_FP
 702          *                             |     |
 703          *                             | ... |
 704          *            eBPF prog stack  |     |
 705          *                             |     |
 706          * (EBPF_FP - bpf->stack_sz)=> +-----+
 707          * Pad for A64_SP 16B alignment| PAD |
 708          * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP
 709          *                             |     |
 710          *                             | ... | Function call stack
 711          *                             |     |
 712          *                             +-----+
 713          *                              low
 714          */
 715         if (ctx->stack_sz) {
 716                 emit_mov_64(ctx, A64_FP, A64_SP);
 717                 emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
 718         }
 719 }
 720
 721 static void
 722 emit_epilogue_no_call(struct a64_jit_ctx *ctx)
 723 {
 724         if (ctx->stack_sz)
 725                 emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
 726         emit_mov_64(ctx, A64_R(0), ebpf_to_a64_reg(ctx, EBPF_REG_0));
 727         emit_ret(ctx);
 728 }
 729
 730 static void
 731 emit_prologue(struct a64_jit_ctx *ctx)
 732 {
 733         if (ctx->foundcall)
 734                 emit_prologue_has_call(ctx);
 735         else
 736                 emit_prologue_no_call(ctx);
 737
 738         ctx->program_start = ctx->idx;
 739 }
 740
 741 static void
 742 emit_epilogue(struct a64_jit_ctx *ctx)
 743 {
 744         ctx->program_sz = ctx->idx - ctx->program_start;
 745
 746         if (ctx->foundcall)
 747                 emit_epilogue_has_call(ctx);
 748         else
 749                 emit_epilogue_no_call(ctx);
 750 }
 751
 752 static void
 753 emit_cbnz(struct a64_jit_ctx *ctx, bool is64, uint8_t rt, int32_t imm19)
 754 {
 755         uint32_t insn, imm;
 756
 757         imm = mask_imm(19, imm19);
 758         insn = (!!is64) << 31;
 759         insn |= 0x35 << 24;
 760         insn |= imm << 5;
 761         insn |= rt;
 762
 763         emit_insn(ctx, insn, check_reg(rt) || check_imm(19, imm19));
 764 }
 765
 766 static void
 767 emit_b(struct a64_jit_ctx *ctx, int32_t imm26)
 768 {
 769         uint32_t insn, imm;
 770
 771         imm = mask_imm(26, imm26);
 772         insn = 0x5 << 26;
 773         insn |= imm;
 774
 775         emit_insn(ctx, insn, check_imm(26, imm26));
 776 }
 777
 778 static void
 779 emit_return_zero_if_src_zero(struct a64_jit_ctx *ctx, bool is64, uint8_t src)
 780 {
 781         uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
 782         uint16_t jump_to_epilogue;
 783
 784         emit_cbnz(ctx, is64, src, 3);
 785         emit_mov_imm(ctx, is64, r0, 0);
 786         jump_to_epilogue = (ctx->program_start + ctx->program_sz) - ctx->idx;
 787         emit_b(ctx, jump_to_epilogue);
 788 }
 789
 790 static void
 791 check_program_has_call(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
 792 {
 793         const struct ebpf_insn *ins;
 794         uint8_t op;
 795         uint32_t i;
 796
 797         for (i = 0; i != bpf->prm.nb_ins; i++) {
 798                 ins = bpf->prm.ins + i;
 799                 op = ins->code;
 800
 801                 switch (op) {
 802                 /* Call imm */
 803                 case (BPF_JMP | EBPF_CALL):
 804                         ctx->foundcall = 1;
 805                         return;
 806                 }
 807         }
 808 }
 809
 810 /*
 811  * Walk through eBPF code and translate them to arm64 one.
 812  */
 813 static int
 814 emit(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
 815 {
 816         uint8_t op, dst, src, tmp1, tmp2;
 817         const struct ebpf_insn *ins;
 818         int32_t imm;
 819         uint32_t i;
 820         bool is64;
 821         int rc;
 822
 823         /* Reset context fields */
 824         ctx->idx = 0;
 825         /* arm64 SP must be aligned to 16 */
 826         ctx->stack_sz = RTE_ALIGN_MUL_CEIL(bpf->stack_sz, 16);
 827         tmp1 = ebpf_to_a64_reg(ctx, TMP_REG_1);
 828         tmp2 = ebpf_to_a64_reg(ctx, TMP_REG_2);
 829
 830         emit_prologue(ctx);
 831
 832         for (i = 0; i != bpf->prm.nb_ins; i++) {
 833
 834                 ins = bpf->prm.ins + i;
 835                 op = ins->code;
 836                 imm = ins->imm;
 837
 838                 dst = ebpf_to_a64_reg(ctx, ins->dst_reg);
 839                 src = ebpf_to_a64_reg(ctx, ins->src_reg);
 840                 is64 = (BPF_CLASS(op) == EBPF_ALU64);
 841
 842                 switch (op) {
 843                 /* dst = src */
 844                 case (BPF_ALU | EBPF_MOV | BPF_X):
 845                 case (EBPF_ALU64 | EBPF_MOV | BPF_X):
 846                         emit_mov(ctx, is64, dst, src);
 847                         break;
 848                 /* dst = imm */
 849                 case (BPF_ALU | EBPF_MOV | BPF_K):
 850                 case (EBPF_ALU64 | EBPF_MOV | BPF_K):
 851                         emit_mov_imm(ctx, is64, dst, imm);
 852                         break;
 853                 /* dst += src */
 854                 case (BPF_ALU | BPF_ADD | BPF_X):
 855                 case (EBPF_ALU64 | BPF_ADD | BPF_X):
 856                         emit_add(ctx, is64, dst, src);
 857                         break;
 858                 /* dst += imm */
 859                 case (BPF_ALU | BPF_ADD | BPF_K):
 860                 case (EBPF_ALU64 | BPF_ADD | BPF_K):
 861                         emit_mov_imm(ctx, is64, tmp1, imm);
 862                         emit_add(ctx, is64, dst, tmp1);
 863                         break;
 864                 /* dst -= src */
 865                 case (BPF_ALU | BPF_SUB | BPF_X):
 866                 case (EBPF_ALU64 | BPF_SUB | BPF_X):
 867                         emit_sub(ctx, is64, dst, src);
 868                         break;
 869                 /* dst -= imm */
 870                 case (BPF_ALU | BPF_SUB | BPF_K):
 871                 case (EBPF_ALU64 | BPF_SUB | BPF_K):
 872                         emit_mov_imm(ctx, is64, tmp1, imm);
 873                         emit_sub(ctx, is64, dst, tmp1);
 874                         break;
 875                 /* dst *= src */
 876                 case (BPF_ALU | BPF_MUL | BPF_X):
 877                 case (EBPF_ALU64 | BPF_MUL | BPF_X):
 878                         emit_mul(ctx, is64, dst, src);
 879                         break;
 880                 /* dst *= imm */
 881                 case (BPF_ALU | BPF_MUL | BPF_K):
 882                 case (EBPF_ALU64 | BPF_MUL | BPF_K):
 883                         emit_mov_imm(ctx, is64, tmp1, imm);
 884                         emit_mul(ctx, is64, dst, tmp1);
 885                         break;
 886                 /* dst /= src */
 887                 case (BPF_ALU | BPF_DIV | BPF_X):
 888                 case (EBPF_ALU64 | BPF_DIV | BPF_X):
 889                         emit_return_zero_if_src_zero(ctx, is64, src);
 890                         emit_div(ctx, is64, dst, src);
 891                         break;
 892                 /* dst /= imm */
 893                 case (BPF_ALU | BPF_DIV | BPF_K):
 894                 case (EBPF_ALU64 | BPF_DIV | BPF_K):
 895                         emit_mov_imm(ctx, is64, tmp1, imm);
 896                         emit_div(ctx, is64, dst, tmp1);
 897                         break;
 898                 /* dst %= src */
 899                 case (BPF_ALU | BPF_MOD | BPF_X):
 900                 case (EBPF_ALU64 | BPF_MOD | BPF_X):
 901                         emit_return_zero_if_src_zero(ctx, is64, src);
 902                         emit_mod(ctx, is64, tmp1, dst, src);
 903                         break;
 904                 /* dst %= imm */
 905                 case (BPF_ALU | BPF_MOD | BPF_K):
 906                 case (EBPF_ALU64 | BPF_MOD | BPF_K):
 907                         emit_mov_imm(ctx, is64, tmp1, imm);
 908                         emit_mod(ctx, is64, tmp2, dst, tmp1);
 909                         break;
 910                 /* dst |= src */
 911                 case (BPF_ALU | BPF_OR | BPF_X):
 912                 case (EBPF_ALU64 | BPF_OR | BPF_X):
 913                         emit_or(ctx, is64, dst, src);
 914                         break;
 915                 /* dst |= imm */
 916                 case (BPF_ALU | BPF_OR | BPF_K):
 917                 case (EBPF_ALU64 | BPF_OR | BPF_K):
 918                         emit_mov_imm(ctx, is64, tmp1, imm);
 919                         emit_or(ctx, is64, dst, tmp1);
 920                         break;
 921                 /* dst &= src */
 922                 case (BPF_ALU | BPF_AND | BPF_X):
 923                 case (EBPF_ALU64 | BPF_AND | BPF_X):
 924                         emit_and(ctx, is64, dst, src);
 925                         break;
 926                 /* dst &= imm */
 927                 case (BPF_ALU | BPF_AND | BPF_K):
 928                 case (EBPF_ALU64 | BPF_AND | BPF_K):
 929                         emit_mov_imm(ctx, is64, tmp1, imm);
 930                         emit_and(ctx, is64, dst, tmp1);
 931                         break;
 932                 /* dst ^= src */
 933                 case (BPF_ALU | BPF_XOR | BPF_X):
 934                 case (EBPF_ALU64 | BPF_XOR | BPF_X):
 935                         emit_xor(ctx, is64, dst, src);
 936                         break;
 937                 /* dst ^= imm */
 938                 case (BPF_ALU | BPF_XOR | BPF_K):
 939                 case (EBPF_ALU64 | BPF_XOR | BPF_K):
 940                         emit_mov_imm(ctx, is64, tmp1, imm);
 941                         emit_xor(ctx, is64, dst, tmp1);
 942                         break;
 943                 /* dst = -dst */
 944                 case (BPF_ALU | BPF_NEG):
 945                 case (EBPF_ALU64 | BPF_NEG):
 946                         emit_neg(ctx, is64, dst);
 947                         break;
 948                 /* dst <<= src */
 949                 case BPF_ALU | BPF_LSH | BPF_X:
 950                 case EBPF_ALU64 | BPF_LSH | BPF_X:
 951                         emit_lslv(ctx, is64, dst, src);
 952                         break;
 953                 /* dst <<= imm */
 954                 case BPF_ALU | BPF_LSH | BPF_K:
 955                 case EBPF_ALU64 | BPF_LSH | BPF_K:
 956                         emit_lsl(ctx, is64, dst, imm);
 957                         break;
 958                 /* dst >>= src */
 959                 case BPF_ALU | BPF_RSH | BPF_X:
 960                 case EBPF_ALU64 | BPF_RSH | BPF_X:
 961                         emit_lsrv(ctx, is64, dst, src);
 962                         break;
 963                 /* dst >>= imm */
 964                 case BPF_ALU | BPF_RSH | BPF_K:
 965                 case EBPF_ALU64 | BPF_RSH | BPF_K:
 966                         emit_lsr(ctx, is64, dst, imm);
 967                         break;
 968                 /* dst >>= src (arithmetic) */
 969                 case BPF_ALU | EBPF_ARSH | BPF_X:
 970                 case EBPF_ALU64 | EBPF_ARSH | BPF_X:
 971                         emit_asrv(ctx, is64, dst, src);
 972                         break;
 973                 /* dst >>= imm (arithmetic) */
 974                 case BPF_ALU | EBPF_ARSH | BPF_K:
 975                 case EBPF_ALU64 | EBPF_ARSH | BPF_K:
 976                         emit_asr(ctx, is64, dst, imm);
 977                         break;
 978                 /* dst = be##imm(dst) */
 979                 case (BPF_ALU | EBPF_END | EBPF_TO_BE):
 980                         emit_be(ctx, dst, imm);
 981                         break;
 982                 /* dst = le##imm(dst) */
 983                 case (BPF_ALU | EBPF_END | EBPF_TO_LE):
 984                         emit_le(ctx, dst, imm);
 985                         break;
 986                 /* Return r0 */
 987                 case (BPF_JMP | EBPF_EXIT):
 988                         emit_epilogue(ctx);
 989                         break;
 990                 default:
 991                         RTE_BPF_LOG(ERR,
 992                                 "%s(%p): invalid opcode %#x at pc: %u;\n",
 993                                 __func__, bpf, ins->code, i);
 994                         return -EINVAL;
 995                 }
 996         }
 997         rc = check_invalid_args(ctx, ctx->idx);
 998
 999         return rc;
1000 }
1001
1002 /*
1003  * Produce a native ISA version of the given BPF code.
1004  */
1005 int
1006 bpf_jit_arm64(struct rte_bpf *bpf)
1007 {
1008         struct a64_jit_ctx ctx;
1009         size_t size;
1010         int rc;
1011
1012         /* Init JIT context */
1013         memset(&ctx, 0, sizeof(ctx));
1014
1015         /* Find eBPF program has call class or not */
1016         check_program_has_call(&ctx, bpf);
1017
1018         /* First pass to calculate total code size and valid jump offsets */
1019         rc = emit(&ctx, bpf);
1020         if (rc)
1021                 goto finish;
1022
1023         size = ctx.idx * sizeof(uint32_t);
1024         /* Allocate JIT program memory */
1025         ctx.ins = mmap(NULL, size, PROT_READ | PROT_WRITE,
1026                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1027         if (ctx.ins == MAP_FAILED) {
1028                 rc = -ENOMEM;
1029                 goto finish;
1030         }
1031
1032         /* Second pass to generate code */
1033         rc = emit(&ctx, bpf);
1034         if (rc)
1035                 goto munmap;
1036
1037         rc = mprotect(ctx.ins, size, PROT_READ | PROT_EXEC) != 0;
1038         if (rc) {
1039                 rc = -errno;
1040                 goto munmap;
1041         }
1042
1043         /* Flush the icache */
1044         __builtin___clear_cache(ctx.ins, ctx.ins + ctx.idx);
1045
1046         bpf->jit.func = (void *)ctx.ins;
1047         bpf->jit.sz = size;
1048
1049         goto finish;
1050
1051 munmap:
1052         munmap(ctx.ins, size);
1053 finish:
1054         return rc;
1055 }