lib/librte_bpf/bpf_jit_arm64.c

   1 /* SPDX-License-Identifier: BSD-3-Clause
   2  * Copyright(C) 2019 Marvell International Ltd.
   3  */
   4
   5 #include <errno.h>
   6 #include <stdbool.h>
   7
   8 #include <rte_common.h>
   9 #include <rte_byteorder.h>
  10
  11 #include "bpf_impl.h"
  12
  13 #define A64_REG_MASK(r)         ((r) & 0x1f)
  14 #define A64_INVALID_OP_CODE     (0xffffffff)
  15
  16 #define TMP_REG_1               (EBPF_REG_10 + 1)
  17 #define TMP_REG_2               (EBPF_REG_10 + 2)
  18 #define TMP_REG_3               (EBPF_REG_10 + 3)
  19
  20 #define EBPF_FP                 (EBPF_REG_10)
  21 #define EBPF_OP_GET(op)         (BPF_OP(op) >> 4)
  22
  23 #define A64_R(x)                x
  24 #define A64_FP                  29
  25 #define A64_LR                  30
  26 #define A64_SP                  31
  27 #define A64_ZR                  31
  28
  29 #define check_imm(n, val) (((val) >= 0) ? !!((val) >> (n)) : !!((~val) >> (n)))
  30 #define mask_imm(n, val) ((val) & ((1 << (n)) - 1))
  31
  32 struct ebpf_a64_map {
  33         uint32_t off; /* eBPF to arm64 insn offset mapping for jump */
  34         uint8_t off_to_b; /* Offset to branch instruction delta */
  35 };
  36
  37 struct a64_jit_ctx {
  38         size_t stack_sz;          /* Stack size */
  39         uint32_t *ins;            /* ARM64 instructions. NULL if first pass */
  40         struct ebpf_a64_map *map; /* eBPF to arm64 insn mapping for jump */
  41         uint32_t idx;             /* Current instruction index */
  42         uint32_t program_start;   /* Program index, Just after prologue */
  43         uint32_t program_sz;      /* Program size. Found in first pass */
  44         uint8_t foundcall;        /* Found EBPF_CALL class code in eBPF pgm */
  45 };
  46
  47 static int
  48 check_immr_imms(bool is64, uint8_t immr, uint8_t imms)
  49 {
  50         const unsigned int width = is64 ? 64 : 32;
  51
  52         if (immr >= width || imms >= width)
  53                 return 1;
  54
  55         return 0;
  56 }
  57
  58 static int
  59 check_mov_hw(bool is64, const uint8_t val)
  60 {
  61         if (val == 16 || val == 0)
  62                 return 0;
  63         else if (is64 && val != 64 && val != 48 && val != 32)
  64                 return 1;
  65
  66         return 0;
  67 }
  68
  69 static int
  70 check_ls_sz(uint8_t sz)
  71 {
  72         if (sz == BPF_B || sz == BPF_H || sz == BPF_W || sz == EBPF_DW)
  73                 return 0;
  74
  75         return 1;
  76 }
  77
  78 static int
  79 check_reg(uint8_t r)
  80 {
  81         return (r > 31) ? 1 : 0;
  82 }
  83
  84 static int
  85 is_first_pass(struct a64_jit_ctx *ctx)
  86 {
  87         return (ctx->ins == NULL);
  88 }
  89
  90 static int
  91 check_invalid_args(struct a64_jit_ctx *ctx, uint32_t limit)
  92 {
  93         uint32_t idx;
  94
  95         if (is_first_pass(ctx))
  96                 return 0;
  97
  98         for (idx = 0; idx < limit; idx++) {
  99                 if (rte_le_to_cpu_32(ctx->ins[idx]) == A64_INVALID_OP_CODE) {
 100                         RTE_BPF_LOG(ERR,
 101                                 "%s: invalid opcode at %u;\n", __func__, idx);
 102                         return -EINVAL;
 103                 }
 104         }
 105         return 0;
 106 }
 107
 108 /* Emit an instruction */
 109 static inline void
 110 emit_insn(struct a64_jit_ctx *ctx, uint32_t insn, int error)
 111 {
 112         if (error)
 113                 insn = A64_INVALID_OP_CODE;
 114
 115         if (ctx->ins)
 116                 ctx->ins[ctx->idx] = rte_cpu_to_le_32(insn);
 117
 118         ctx->idx++;
 119 }
 120
 121 static void
 122 emit_ret(struct a64_jit_ctx *ctx)
 123 {
 124         emit_insn(ctx, 0xd65f03c0, 0);
 125 }
 126
 127 static void
 128 emit_add_sub_imm(struct a64_jit_ctx *ctx, bool is64, bool sub, uint8_t rd,
 129                  uint8_t rn, int16_t imm12)
 130 {
 131         uint32_t insn, imm;
 132
 133         imm = mask_imm(12, imm12);
 134         insn = (!!is64) << 31;
 135         insn |= (!!sub) << 30;
 136         insn |= 0x11000000;
 137         insn |= rd;
 138         insn |= rn << 5;
 139         insn |= imm << 10;
 140
 141         emit_insn(ctx, insn,
 142                   check_reg(rd) || check_reg(rn) || check_imm(12, imm12));
 143 }
 144
 145 static void
 146 emit_add_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12)
 147 {
 148         emit_add_sub_imm(ctx, 1, 0, rd, rn, imm12);
 149 }
 150
 151 static void
 152 emit_sub_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12)
 153 {
 154         emit_add_sub_imm(ctx, 1, 1, rd, rn, imm12);
 155 }
 156
 157 static void
 158 emit_mov(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn)
 159 {
 160         emit_add_sub_imm(ctx, is64, 0, rd, rn, 0);
 161 }
 162
 163 static void
 164 emit_mov_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn)
 165 {
 166         emit_mov(ctx, 1, rd, rn);
 167 }
 168
 169 static void
 170 emit_ls_pair_64(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2, uint8_t rn,
 171                 bool push, bool load, bool pre_index)
 172 {
 173         uint32_t insn;
 174
 175         insn = (!!load) << 22;
 176         insn |= (!!pre_index) << 24;
 177         insn |= 0xa8800000;
 178         insn |= rt;
 179         insn |= rn << 5;
 180         insn |= rt2 << 10;
 181         if (push)
 182                 insn |= 0x7e << 15; /* 0x7e means -2 with imm7 */
 183         else
 184                 insn |= 0x2 << 15;
 185
 186         emit_insn(ctx, insn, check_reg(rn) || check_reg(rt) || check_reg(rt2));
 187
 188 }
 189
 190 /* Emit stp rt, rt2, [sp, #-16]! */
 191 static void
 192 emit_stack_push(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2)
 193 {
 194         emit_ls_pair_64(ctx, rt, rt2, A64_SP, 1, 0, 1);
 195 }
 196
 197 /* Emit ldp rt, rt2, [sp, #16] */
 198 static void
 199 emit_stack_pop(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2)
 200 {
 201         emit_ls_pair_64(ctx, rt, rt2, A64_SP, 0, 1, 0);
 202 }
 203
 204 #define A64_MOVN 0
 205 #define A64_MOVZ 2
 206 #define A64_MOVK 3
 207 static void
 208 mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t type,
 209         uint16_t imm16, uint8_t shift)
 210 {
 211         uint32_t insn;
 212
 213         insn = (!!is64) << 31;
 214         insn |= type << 29;
 215         insn |= 0x25 << 23;
 216         insn |= (shift/16) << 21;
 217         insn |= imm16 << 5;
 218         insn |= rd;
 219
 220         emit_insn(ctx, insn, check_reg(rd) || check_mov_hw(is64, shift));
 221 }
 222
 223 static void
 224 emit_mov_imm32(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint32_t val)
 225 {
 226         uint16_t upper = val >> 16;
 227         uint16_t lower = val & 0xffff;
 228
 229         /* Positive number */
 230         if ((val & 1UL << 31) == 0) {
 231                 mov_imm(ctx, is64, rd, A64_MOVZ, lower, 0);
 232                 if (upper)
 233                         mov_imm(ctx, is64, rd, A64_MOVK, upper, 16);
 234         } else { /* Negative number */
 235                 if (upper == 0xffff) {
 236                         mov_imm(ctx, is64, rd, A64_MOVN, ~lower, 0);
 237                 } else {
 238                         mov_imm(ctx, is64, rd, A64_MOVN, ~upper, 16);
 239                         if (lower != 0xffff)
 240                                 mov_imm(ctx, is64, rd, A64_MOVK, lower, 0);
 241                 }
 242         }
 243 }
 244
 245 static int
 246 u16_blocks_weight(const uint64_t val, bool one)
 247 {
 248         return (((val >>  0) & 0xffff) == (one ? 0xffff : 0x0000)) +
 249                (((val >> 16) & 0xffff) == (one ? 0xffff : 0x0000)) +
 250                (((val >> 32) & 0xffff) == (one ? 0xffff : 0x0000)) +
 251                (((val >> 48) & 0xffff) == (one ? 0xffff : 0x0000));
 252 }
 253
 254 static void
 255 emit_mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint64_t val)
 256 {
 257         uint64_t nval = ~val;
 258         int movn, sr;
 259
 260         if (is64 == 0)
 261                 return emit_mov_imm32(ctx, 0, rd, (uint32_t)(val & 0xffffffff));
 262
 263         /* Find MOVN or MOVZ first */
 264         movn = u16_blocks_weight(val, true) > u16_blocks_weight(val, false);
 265         /* Find shift right value */
 266         sr = movn ? rte_fls_u64(nval) - 1 : rte_fls_u64(val) - 1;
 267         sr = RTE_ALIGN_FLOOR(sr, 16);
 268         sr = RTE_MAX(sr, 0);
 269
 270         if (movn)
 271                 mov_imm(ctx, 1, rd, A64_MOVN, (nval >> sr) & 0xffff, sr);
 272         else
 273                 mov_imm(ctx, 1, rd, A64_MOVZ, (val >> sr) & 0xffff, sr);
 274
 275         sr -= 16;
 276         while (sr >= 0) {
 277                 if (((val >> sr) & 0xffff) != (movn ? 0xffff : 0x0000))
 278                         mov_imm(ctx, 1, rd, A64_MOVK, (val >> sr) & 0xffff, sr);
 279                 sr -= 16;
 280         }
 281 }
 282
 283 static void
 284 emit_ls(struct a64_jit_ctx *ctx, uint8_t sz, uint8_t rt, uint8_t rn, uint8_t rm,
 285         bool load)
 286 {
 287         uint32_t insn;
 288
 289         insn = 0x1c1 << 21;
 290         if (load)
 291                 insn |= 1 << 22;
 292         if (sz == BPF_B)
 293                 insn |= 0 << 30;
 294         else if (sz == BPF_H)
 295                 insn |= 1 << 30;
 296         else if (sz == BPF_W)
 297                 insn |= 2 << 30;
 298         else if (sz == EBPF_DW)
 299                 insn |= 3 << 30;
 300
 301         insn |= rm << 16;
 302         insn |= 0x1a << 10; /* LSL and S = 0 */
 303         insn |= rn << 5;
 304         insn |= rt;
 305
 306         emit_insn(ctx, insn, check_reg(rt) || check_reg(rn) || check_reg(rm) ||
 307                   check_ls_sz(sz));
 308 }
 309
 310 static void
 311 emit_str(struct a64_jit_ctx *ctx, uint8_t sz, uint8_t rt, uint8_t rn,
 312          uint8_t rm)
 313 {
 314         emit_ls(ctx, sz, rt, rn, rm, 0);
 315 }
 316
 317 static void
 318 emit_ldr(struct a64_jit_ctx *ctx, uint8_t sz, uint8_t rt, uint8_t rn,
 319          uint8_t rm)
 320 {
 321         emit_ls(ctx, sz, rt, rn, rm, 1);
 322 }
 323
 324 #define A64_ADD 0x58
 325 #define A64_SUB 0x258
 326 static void
 327 emit_add_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
 328              uint8_t rm, uint16_t op)
 329 {
 330         uint32_t insn;
 331
 332         insn = (!!is64) << 31;
 333         insn |= op << 21; /* shift == 0 */
 334         insn |= rm << 16;
 335         insn |= rn << 5;
 336         insn |= rd;
 337
 338         emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
 339 }
 340
 341 static void
 342 emit_add(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 343 {
 344         emit_add_sub(ctx, is64, rd, rd, rm, A64_ADD);
 345 }
 346
 347 static void
 348 emit_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 349 {
 350         emit_add_sub(ctx, is64, rd, rd, rm, A64_SUB);
 351 }
 352
 353 static void
 354 emit_neg(struct a64_jit_ctx *ctx, bool is64, uint8_t rd)
 355 {
 356         emit_add_sub(ctx, is64, rd, A64_ZR, rd, A64_SUB);
 357 }
 358
 359 static void
 360 emit_mul(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 361 {
 362         uint32_t insn;
 363
 364         insn = (!!is64) << 31;
 365         insn |= 0xd8 << 21;
 366         insn |= rm << 16;
 367         insn |= A64_ZR << 10;
 368         insn |= rd << 5;
 369         insn |= rd;
 370
 371         emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
 372 }
 373
 374 #define A64_UDIV 0x2
 375 #define A64_LSLV 0x8
 376 #define A64_LSRV 0x9
 377 #define A64_ASRV 0xA
 378 static void
 379 emit_data_process_two_src(struct a64_jit_ctx *ctx, bool is64, uint8_t rd,
 380                           uint8_t rn, uint8_t rm, uint16_t op)
 381
 382 {
 383         uint32_t insn;
 384
 385         insn = (!!is64) << 31;
 386         insn |= 0xd6 << 21;
 387         insn |= rm << 16;
 388         insn |= op << 10;
 389         insn |= rn << 5;
 390         insn |= rd;
 391
 392         emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
 393 }
 394
 395 static void
 396 emit_div(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 397 {
 398         emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_UDIV);
 399 }
 400
 401 static void
 402 emit_lslv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 403 {
 404         emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_LSLV);
 405 }
 406
 407 static void
 408 emit_lsrv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 409 {
 410         emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_LSRV);
 411 }
 412
 413 static void
 414 emit_asrv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 415 {
 416         emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_ASRV);
 417 }
 418
 419 #define A64_UBFM 0x2
 420 #define A64_SBFM 0x0
 421 static void
 422 emit_bitfield(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
 423               uint8_t immr, uint8_t imms, uint16_t op)
 424
 425 {
 426         uint32_t insn;
 427
 428         insn = (!!is64) << 31;
 429         if (insn)
 430                 insn |= 1 << 22; /* Set N bit when is64 is set */
 431         insn |= op << 29;
 432         insn |= 0x26 << 23;
 433         insn |= immr << 16;
 434         insn |= imms << 10;
 435         insn |= rn << 5;
 436         insn |= rd;
 437
 438         emit_insn(ctx, insn, check_reg(rd) || check_reg(rn) ||
 439                   check_immr_imms(is64, immr, imms));
 440 }
 441 static void
 442 emit_lsl(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
 443 {
 444         const unsigned int width = is64 ? 64 : 32;
 445         uint8_t imms, immr;
 446
 447         immr = (width - imm) & (width - 1);
 448         imms = width - 1 - imm;
 449
 450         emit_bitfield(ctx, is64, rd, rd, immr, imms, A64_UBFM);
 451 }
 452
 453 static void
 454 emit_lsr(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
 455 {
 456         emit_bitfield(ctx, is64, rd, rd, imm, is64 ? 63 : 31, A64_UBFM);
 457 }
 458
 459 static void
 460 emit_asr(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
 461 {
 462         emit_bitfield(ctx, is64, rd, rd, imm, is64 ? 63 : 31, A64_SBFM);
 463 }
 464
 465 #define A64_AND 0
 466 #define A64_OR 1
 467 #define A64_XOR 2
 468 static void
 469 emit_logical(struct a64_jit_ctx *ctx, bool is64, uint8_t rd,
 470              uint8_t rm, uint16_t op)
 471 {
 472         uint32_t insn;
 473
 474         insn = (!!is64) << 31;
 475         insn |= op << 29;
 476         insn |= 0x50 << 21;
 477         insn |= rm << 16;
 478         insn |= rd << 5;
 479         insn |= rd;
 480
 481         emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
 482 }
 483
 484 static void
 485 emit_or(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 486 {
 487         emit_logical(ctx, is64, rd, rm, A64_OR);
 488 }
 489
 490 static void
 491 emit_and(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 492 {
 493         emit_logical(ctx, is64, rd, rm, A64_AND);
 494 }
 495
 496 static void
 497 emit_xor(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
 498 {
 499         emit_logical(ctx, is64, rd, rm, A64_XOR);
 500 }
 501
 502 static void
 503 emit_msub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
 504           uint8_t rm, uint8_t ra)
 505 {
 506         uint32_t insn;
 507
 508         insn = (!!is64) << 31;
 509         insn |= 0xd8 << 21;
 510         insn |= rm << 16;
 511         insn |= 0x1 << 15;
 512         insn |= ra << 10;
 513         insn |= rn << 5;
 514         insn |= rd;
 515
 516         emit_insn(ctx, insn, check_reg(rd) || check_reg(rn) || check_reg(rm) ||
 517                   check_reg(ra));
 518 }
 519
 520 static void
 521 emit_mod(struct a64_jit_ctx *ctx, bool is64, uint8_t tmp, uint8_t rd,
 522          uint8_t rm)
 523 {
 524         emit_data_process_two_src(ctx, is64, tmp, rd, rm, A64_UDIV);
 525         emit_msub(ctx, is64, rd, tmp, rm, rd);
 526 }
 527
 528 static void
 529 emit_zero_extend(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
 530 {
 531         switch (imm) {
 532         case 16:
 533                 /* Zero-extend 16 bits into 64 bits */
 534                 emit_bitfield(ctx, 1, rd, rd, 0, 15, A64_UBFM);
 535                 break;
 536         case 32:
 537                 /* Zero-extend 32 bits into 64 bits */
 538                 emit_bitfield(ctx, 1, rd, rd, 0, 31, A64_UBFM);
 539                 break;
 540         case 64:
 541                 break;
 542         default:
 543                 /* Generate error */
 544                 emit_insn(ctx, 0, 1);
 545         }
 546 }
 547
 548 static void
 549 emit_rev(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
 550 {
 551         uint32_t insn;
 552
 553         insn = 0xdac00000;
 554         insn |= rd << 5;
 555         insn |= rd;
 556
 557         switch (imm) {
 558         case 16:
 559                 insn |= 1 << 10;
 560                 emit_insn(ctx, insn, check_reg(rd));
 561                 emit_zero_extend(ctx, rd, 16);
 562                 break;
 563         case 32:
 564                 insn |= 2 << 10;
 565                 emit_insn(ctx, insn, check_reg(rd));
 566                 /* Upper 32 bits already cleared */
 567                 break;
 568         case 64:
 569                 insn |= 3 << 10;
 570                 emit_insn(ctx, insn, check_reg(rd));
 571                 break;
 572         default:
 573                 /* Generate error */
 574                 emit_insn(ctx, insn, 1);
 575         }
 576 }
 577
 578 static int
 579 is_be(void)
 580 {
 581 #if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
 582         return 1;
 583 #else
 584         return 0;
 585 #endif
 586 }
 587
 588 static void
 589 emit_be(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
 590 {
 591         if (is_be())
 592                 emit_zero_extend(ctx, rd, imm);
 593         else
 594                 emit_rev(ctx, rd, imm);
 595 }
 596
 597 static void
 598 emit_le(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
 599 {
 600         if (is_be())
 601                 emit_rev(ctx, rd, imm);
 602         else
 603                 emit_zero_extend(ctx, rd, imm);
 604 }
 605
 606 static uint8_t
 607 ebpf_to_a64_reg(struct a64_jit_ctx *ctx, uint8_t reg)
 608 {
 609         const uint32_t ebpf2a64_has_call[] = {
 610                 /* Map A64 R7 register as EBPF return register */
 611                 [EBPF_REG_0] = A64_R(7),
 612                 /* Map A64 arguments register as EBPF arguments register */
 613                 [EBPF_REG_1] = A64_R(0),
 614                 [EBPF_REG_2] = A64_R(1),
 615                 [EBPF_REG_3] = A64_R(2),
 616                 [EBPF_REG_4] = A64_R(3),
 617                 [EBPF_REG_5] = A64_R(4),
 618                 /* Map A64 callee save register as EBPF callee save register */
 619                 [EBPF_REG_6] = A64_R(19),
 620                 [EBPF_REG_7] = A64_R(20),
 621                 [EBPF_REG_8] = A64_R(21),
 622                 [EBPF_REG_9] = A64_R(22),
 623                 [EBPF_FP]    = A64_R(25),
 624                 /* Map A64 scratch registers as temporary storage */
 625                 [TMP_REG_1] = A64_R(9),
 626                 [TMP_REG_2] = A64_R(10),
 627                 [TMP_REG_3] = A64_R(11),
 628         };
 629
 630         const uint32_t ebpf2a64_no_call[] = {
 631                 /* Map A64 R7 register as EBPF return register */
 632                 [EBPF_REG_0] = A64_R(7),
 633                 /* Map A64 arguments register as EBPF arguments register */
 634                 [EBPF_REG_1] = A64_R(0),
 635                 [EBPF_REG_2] = A64_R(1),
 636                 [EBPF_REG_3] = A64_R(2),
 637                 [EBPF_REG_4] = A64_R(3),
 638                 [EBPF_REG_5] = A64_R(4),
 639                 /*
 640                  * EBPF program does not have EBPF_CALL op code,
 641                  * Map A64 scratch registers as EBPF callee save registers.
 642                  */
 643                 [EBPF_REG_6] = A64_R(9),
 644                 [EBPF_REG_7] = A64_R(10),
 645                 [EBPF_REG_8] = A64_R(11),
 646                 [EBPF_REG_9] = A64_R(12),
 647                 /* Map A64 FP register as EBPF FP register */
 648                 [EBPF_FP]    = A64_FP,
 649                 /* Map remaining A64 scratch registers as temporary storage */
 650                 [TMP_REG_1] = A64_R(13),
 651                 [TMP_REG_2] = A64_R(14),
 652                 [TMP_REG_3] = A64_R(15),
 653         };
 654
 655         if (ctx->foundcall)
 656                 return ebpf2a64_has_call[reg];
 657         else
 658                 return ebpf2a64_no_call[reg];
 659 }
 660
 661 /*
 662  * Procedure call standard for the arm64
 663  * -------------------------------------
 664  * R0..R7  - Parameter/result registers
 665  * R8      - Indirect result location register
 666  * R9..R15 - Scratch registers
 667  * R15     - Platform Register
 668  * R16     - First intra-procedure-call scratch register
 669  * R17     - Second intra-procedure-call temporary register
 670  * R19-R28 - Callee saved registers
 671  * R29     - Frame pointer
 672  * R30     - Link register
 673  * R31     - Stack pointer
 674  */
 675 static void
 676 emit_prologue_has_call(struct a64_jit_ctx *ctx)
 677 {
 678         uint8_t r6, r7, r8, r9, fp;
 679
 680         r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
 681         r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7);
 682         r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8);
 683         r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9);
 684         fp = ebpf_to_a64_reg(ctx, EBPF_FP);
 685
 686         /*
 687          * eBPF prog stack layout
 688          *
 689          *                               high
 690          *       eBPF prologue       0:+-----+ <= original A64_SP
 691          *                             |FP/LR|
 692          *                         -16:+-----+ <= current A64_FP
 693          *    Callee saved registers   | ... |
 694          *             EBPF_FP =>  -64:+-----+
 695          *                             |     |
 696          *       eBPF prog stack       | ... |
 697          *                             |     |
 698          * (EBPF_FP - bpf->stack_sz)=> +-----+
 699          * Pad for A64_SP 16B alignment| PAD |
 700          * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP
 701          *                             |     |
 702          *                             | ... | Function call stack
 703          *                             |     |
 704          *                             +-----+
 705          *                              low
 706          */
 707         emit_stack_push(ctx, A64_FP, A64_LR);
 708         emit_mov_64(ctx, A64_FP, A64_SP);
 709         emit_stack_push(ctx, r6, r7);
 710         emit_stack_push(ctx, r8, r9);
 711         /*
 712          * There is no requirement to save A64_R(28) in stack. Doing it here,
 713          * because, A64_SP needs be to 16B aligned and STR vs STP
 714          * takes same number of cycles(typically).
 715          */
 716         emit_stack_push(ctx, fp, A64_R(28));
 717         emit_mov_64(ctx, fp, A64_SP);
 718         if (ctx->stack_sz)
 719                 emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
 720 }
 721
 722 static void
 723 emit_epilogue_has_call(struct a64_jit_ctx *ctx)
 724 {
 725         uint8_t r6, r7, r8, r9, fp, r0;
 726
 727         r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
 728         r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7);
 729         r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8);
 730         r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9);
 731         fp = ebpf_to_a64_reg(ctx, EBPF_FP);
 732         r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
 733
 734         if (ctx->stack_sz)
 735                 emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
 736         emit_stack_pop(ctx, fp, A64_R(28));
 737         emit_stack_pop(ctx, r8, r9);
 738         emit_stack_pop(ctx, r6, r7);
 739         emit_stack_pop(ctx, A64_FP, A64_LR);
 740         emit_mov_64(ctx, A64_R(0), r0);
 741         emit_ret(ctx);
 742 }
 743
 744 static void
 745 emit_prologue_no_call(struct a64_jit_ctx *ctx)
 746 {
 747         /*
 748          * eBPF prog stack layout without EBPF_CALL opcode
 749          *
 750          *                               high
 751          *    eBPF prologue(EBPF_FP) 0:+-----+ <= original A64_SP/current A64_FP
 752          *                             |     |
 753          *                             | ... |
 754          *            eBPF prog stack  |     |
 755          *                             |     |
 756          * (EBPF_FP - bpf->stack_sz)=> +-----+
 757          * Pad for A64_SP 16B alignment| PAD |
 758          * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP
 759          *                             |     |
 760          *                             | ... | Function call stack
 761          *                             |     |
 762          *                             +-----+
 763          *                              low
 764          */
 765         if (ctx->stack_sz) {
 766                 emit_mov_64(ctx, A64_FP, A64_SP);
 767                 emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
 768         }
 769 }
 770
 771 static void
 772 emit_epilogue_no_call(struct a64_jit_ctx *ctx)
 773 {
 774         if (ctx->stack_sz)
 775                 emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
 776         emit_mov_64(ctx, A64_R(0), ebpf_to_a64_reg(ctx, EBPF_REG_0));
 777         emit_ret(ctx);
 778 }
 779
 780 static void
 781 emit_prologue(struct a64_jit_ctx *ctx)
 782 {
 783         if (ctx->foundcall)
 784                 emit_prologue_has_call(ctx);
 785         else
 786                 emit_prologue_no_call(ctx);
 787
 788         ctx->program_start = ctx->idx;
 789 }
 790
 791 static void
 792 emit_epilogue(struct a64_jit_ctx *ctx)
 793 {
 794         ctx->program_sz = ctx->idx - ctx->program_start;
 795
 796         if (ctx->foundcall)
 797                 emit_epilogue_has_call(ctx);
 798         else
 799                 emit_epilogue_no_call(ctx);
 800 }
 801
 802 static void
 803 emit_cbnz(struct a64_jit_ctx *ctx, bool is64, uint8_t rt, int32_t imm19)
 804 {
 805         uint32_t insn, imm;
 806
 807         imm = mask_imm(19, imm19);
 808         insn = (!!is64) << 31;
 809         insn |= 0x35 << 24;
 810         insn |= imm << 5;
 811         insn |= rt;
 812
 813         emit_insn(ctx, insn, check_reg(rt) || check_imm(19, imm19));
 814 }
 815
 816 static void
 817 emit_b(struct a64_jit_ctx *ctx, int32_t imm26)
 818 {
 819         uint32_t insn, imm;
 820
 821         imm = mask_imm(26, imm26);
 822         insn = 0x5 << 26;
 823         insn |= imm;
 824
 825         emit_insn(ctx, insn, check_imm(26, imm26));
 826 }
 827
 828 static void
 829 emit_return_zero_if_src_zero(struct a64_jit_ctx *ctx, bool is64, uint8_t src)
 830 {
 831         uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
 832         uint16_t jump_to_epilogue;
 833
 834         emit_cbnz(ctx, is64, src, 3);
 835         emit_mov_imm(ctx, is64, r0, 0);
 836         jump_to_epilogue = (ctx->program_start + ctx->program_sz) - ctx->idx;
 837         emit_b(ctx, jump_to_epilogue);
 838 }
 839
 840 static void
 841 check_program_has_call(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
 842 {
 843         const struct ebpf_insn *ins;
 844         uint8_t op;
 845         uint32_t i;
 846
 847         for (i = 0; i != bpf->prm.nb_ins; i++) {
 848                 ins = bpf->prm.ins + i;
 849                 op = ins->code;
 850
 851                 switch (op) {
 852                 /* Call imm */
 853                 case (BPF_JMP | EBPF_CALL):
 854                         ctx->foundcall = 1;
 855                         return;
 856                 }
 857         }
 858 }
 859
 860 /*
 861  * Walk through eBPF code and translate them to arm64 one.
 862  */
 863 static int
 864 emit(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
 865 {
 866         uint8_t op, dst, src, tmp1, tmp2;
 867         const struct ebpf_insn *ins;
 868         uint64_t u64;
 869         int16_t off;
 870         int32_t imm;
 871         uint32_t i;
 872         bool is64;
 873         int rc;
 874
 875         /* Reset context fields */
 876         ctx->idx = 0;
 877         /* arm64 SP must be aligned to 16 */
 878         ctx->stack_sz = RTE_ALIGN_MUL_CEIL(bpf->stack_sz, 16);
 879         tmp1 = ebpf_to_a64_reg(ctx, TMP_REG_1);
 880         tmp2 = ebpf_to_a64_reg(ctx, TMP_REG_2);
 881
 882         emit_prologue(ctx);
 883
 884         for (i = 0; i != bpf->prm.nb_ins; i++) {
 885
 886                 ins = bpf->prm.ins + i;
 887                 op = ins->code;
 888                 off = ins->off;
 889                 imm = ins->imm;
 890
 891                 dst = ebpf_to_a64_reg(ctx, ins->dst_reg);
 892                 src = ebpf_to_a64_reg(ctx, ins->src_reg);
 893                 is64 = (BPF_CLASS(op) == EBPF_ALU64);
 894
 895                 switch (op) {
 896                 /* dst = src */
 897                 case (BPF_ALU | EBPF_MOV | BPF_X):
 898                 case (EBPF_ALU64 | EBPF_MOV | BPF_X):
 899                         emit_mov(ctx, is64, dst, src);
 900                         break;
 901                 /* dst = imm */
 902                 case (BPF_ALU | EBPF_MOV | BPF_K):
 903                 case (EBPF_ALU64 | EBPF_MOV | BPF_K):
 904                         emit_mov_imm(ctx, is64, dst, imm);
 905                         break;
 906                 /* dst += src */
 907                 case (BPF_ALU | BPF_ADD | BPF_X):
 908                 case (EBPF_ALU64 | BPF_ADD | BPF_X):
 909                         emit_add(ctx, is64, dst, src);
 910                         break;
 911                 /* dst += imm */
 912                 case (BPF_ALU | BPF_ADD | BPF_K):
 913                 case (EBPF_ALU64 | BPF_ADD | BPF_K):
 914                         emit_mov_imm(ctx, is64, tmp1, imm);
 915                         emit_add(ctx, is64, dst, tmp1);
 916                         break;
 917                 /* dst -= src */
 918                 case (BPF_ALU | BPF_SUB | BPF_X):
 919                 case (EBPF_ALU64 | BPF_SUB | BPF_X):
 920                         emit_sub(ctx, is64, dst, src);
 921                         break;
 922                 /* dst -= imm */
 923                 case (BPF_ALU | BPF_SUB | BPF_K):
 924                 case (EBPF_ALU64 | BPF_SUB | BPF_K):
 925                         emit_mov_imm(ctx, is64, tmp1, imm);
 926                         emit_sub(ctx, is64, dst, tmp1);
 927                         break;
 928                 /* dst *= src */
 929                 case (BPF_ALU | BPF_MUL | BPF_X):
 930                 case (EBPF_ALU64 | BPF_MUL | BPF_X):
 931                         emit_mul(ctx, is64, dst, src);
 932                         break;
 933                 /* dst *= imm */
 934                 case (BPF_ALU | BPF_MUL | BPF_K):
 935                 case (EBPF_ALU64 | BPF_MUL | BPF_K):
 936                         emit_mov_imm(ctx, is64, tmp1, imm);
 937                         emit_mul(ctx, is64, dst, tmp1);
 938                         break;
 939                 /* dst /= src */
 940                 case (BPF_ALU | BPF_DIV | BPF_X):
 941                 case (EBPF_ALU64 | BPF_DIV | BPF_X):
 942                         emit_return_zero_if_src_zero(ctx, is64, src);
 943                         emit_div(ctx, is64, dst, src);
 944                         break;
 945                 /* dst /= imm */
 946                 case (BPF_ALU | BPF_DIV | BPF_K):
 947                 case (EBPF_ALU64 | BPF_DIV | BPF_K):
 948                         emit_mov_imm(ctx, is64, tmp1, imm);
 949                         emit_div(ctx, is64, dst, tmp1);
 950                         break;
 951                 /* dst %= src */
 952                 case (BPF_ALU | BPF_MOD | BPF_X):
 953                 case (EBPF_ALU64 | BPF_MOD | BPF_X):
 954                         emit_return_zero_if_src_zero(ctx, is64, src);
 955                         emit_mod(ctx, is64, tmp1, dst, src);
 956                         break;
 957                 /* dst %= imm */
 958                 case (BPF_ALU | BPF_MOD | BPF_K):
 959                 case (EBPF_ALU64 | BPF_MOD | BPF_K):
 960                         emit_mov_imm(ctx, is64, tmp1, imm);
 961                         emit_mod(ctx, is64, tmp2, dst, tmp1);
 962                         break;
 963                 /* dst |= src */
 964                 case (BPF_ALU | BPF_OR | BPF_X):
 965                 case (EBPF_ALU64 | BPF_OR | BPF_X):
 966                         emit_or(ctx, is64, dst, src);
 967                         break;
 968                 /* dst |= imm */
 969                 case (BPF_ALU | BPF_OR | BPF_K):
 970                 case (EBPF_ALU64 | BPF_OR | BPF_K):
 971                         emit_mov_imm(ctx, is64, tmp1, imm);
 972                         emit_or(ctx, is64, dst, tmp1);
 973                         break;
 974                 /* dst &= src */
 975                 case (BPF_ALU | BPF_AND | BPF_X):
 976                 case (EBPF_ALU64 | BPF_AND | BPF_X):
 977                         emit_and(ctx, is64, dst, src);
 978                         break;
 979                 /* dst &= imm */
 980                 case (BPF_ALU | BPF_AND | BPF_K):
 981                 case (EBPF_ALU64 | BPF_AND | BPF_K):
 982                         emit_mov_imm(ctx, is64, tmp1, imm);
 983                         emit_and(ctx, is64, dst, tmp1);
 984                         break;
 985                 /* dst ^= src */
 986                 case (BPF_ALU | BPF_XOR | BPF_X):
 987                 case (EBPF_ALU64 | BPF_XOR | BPF_X):
 988                         emit_xor(ctx, is64, dst, src);
 989                         break;
 990                 /* dst ^= imm */
 991                 case (BPF_ALU | BPF_XOR | BPF_K):
 992                 case (EBPF_ALU64 | BPF_XOR | BPF_K):
 993                         emit_mov_imm(ctx, is64, tmp1, imm);
 994                         emit_xor(ctx, is64, dst, tmp1);
 995                         break;
 996                 /* dst = -dst */
 997                 case (BPF_ALU | BPF_NEG):
 998                 case (EBPF_ALU64 | BPF_NEG):
 999                         emit_neg(ctx, is64, dst);
1000                         break;
1001                 /* dst <<= src */
1002                 case BPF_ALU | BPF_LSH | BPF_X:
1003                 case EBPF_ALU64 | BPF_LSH | BPF_X:
1004                         emit_lslv(ctx, is64, dst, src);
1005                         break;
1006                 /* dst <<= imm */
1007                 case BPF_ALU | BPF_LSH | BPF_K:
1008                 case EBPF_ALU64 | BPF_LSH | BPF_K:
1009                         emit_lsl(ctx, is64, dst, imm);
1010                         break;
1011                 /* dst >>= src */
1012                 case BPF_ALU | BPF_RSH | BPF_X:
1013                 case EBPF_ALU64 | BPF_RSH | BPF_X:
1014                         emit_lsrv(ctx, is64, dst, src);
1015                         break;
1016                 /* dst >>= imm */
1017                 case BPF_ALU | BPF_RSH | BPF_K:
1018                 case EBPF_ALU64 | BPF_RSH | BPF_K:
1019                         emit_lsr(ctx, is64, dst, imm);
1020                         break;
1021                 /* dst >>= src (arithmetic) */
1022                 case BPF_ALU | EBPF_ARSH | BPF_X:
1023                 case EBPF_ALU64 | EBPF_ARSH | BPF_X:
1024                         emit_asrv(ctx, is64, dst, src);
1025                         break;
1026                 /* dst >>= imm (arithmetic) */
1027                 case BPF_ALU | EBPF_ARSH | BPF_K:
1028                 case EBPF_ALU64 | EBPF_ARSH | BPF_K:
1029                         emit_asr(ctx, is64, dst, imm);
1030                         break;
1031                 /* dst = be##imm(dst) */
1032                 case (BPF_ALU | EBPF_END | EBPF_TO_BE):
1033                         emit_be(ctx, dst, imm);
1034                         break;
1035                 /* dst = le##imm(dst) */
1036                 case (BPF_ALU | EBPF_END | EBPF_TO_LE):
1037                         emit_le(ctx, dst, imm);
1038                         break;
1039                 /* dst = *(size *) (src + off) */
1040                 case (BPF_LDX | BPF_MEM | BPF_B):
1041                 case (BPF_LDX | BPF_MEM | BPF_H):
1042                 case (BPF_LDX | BPF_MEM | BPF_W):
1043                 case (BPF_LDX | BPF_MEM | EBPF_DW):
1044                         emit_mov_imm(ctx, 1, tmp1, off);
1045                         emit_ldr(ctx, BPF_SIZE(op), dst, src, tmp1);
1046                         break;
1047                 /* dst = imm64 */
1048                 case (BPF_LD | BPF_IMM | EBPF_DW):
1049                         u64 = ((uint64_t)ins[1].imm << 32) | (uint32_t)imm;
1050                         emit_mov_imm(ctx, 1, dst, u64);
1051                         i++;
1052                         break;
1053                 /* *(size *)(dst + off) = src */
1054                 case (BPF_STX | BPF_MEM | BPF_B):
1055                 case (BPF_STX | BPF_MEM | BPF_H):
1056                 case (BPF_STX | BPF_MEM | BPF_W):
1057                 case (BPF_STX | BPF_MEM | EBPF_DW):
1058                         emit_mov_imm(ctx, 1, tmp1, off);
1059                         emit_str(ctx, BPF_SIZE(op), src, dst, tmp1);
1060                         break;
1061                 /* *(size *)(dst + off) = imm */
1062                 case (BPF_ST | BPF_MEM | BPF_B):
1063                 case (BPF_ST | BPF_MEM | BPF_H):
1064                 case (BPF_ST | BPF_MEM | BPF_W):
1065                 case (BPF_ST | BPF_MEM | EBPF_DW):
1066                         emit_mov_imm(ctx, 1, tmp1, imm);
1067                         emit_mov_imm(ctx, 1, tmp2, off);
1068                         emit_str(ctx, BPF_SIZE(op), tmp1, dst, tmp2);
1069                         break;
1070                 /* Return r0 */
1071                 case (BPF_JMP | EBPF_EXIT):
1072                         emit_epilogue(ctx);
1073                         break;
1074                 default:
1075                         RTE_BPF_LOG(ERR,
1076                                 "%s(%p): invalid opcode %#x at pc: %u;\n",
1077                                 __func__, bpf, ins->code, i);
1078                         return -EINVAL;
1079                 }
1080         }
1081         rc = check_invalid_args(ctx, ctx->idx);
1082
1083         return rc;
1084 }
1085
1086 /*
1087  * Produce a native ISA version of the given BPF code.
1088  */
1089 int
1090 bpf_jit_arm64(struct rte_bpf *bpf)
1091 {
1092         struct a64_jit_ctx ctx;
1093         size_t size;
1094         int rc;
1095
1096         /* Init JIT context */
1097         memset(&ctx, 0, sizeof(ctx));
1098
1099         /* Find eBPF program has call class or not */
1100         check_program_has_call(&ctx, bpf);
1101
1102         /* First pass to calculate total code size and valid jump offsets */
1103         rc = emit(&ctx, bpf);
1104         if (rc)
1105                 goto finish;
1106
1107         size = ctx.idx * sizeof(uint32_t);
1108         /* Allocate JIT program memory */
1109         ctx.ins = mmap(NULL, size, PROT_READ | PROT_WRITE,
1110                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1111         if (ctx.ins == MAP_FAILED) {
1112                 rc = -ENOMEM;
1113                 goto finish;
1114         }
1115
1116         /* Second pass to generate code */
1117         rc = emit(&ctx, bpf);
1118         if (rc)
1119                 goto munmap;
1120
1121         rc = mprotect(ctx.ins, size, PROT_READ | PROT_EXEC) != 0;
1122         if (rc) {
1123                 rc = -errno;
1124                 goto munmap;
1125         }
1126
1127         /* Flush the icache */
1128         __builtin___clear_cache(ctx.ins, ctx.ins + ctx.idx);
1129
1130         bpf->jit.func = (void *)ctx.ins;
1131         bpf->jit.sz = size;
1132
1133         goto finish;
1134
1135 munmap:
1136         munmap(ctx.ins, size);
1137 finish:
1138         return rc;
1139 }