+#define A64_MOVN 0
+#define A64_MOVZ 2
+#define A64_MOVK 3
+static void
+mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t type,
+	uint16_t imm16, uint8_t shift)
+{
+	uint32_t insn;
+
+	insn = (!!is64) << 31;
+	insn |= type << 29;
+	insn |= 0x25 << 23;
+	insn |= (shift/16) << 21;
+	insn |= imm16 << 5;
+	insn |= rd;
+
+	emit_insn(ctx, insn, check_reg(rd) || check_mov_hw(is64, shift));
+}
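+
+/*
+ * The fields above follow the A64 "Move wide (immediate)" layout:
+ *
+ *	sf | opc   | 100101 | hw    | imm16 | Rd
+ *	31 | 30:29 | 28:23  | 22:21 | 20:5  | 4:0
+ *
+ * Illustration only: mov_imm(ctx, 0, 0, A64_MOVZ, 0x5678, 0) encodes
+ * "movz w0, #0x5678" as 0x528acf00.
+ */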
+
+static void
+emit_mov_imm32(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint32_t val)
+{
+	uint16_t upper = val >> 16;
+	uint16_t lower = val & 0xffff;
+
+	/* Positive number */
+	if ((val & 1UL << 31) == 0) {
+		mov_imm(ctx, is64, rd, A64_MOVZ, lower, 0);
+		if (upper)
+			mov_imm(ctx, is64, rd, A64_MOVK, upper, 16);
+	} else { /* Negative number */
+		if (upper == 0xffff) {
+			mov_imm(ctx, is64, rd, A64_MOVN, ~lower, 0);
+		} else {
+			mov_imm(ctx, is64, rd, A64_MOVN, ~upper, 16);
+			if (lower != 0xffff)
+				mov_imm(ctx, is64, rd, A64_MOVK, lower, 0);
+		}
+	}
+}
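+
+/*
+ * Illustration only, not emitted by this patch: emit_mov_imm32() produces
+ * at most two instructions, e.g.
+ *	val = 0x12345678: movz w_rd, #0x5678; movk w_rd, #0x1234, lsl #16
+ *	val = 0xfffffffe: movn w_rd, #0x1 alone suffices, as the 32-bit
+ *	result of MOVN is ~0x1 == 0xfffffffe.
+ */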
+
+static int
+u16_blocks_weight(const uint64_t val, bool one)
+{
+	return (((val >> 0) & 0xffff) == (one ? 0xffff : 0x0000)) +
+		(((val >> 16) & 0xffff) == (one ? 0xffff : 0x0000)) +
+		(((val >> 32) & 0xffff) == (one ? 0xffff : 0x0000)) +
+		(((val >> 48) & 0xffff) == (one ? 0xffff : 0x0000));
+}
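+
+/*
+ * Example (illustrative): for val = 0xffffffff00001234 the weight of
+ * all-ones blocks is 2 and of all-zeros blocks is 1, so the caller below
+ * starts from MOVN rather than MOVZ.
+ */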
+
+static void
+emit_mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint64_t val)
+{
+	uint64_t nval = ~val;
+	int movn, sr;
+
+	if (is64 == 0) {
+		emit_mov_imm32(ctx, 0, rd, (uint32_t)(val & 0xffffffff));
+		return;
+	}
+
+	/* Prefer MOVN when more blocks are all-ones than all-zeros */
+	movn = u16_blocks_weight(val, true) > u16_blocks_weight(val, false);
+	/* Find the shift of the most significant block to materialize first */
+	sr = movn ? rte_fls_u64(nval) - 1 : rte_fls_u64(val) - 1;
+	sr = RTE_ALIGN_FLOOR(sr, 16);
+	sr = RTE_MAX(sr, 0);
+
+	if (movn)
+		mov_imm(ctx, 1, rd, A64_MOVN, (nval >> sr) & 0xffff, sr);
+	else
+		mov_imm(ctx, 1, rd, A64_MOVZ, (val >> sr) & 0xffff, sr);
+
+	sr -= 16;
+	while (sr >= 0) {
+		if (((val >> sr) & 0xffff) != (movn ? 0xffff : 0x0000))
+			mov_imm(ctx, 1, rd, A64_MOVK, (val >> sr) & 0xffff, sr);
+		sr -= 16;
+	}
+}
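+
+/*
+ * Illustration only: for val = 0xffffffff00001234 (MOVN wins, nval =
+ * 0x00000000ffffedcb, so sr starts at 16) just two instructions result:
+ *	movn x_rd, #0xffff, lsl #16	-> x_rd = 0xffffffff0000ffff
+ *	movk x_rd, #0x1234		-> x_rd = 0xffffffff00001234
+ * MOVN sets every bit outside the imm16 field, so the all-ones blocks at
+ * bits 32..63 come for free and the loop never touches them.
+ */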
+
+#define A64_ADD 0x58
+#define A64_SUB 0x258
+static void
+emit_add_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
+	uint8_t rm, uint16_t op)
+{
+	uint32_t insn;
+
+	insn = (!!is64) << 31;
+	insn |= op << 21; /* shift == 0 */
+	insn |= rm << 16;
+	insn |= rn << 5;
+	insn |= rd;
+
+	emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
+}
+
+static void
+emit_add(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
+{
+	emit_add_sub(ctx, is64, rd, rd, rm, A64_ADD);
+}
+
+static void
+emit_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
+{
+	emit_add_sub(ctx, is64, rd, rd, rm, A64_SUB);
+}
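+
+/*
+ * Both wrappers reuse rd as the first source operand, matching eBPF's
+ * two-operand "dst op= src" form; e.g. emit_add(ctx, 1, 4, 5) yields
+ * "add x4, x4, x5" (register numbers illustrative).
+ */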
+
+static void
+emit_mul(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
+{
+	uint32_t insn;
+
+	insn = (!!is64) << 31;
+	insn |= 0xd8 << 21;
+	insn |= rm << 16;
+	insn |= A64_ZR << 10;
+	insn |= rd << 5;
+	insn |= rd;
+
+	emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
+}
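+
+/*
+ * 0xd8 << 21 with ra == A64_ZR selects MADD with a zero addend, i.e. the
+ * "mul rd, rd, rm" alias (illustration: "mul x4, x4, x5" for rd=4, rm=5).
+ */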
+
+#define A64_UDIV 0x2
+static void
+emit_data_process_two_src(struct a64_jit_ctx *ctx, bool is64, uint8_t rd,
+	uint8_t rn, uint8_t rm, uint16_t op)
+{
+	uint32_t insn;
+
+	insn = (!!is64) << 31;
+	insn |= 0xd6 << 21;
+	insn |= rm << 16;
+	insn |= op << 10;
+	insn |= rn << 5;
+	insn |= rd;
+
+	emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
+}
+
+static void
+emit_div(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
+{
+	emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_UDIV);
+}
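+
+/*
+ * Note: AArch64 UDIV never traps; a zero divisor architecturally yields
+ * zero in the destination register, so no explicit zero-divisor guard is
+ * emitted here.
+ */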
+
+static void
+emit_msub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
+	uint8_t rm, uint8_t ra)
+{
+	uint32_t insn;
+
+	insn = (!!is64) << 31;
+	insn |= 0xd8 << 21;
+	insn |= rm << 16;
+	insn |= 0x1 << 15;
+	insn |= ra << 10;
+	insn |= rn << 5;
+	insn |= rd;
+
+	emit_insn(ctx, insn, check_reg(rd) || check_reg(rn) || check_reg(rm) ||
+		  check_reg(ra));
+}
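+
+/*
+ * MSUB computes rd = ra - rn * rm; bit 15 (the o0 field) is what
+ * distinguishes it from the MADD encoding used by emit_mul() above.
+ */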
+
+static void
+emit_mod(struct a64_jit_ctx *ctx, bool is64, uint8_t tmp, uint8_t rd,
+	uint8_t rm)
+{
+	emit_data_process_two_src(ctx, is64, tmp, rd, rm, A64_UDIV);
+	emit_msub(ctx, is64, rd, tmp, rm, rd);
+}
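+
+/*
+ * Illustration only: with tmp as a scratch register, rd % rm becomes
+ *	udiv tmp, rd, rm	-> tmp = rd / rm
+ *	msub rd, tmp, rm, rd	-> rd = rd - tmp * rm
+ * leaving the unsigned remainder in rd.
+ */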
+