1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2021 Microsoft Corporation
4 * Based on bpf_convert_filter() in the Linux kernel sources
7 * Licensed as BSD with permission original authors.
8 * Copyright (C) 2017 Tobias Klauser
9 * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
20 #include <rte_common.h>
23 #include <rte_malloc.h>
24 #include <rte_errno.h>
26 /* Workaround name conflicts with libpcap */
27 #define bpf_validate(f, len) bpf_validate_libpcap(f, len)
28 #include <pcap/pcap.h>
36 #define BPF_MAXINSNS 4096
40 * Linux socket filter uses negative absolute offsets to
41 * reference ancillary data.
43 #define SKF_AD_OFF (-0x1000)
44 #define SKF_AD_PROTOCOL 0
45 #define SKF_AD_PKTTYPE 4
46 #define SKF_AD_IFINDEX 8
47 #define SKF_AD_NLATTR 12
48 #define SKF_AD_NLATTR_NEST 16
49 #define SKF_AD_MARK 20
50 #define SKF_AD_QUEUE 24
51 #define SKF_AD_HATYPE 28
52 #define SKF_AD_RXHASH 32
54 #define SKF_AD_ALU_XOR_X 40
55 #define SKF_AD_VLAN_TAG 44
56 #define SKF_AD_VLAN_TAG_PRESENT 48
57 #define SKF_AD_PAY_OFFSET 52
58 #define SKF_AD_RANDOM 56
59 #define SKF_AD_VLAN_TPID 60
62 /* ArgX, context and stack frame pointer register positions. Note,
63 * Arg1, Arg2, Arg3, etc are used as argument mappings of function
64 * calls in BPF_CALL instruction.
66 #define BPF_REG_ARG1 EBPF_REG_1
67 #define BPF_REG_ARG2 EBPF_REG_2
68 #define BPF_REG_ARG3 EBPF_REG_3
69 #define BPF_REG_ARG4 EBPF_REG_4
70 #define BPF_REG_ARG5 EBPF_REG_5
71 #define BPF_REG_CTX EBPF_REG_6
72 #define BPF_REG_FP EBPF_REG_10
74 /* Additional register mappings for converted user programs. */
75 #define BPF_REG_A EBPF_REG_0
76 #define BPF_REG_X EBPF_REG_7
77 #define BPF_REG_TMP EBPF_REG_8
79 /* Helper macros for filter block array initializers. */
81 /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
83 #define EBPF_ALU64_REG(OP, DST, SRC) \
84 ((struct ebpf_insn) { \
85 .code = EBPF_ALU64 | BPF_OP(OP) | BPF_X, \
91 #define BPF_ALU32_REG(OP, DST, SRC) \
92 ((struct ebpf_insn) { \
93 .code = BPF_ALU | BPF_OP(OP) | BPF_X, \
99 /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
101 #define BPF_ALU32_IMM(OP, DST, IMM) \
102 ((struct ebpf_insn) { \
103 .code = BPF_ALU | BPF_OP(OP) | BPF_K, \
109 /* Short form of mov, dst_reg = src_reg */
111 #define BPF_MOV64_REG(DST, SRC) \
112 ((struct ebpf_insn) { \
113 .code = EBPF_ALU64 | EBPF_MOV | BPF_X, \
119 #define BPF_MOV32_REG(DST, SRC) \
120 ((struct ebpf_insn) { \
121 .code = BPF_ALU | EBPF_MOV | BPF_X, \
127 /* Short form of mov, dst_reg = imm32 */
129 #define BPF_MOV32_IMM(DST, IMM) \
130 ((struct ebpf_insn) { \
131 .code = BPF_ALU | EBPF_MOV | BPF_K, \
137 /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */
139 #define BPF_MOV32_RAW(TYPE, DST, SRC, IMM) \
140 ((struct ebpf_insn) { \
141 .code = BPF_ALU | EBPF_MOV | BPF_SRC(TYPE), \
147 /* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
149 #define BPF_LD_ABS(SIZE, IMM) \
150 ((struct ebpf_insn) { \
151 .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
157 /* Memory load, dst_reg = *(uint *) (src_reg + off16) */
159 #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
160 ((struct ebpf_insn) { \
161 .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
167 /* Memory store, *(uint *) (dst_reg + off16) = src_reg */
169 #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
170 ((struct ebpf_insn) { \
171 .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
177 /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
179 #define BPF_JMP_IMM(OP, DST, IMM, OFF) \
180 ((struct ebpf_insn) { \
181 .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
187 /* Raw code statement block */
189 #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
190 ((struct ebpf_insn) { \
199 #define BPF_EXIT_INSN() \
200 ((struct ebpf_insn) { \
201 .code = BPF_JMP | EBPF_EXIT, \
208 * Placeholder to convert BPF extensions like length and VLAN tag
209 * If and when DPDK BPF supports them.
211 static bool convert_bpf_load(const struct bpf_insn *fp,
212 struct ebpf_insn **new_insnp __rte_unused)
215 case SKF_AD_OFF + SKF_AD_PROTOCOL:
216 case SKF_AD_OFF + SKF_AD_PKTTYPE:
217 case SKF_AD_OFF + SKF_AD_IFINDEX:
218 case SKF_AD_OFF + SKF_AD_HATYPE:
219 case SKF_AD_OFF + SKF_AD_MARK:
220 case SKF_AD_OFF + SKF_AD_RXHASH:
221 case SKF_AD_OFF + SKF_AD_QUEUE:
222 case SKF_AD_OFF + SKF_AD_VLAN_TAG:
223 case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
224 case SKF_AD_OFF + SKF_AD_VLAN_TPID:
225 case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
226 case SKF_AD_OFF + SKF_AD_NLATTR:
227 case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
228 case SKF_AD_OFF + SKF_AD_CPU:
229 case SKF_AD_OFF + SKF_AD_RANDOM:
230 case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
231 /* Linux has special negative offsets to access meta-data. */
233 "rte_bpf_convert: socket offset %d not supported\n",
241 static int bpf_convert_filter(const struct bpf_insn *prog, size_t len,
242 struct ebpf_insn *new_prog, uint32_t *new_len)
244 unsigned int pass = 0;
245 size_t new_flen = 0, target, i;
246 struct ebpf_insn *new_insn;
247 const struct bpf_insn *fp;
251 if (len > BPF_MAXINSNS) {
252 RTE_BPF_LOG(ERR, "%s: cBPF program too long (%zu insns)\n",
257 /* On second pass, allocate the new program */
259 addrs = calloc(len, sizeof(*addrs));
268 /* Classic BPF related prologue emission. */
270 /* Classic BPF expects A and X to be reset first. These need
271 * to be guaranteed to be the first two instructions.
273 *new_insn++ = EBPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
274 *new_insn++ = EBPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
276 /* All programs must keep CTX in callee saved BPF_REG_CTX.
277 * In eBPF case it's done by the compiler, here we need to
278 * do this ourself. Initial CTX is present in BPF_REG_ARG1.
280 *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
285 for (i = 0; i < len; fp++, i++) {
286 struct ebpf_insn tmp_insns[6] = { };
287 struct ebpf_insn *insn = tmp_insns;
290 addrs[i] = new_insn - new_prog;
293 /* Absolute loads are how classic BPF accesses skb */
294 case BPF_LD | BPF_ABS | BPF_W:
295 case BPF_LD | BPF_ABS | BPF_H:
296 case BPF_LD | BPF_ABS | BPF_B:
297 if (convert_bpf_load(fp, &insn))
300 *insn = BPF_RAW_INSN(fp->code, 0, 0, 0, fp->k);
303 case BPF_ALU | BPF_DIV | BPF_X:
304 case BPF_ALU | BPF_MOD | BPF_X:
305 /* For cBPF, don't cause floating point exception */
306 *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
307 *insn++ = BPF_JMP_IMM(EBPF_JNE, BPF_REG_X, 0, 2);
308 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
309 *insn++ = BPF_EXIT_INSN();
311 case BPF_ALU | BPF_ADD | BPF_X:
312 case BPF_ALU | BPF_ADD | BPF_K:
313 case BPF_ALU | BPF_SUB | BPF_X:
314 case BPF_ALU | BPF_SUB | BPF_K:
315 case BPF_ALU | BPF_AND | BPF_X:
316 case BPF_ALU | BPF_AND | BPF_K:
317 case BPF_ALU | BPF_OR | BPF_X:
318 case BPF_ALU | BPF_OR | BPF_K:
319 case BPF_ALU | BPF_LSH | BPF_X:
320 case BPF_ALU | BPF_LSH | BPF_K:
321 case BPF_ALU | BPF_RSH | BPF_X:
322 case BPF_ALU | BPF_RSH | BPF_K:
323 case BPF_ALU | BPF_XOR | BPF_X:
324 case BPF_ALU | BPF_XOR | BPF_K:
325 case BPF_ALU | BPF_MUL | BPF_X:
326 case BPF_ALU | BPF_MUL | BPF_K:
327 case BPF_ALU | BPF_DIV | BPF_K:
328 case BPF_ALU | BPF_MOD | BPF_K:
329 case BPF_ALU | BPF_NEG:
330 case BPF_LD | BPF_IND | BPF_W:
331 case BPF_LD | BPF_IND | BPF_H:
332 case BPF_LD | BPF_IND | BPF_B:
333 /* All arithmetic insns map as-is. */
334 insn->code = fp->code;
335 insn->dst_reg = BPF_REG_A;
336 bpf_src = BPF_SRC(fp->code);
337 insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
342 /* Jump transformation cannot use BPF block macros
343 * everywhere as offset calculation and target updates
344 * require a bit more work than the rest, i.e. jump
345 * opcodes map as-is, but offsets need adjustment.
348 #define BPF_EMIT_JMP \
352 insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \
353 /* Adjust pc relative offset for 2nd or 3rd insn. */ \
354 insn->off -= insn - tmp_insns; \
357 case BPF_JMP | BPF_JA:
358 target = i + fp->k + 1;
359 insn->code = fp->code;
363 case BPF_JMP | BPF_JEQ | BPF_K:
364 case BPF_JMP | BPF_JEQ | BPF_X:
365 case BPF_JMP | BPF_JSET | BPF_K:
366 case BPF_JMP | BPF_JSET | BPF_X:
367 case BPF_JMP | BPF_JGT | BPF_K:
368 case BPF_JMP | BPF_JGT | BPF_X:
369 case BPF_JMP | BPF_JGE | BPF_K:
370 case BPF_JMP | BPF_JGE | BPF_X:
371 if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
372 /* BPF immediates are signed, zero extend
373 * immediate into tmp register and use it
376 *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
378 insn->dst_reg = BPF_REG_A;
379 insn->src_reg = BPF_REG_TMP;
382 insn->dst_reg = BPF_REG_A;
384 bpf_src = BPF_SRC(fp->code);
385 insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
388 /* Common case where 'jump_false' is next insn. */
390 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
391 target = i + fp->jt + 1;
396 /* Convert JEQ into JNE when 'jump_true' is next insn. */
397 if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
398 insn->code = BPF_JMP | EBPF_JNE | bpf_src;
399 target = i + fp->jf + 1;
404 /* Other jumps are mapped into two insns: Jxx and JA. */
405 target = i + fp->jt + 1;
406 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
410 insn->code = BPF_JMP | BPF_JA;
411 target = i + fp->jf + 1;
415 /* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
416 case BPF_LDX | BPF_MSH | BPF_B:
418 *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
419 /* A = BPF_R0 = *(u8 *) (skb->data + K) */
420 *insn++ = BPF_LD_ABS(BPF_B, fp->k);
422 *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
424 *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
426 *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
428 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
431 /* RET_K is remapped into 2 insns. RET_A case doesn't need an
432 * extra mov as EBPF_REG_0 is already mapped into BPF_REG_A.
434 case BPF_RET | BPF_A:
435 case BPF_RET | BPF_K:
436 if (BPF_RVAL(fp->code) == BPF_K) {
437 *insn++ = BPF_MOV32_RAW(BPF_K, EBPF_REG_0,
440 *insn = BPF_EXIT_INSN();
443 /* Store to stack. */
446 *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
447 BPF_ST ? BPF_REG_A : BPF_REG_X,
448 -(BPF_MEMWORDS - fp->k) * 4);
451 /* Load from stack. */
452 case BPF_LD | BPF_MEM:
453 case BPF_LDX | BPF_MEM:
454 *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
455 BPF_REG_A : BPF_REG_X, BPF_REG_FP,
456 -(BPF_MEMWORDS - fp->k) * 4);
460 case BPF_LD | BPF_IMM:
461 case BPF_LDX | BPF_IMM:
462 *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
463 BPF_REG_A : BPF_REG_X, fp->k);
467 case BPF_MISC | BPF_TAX:
468 *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
472 case BPF_MISC | BPF_TXA:
473 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
476 /* A = mbuf->len or X = mbuf->len */
477 case BPF_LD | BPF_W | BPF_LEN:
478 case BPF_LDX | BPF_W | BPF_LEN:
479 /* BPF_ABS/BPF_IND implicitly expect mbuf ptr in R6 */
481 *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
482 BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
483 offsetof(struct rte_mbuf, pkt_len));
486 /* Unknown instruction. */
488 RTE_BPF_LOG(ERR, "%s: Unknown instruction!: %#x\n",
495 memcpy(new_insn, tmp_insns,
496 sizeof(*insn) * (insn - tmp_insns));
497 new_insn += insn - tmp_insns;
501 /* Only calculating new length. */
502 *new_len = new_insn - new_prog;
507 if ((ptrdiff_t)new_flen != new_insn - new_prog) {
508 new_flen = new_insn - new_prog;
515 assert(*new_len == new_flen);
524 rte_bpf_convert(const struct bpf_program *prog)
526 struct rte_bpf_prm *prm = NULL;
527 struct ebpf_insn *ebpf = NULL;
528 uint32_t ebpf_len = 0;
532 RTE_BPF_LOG(ERR, "%s: NULL program\n", __func__);
537 /* 1st pass: calculate the eBPF program length */
538 ret = bpf_convert_filter(prog->bf_insns, prog->bf_len, NULL, &ebpf_len);
540 RTE_BPF_LOG(ERR, "%s: cannot get eBPF length\n", __func__);
545 RTE_BPF_LOG(DEBUG, "%s: prog len cBPF=%u -> eBPF=%u\n",
546 __func__, prog->bf_len, ebpf_len);
548 prm = rte_zmalloc("bpf_filter",
549 sizeof(*prm) + ebpf_len * sizeof(*ebpf), 0);
555 /* The EPBF instructions in this case are right after the header */
556 ebpf = (void *)(prm + 1);
558 /* 2nd pass: remap cBPF to eBPF instructions */
559 ret = bpf_convert_filter(prog->bf_insns, prog->bf_len, ebpf, &ebpf_len);
561 RTE_BPF_LOG(ERR, "%s: cannot convert cBPF to eBPF\n", __func__);
568 prm->nb_ins = ebpf_len;
570 /* Classic BPF programs use mbufs */
571 prm->prog_arg.type = RTE_BPF_ARG_PTR_MBUF;
572 prm->prog_arg.size = sizeof(struct rte_mbuf);