1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
22 #include <sys/socket.h>
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
28 #include <rte_malloc.h>
31 #include "mlx5_flow.h"
32 #include "mlx5_autoconf.h"
34 #ifdef HAVE_TC_ACT_VLAN
36 #include <linux/tc_act/tc_vlan.h>
38 #else /* HAVE_TC_ACT_VLAN */
40 #define TCA_VLAN_ACT_POP 1
41 #define TCA_VLAN_ACT_PUSH 2
42 #define TCA_VLAN_ACT_MODIFY 3
43 #define TCA_VLAN_PARMS 2
44 #define TCA_VLAN_PUSH_VLAN_ID 3
45 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
46 #define TCA_VLAN_PAD 5
47 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
54 #endif /* HAVE_TC_ACT_VLAN */
56 #ifdef HAVE_TC_ACT_PEDIT
58 #include <linux/tc_act/tc_pedit.h>
60 #else /* HAVE_TC_ACT_VLAN */
74 TCA_PEDIT_KEY_EX_HTYPE = 1,
75 TCA_PEDIT_KEY_EX_CMD = 2,
76 __TCA_PEDIT_KEY_EX_MAX
79 enum pedit_header_type {
80 TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
81 TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
82 TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
83 TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
84 TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
85 TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
90 TCA_PEDIT_KEY_EX_CMD_SET = 0,
91 TCA_PEDIT_KEY_EX_CMD_ADD = 1,
98 __u32 off; /*offset */
105 struct tc_pedit_sel {
109 struct tc_pedit_key keys[0];
112 #endif /* HAVE_TC_ACT_VLAN */
114 /* Normally found in linux/netlink.h. */
115 #ifndef NETLINK_CAP_ACK
116 #define NETLINK_CAP_ACK 10
119 /* Normally found in linux/pkt_sched.h. */
120 #ifndef TC_H_MIN_INGRESS
121 #define TC_H_MIN_INGRESS 0xfff2u
124 /* Normally found in linux/pkt_cls.h. */
125 #ifndef TCA_CLS_FLAGS_SKIP_SW
126 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
128 #ifndef HAVE_TCA_CHAIN
131 #ifndef HAVE_TCA_FLOWER_ACT
132 #define TCA_FLOWER_ACT 3
134 #ifndef HAVE_TCA_FLOWER_FLAGS
135 #define TCA_FLOWER_FLAGS 22
137 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
138 #define TCA_FLOWER_KEY_ETH_TYPE 8
140 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
141 #define TCA_FLOWER_KEY_ETH_DST 4
143 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
144 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
146 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
147 #define TCA_FLOWER_KEY_ETH_SRC 6
149 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
150 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
152 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
153 #define TCA_FLOWER_KEY_IP_PROTO 9
155 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
156 #define TCA_FLOWER_KEY_IPV4_SRC 10
158 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
159 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
161 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
162 #define TCA_FLOWER_KEY_IPV4_DST 12
164 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
165 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
167 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
168 #define TCA_FLOWER_KEY_IPV6_SRC 14
170 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
171 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
173 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
174 #define TCA_FLOWER_KEY_IPV6_DST 16
176 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
177 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
179 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
180 #define TCA_FLOWER_KEY_TCP_SRC 18
182 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
183 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
185 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
186 #define TCA_FLOWER_KEY_TCP_DST 19
188 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
189 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
191 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
192 #define TCA_FLOWER_KEY_UDP_SRC 20
194 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
195 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
197 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
198 #define TCA_FLOWER_KEY_UDP_DST 21
200 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
201 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
203 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
204 #define TCA_FLOWER_KEY_VLAN_ID 23
206 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
207 #define TCA_FLOWER_KEY_VLAN_PRIO 24
209 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
210 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
212 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
213 #define TCA_FLOWER_KEY_TCP_FLAGS 71
215 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
216 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
218 #ifndef HAVE_TC_ACT_GOTO_CHAIN
219 #define TC_ACT_GOTO_CHAIN 0x20000000
222 #ifndef IPV6_ADDR_LEN
223 #define IPV6_ADDR_LEN 16
226 #ifndef IPV4_ADDR_LEN
227 #define IPV4_ADDR_LEN 4
231 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
238 /** Empty masks for known item types. */
/*
 * NOTE(review): the `static const struct {` opener (orig. line 239) is
 * not visible in this truncated listing. With no initializer, static
 * storage gives all-zero masks, returned when an item carries neither
 * spec nor mask.
 */
240 	struct rte_flow_item_port_id port_id;
241 	struct rte_flow_item_eth eth;
242 	struct rte_flow_item_vlan vlan;
243 	struct rte_flow_item_ipv4 ipv4;
244 	struct rte_flow_item_ipv6 ipv6;
245 	struct rte_flow_item_tcp tcp;
246 	struct rte_flow_item_udp udp;
247 } flow_tcf_mask_empty;
249 /** Supported masks for known item types. */
250 static const struct {
251 	struct rte_flow_item_port_id port_id;
252 	struct rte_flow_item_eth eth;
253 	struct rte_flow_item_vlan vlan;
254 	struct rte_flow_item_ipv4 ipv4;
255 	struct rte_flow_item_ipv6 ipv6;
256 	struct rte_flow_item_tcp tcp;
257 	struct rte_flow_item_udp udp;
258 } flow_tcf_mask_supported = {
/*
 * NOTE(review): this listing dropped the sub-initializer openers
 * (`.port_id = { ... }`, `.eth = {`, `.vlan = {`, `.ipv4 = {`, ...);
 * the designated fields below belong to successive member initializers
 * in the original file. Each mask lists the widest match the driver
 * can offload for that item type.
 */
263 		.type = RTE_BE16(0xffff),
264 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
265 		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
268 		/* PCP and VID only, no DEI. */
269 		.tci = RTE_BE16(0xefff),
270 		.inner_type = RTE_BE16(0xffff),
273 		.next_proto_id = 0xff,
274 		.src_addr = RTE_BE32(0xffffffff),
275 		.dst_addr = RTE_BE32(0xffffffff),
280 			"\xff\xff\xff\xff\xff\xff\xff\xff"
281 			"\xff\xff\xff\xff\xff\xff\xff\xff",
283 			"\xff\xff\xff\xff\xff\xff\xff\xff"
284 			"\xff\xff\xff\xff\xff\xff\xff\xff",
287 		.src_port = RTE_BE16(0xffff),
288 		.dst_port = RTE_BE16(0xffff),
292 		.src_port = RTE_BE16(0xffff),
293 		.dst_port = RTE_BE16(0xffff),
297 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
298 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
299 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
300 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
301 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
303 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
305 /** DPDK port to network interface index (ifindex) conversion. */
306 struct flow_tcf_ptoi {
307 	uint16_t port_id; /**< DPDK port ID. */
308 	unsigned int ifindex; /**< Network interface index. */
/* NOTE(review): the closing `};` (orig. line 309) is not visible in this listing. */
311 /* Due to a limitation on driver/FW. */
312 #define MLX5_TCF_GROUP_ID_MAX 3
313 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
315 #define MLX5_TCF_FATE_ACTIONS \
316 (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
317 MLX5_FLOW_ACTION_JUMP)
319 #define MLX5_TCF_VLAN_ACTIONS \
320 (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
321 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
323 #define MLX5_TCF_PEDIT_ACTIONS \
324 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
325 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
326 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
327 MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
328 MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
330 #define MLX5_TCF_CONFIG_ACTIONS \
331 (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
332 MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
333 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
334 (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
336 #define MAX_PEDIT_KEYS 128
337 #define SZ_PEDIT_KEY_VAL 4
339 #define NUM_OF_PEDIT_KEYS(sz) \
340 (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
/*
 * Extended info for one pedit key: which protocol header the key's
 * offset is relative to (htype) and how the value is applied.
 */
342 struct pedit_key_ex {
343 	enum pedit_header_type htype;
/*
 * NOTE(review): a `cmd` member (referenced as keys_ex[idx].cmd by the
 * helpers below) and the closing `};` were dropped by this extraction.
 */
/*
 * Accumulator used while translating rte_flow modify-header actions
 * into a single tc-pedit action: the selector plus parallel arrays of
 * plain and extended keys.
 */
347 struct pedit_parser {
348 	struct tc_pedit_sel sel;
349 	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
350 	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
/*
 * NOTE(review): this listing is a truncated extraction — the Doxygen
 * comment delimiters, the function braces and the `idx++` that must
 * separate the two key writes below are not visible here.
 */
355  * Set pedit key of MAC address
358  * pointer to action specification
359  * @param[in,out] p_parser
360  * pointer to pedit_parser
363 flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
364 			   struct pedit_parser *p_parser)
366 	int idx = p_parser->sel.nkeys;
	/* Offset of the MAC field being rewritten within the Ethernet header. */
367 	uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
368 					offsetof(struct ether_hdr, s_addr) :
369 					offsetof(struct ether_hdr, d_addr);
370 	const struct rte_flow_action_set_mac *conf =
371 		(const struct rte_flow_action_set_mac *)actions->conf;
	/*
	 * First key: the leading 4 bytes of the 6-byte address.
	 * ~UINT32_MAX evaluates to 0 — tc pedit preserves the bits that are
	 * SET in the mask, so an all-zero mask overwrites the whole 32-bit
	 * word (presumably matching kernel act_pedit semantics; confirm).
	 */
373 	p_parser->keys[idx].off = off;
374 	p_parser->keys[idx].mask = ~UINT32_MAX;
375 	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
376 	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
377 	memcpy(&p_parser->keys[idx].val,
378 	       conf->mac_addr, SZ_PEDIT_KEY_VAL);
	/*
	 * Second key: the remaining 2 bytes. Mask 0xFFFF0000 keeps the upper
	 * half of the word and rewrites only 16 bits — presumably relying on
	 * host-endian layout of the word; verify against the original file.
	 */
380 	p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
381 	p_parser->keys[idx].mask = 0xFFFF0000;
382 	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
383 	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
384 	memcpy(&p_parser->keys[idx].val,
385 	       conf->mac_addr + SZ_PEDIT_KEY_VAL,
386 	       ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
	/* Publish the updated key count back into the selector. */
387 	p_parser->sel.nkeys = (++idx);
/*
 * NOTE(review): truncated extraction — comment delimiters, braces, the
 * third parameter line (`item_flags`) and the `} else {` between the
 * DEC_TTL and SET_TTL arms are not visible here.
 */
391  * Set pedit key of decrease/set ttl
394  * pointer to action specification
395  * @param[in,out] p_parser
396  * pointer to pedit_parser
397  * @param[in] item_flags
398  * flags of all items presented
401 flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
402 			       struct pedit_parser *p_parser,
405 	int idx = p_parser->sel.nkeys;
	/* Mask keeps the top 3 bytes; only the low byte (TTL/hop limit) is rewritten. */
407 	p_parser->keys[idx].mask = 0xFFFFFF00;
408 	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
409 		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
410 		p_parser->keys[idx].off =
411 			offsetof(struct ipv4_hdr, time_to_live);
413 	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
414 		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
415 		p_parser->keys[idx].off =
416 			offsetof(struct ipv6_hdr, hop_limits);
	/*
	 * DEC_TTL: a pedit ADD of 0xFF on the byte is equivalent to a
	 * subtract-1 modulo 256 (presumably the kernel wraps per-byte;
	 * confirm against act_pedit). SET_TTL stores the value directly.
	 */
418 	if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
419 		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
420 		p_parser->keys[idx].val = 0x000000FF;
422 		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
423 		p_parser->keys[idx].val =
424 			(__u32)((const struct rte_flow_action_set_ttl *)
425 			actions->conf)->ttl_value;
	/* One key consumed; publish the new count. */
427 	p_parser->sel.nkeys = (++idx);
/*
 * NOTE(review): truncated extraction — comment delimiters, braces and
 * the third parameter line (`item_flags`) are not visible here.
 */
431  * Set pedit key of transport (TCP/UDP) port value
434  * pointer to action specification
435  * @param[in,out] p_parser
436  * pointer to pedit_parser
437  * @param[in] item_flags
438  * flags of all items presented
441 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
442 			       struct pedit_parser *p_parser,
445 	int idx = p_parser->sel.nkeys;
	/* Pick the pedit header type from whichever L4 item the pattern matched. */
447 	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
448 		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
449 	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
450 		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
451 	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
452 	/* offset of src/dst port is same for TCP and UDP */
453 	p_parser->keys[idx].off =
454 		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
455 		offsetof(struct tcp_hdr, src_port) :
456 		offsetof(struct tcp_hdr, dst_port);
	/* Keep the upper half of the word; rewrite only the 16-bit port. */
457 	p_parser->keys[idx].mask = 0xFFFF0000;
458 	p_parser->keys[idx].val =
459 		(__u32)((const struct rte_flow_action_set_tp *)
460 		actions->conf)->port;
461 	p_parser->sel.nkeys = (++idx);
/*
 * NOTE(review): truncated extraction — the declaration line introducing
 * `off_base` (initialized by the conditional expression below), comment
 * delimiters and the function braces are not visible here.
 */
465  * Set pedit key of ipv6 address
468  * pointer to action specification
469  * @param[in,out] p_parser
470  * pointer to pedit_parser
473 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
474 				 struct pedit_parser *p_parser)
476 	int idx = p_parser->sel.nkeys;
	/* 16-byte IPv6 address => four 4-byte pedit keys. */
477 	int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
479 		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
480 		offsetof(struct ipv6_hdr, src_addr) :
481 		offsetof(struct ipv6_hdr, dst_addr);
482 	const struct rte_flow_action_set_ipv6 *conf =
483 		(const struct rte_flow_action_set_ipv6 *)actions->conf;
	/* One full-word overwrite key per 4-byte chunk of the address. */
485 	for (int i = 0; i < keys; i++, idx++) {
486 		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
487 		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
488 		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
		/* ~UINT32_MAX == 0: all-zero mask overwrites the whole word. */
489 		p_parser->keys[idx].mask = ~UINT32_MAX;
490 		memcpy(&p_parser->keys[idx].val,
491 		       conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
494 	p_parser->sel.nkeys += keys;
/*
 * NOTE(review): truncated extraction — comment delimiters, the static
 * return-type line and the function braces are not visible here.
 */
498  * Set pedit key of ipv4 address
501  * pointer to action specification
502  * @param[in,out] p_parser
503  * pointer to pedit_parser
506 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
507 				 struct pedit_parser *p_parser)
509 	int idx = p_parser->sel.nkeys;
511 	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
512 	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
	/* Choose src or dst address offset based on the action type. */
513 	p_parser->keys[idx].off =
514 		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
515 		offsetof(struct ipv4_hdr, src_addr) :
516 		offsetof(struct ipv4_hdr, dst_addr);
	/* ~UINT32_MAX == 0: overwrite the entire 32-bit address word. */
517 	p_parser->keys[idx].mask = ~UINT32_MAX;
518 	p_parser->keys[idx].val =
519 		((const struct rte_flow_action_set_ipv4 *)
520 		actions->conf)->ipv4_addr;
521 	p_parser->sel.nkeys = (++idx);
/*
 * NOTE(review): truncated extraction — the `break;` statements between
 * switch cases, the `default:` label, the `pedit_mnl_msg_done:` label,
 * the `na_pedit_keys =` assignment line and several braces are not
 * visible here; read against the original file before acting on it.
 */
525  * Create the pedit's na attribute in netlink message
526  * on pre-allocate message buffer
529  * pointer to pre-allocated netlink message buffer
530  * @param[in,out] actions
531  * pointer to pointer of actions specification.
532  * @param[in,out] action_flags
533  * pointer to actions flags
534  * @param[in] item_flags
535  * flags of all item presented
538 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
539 			      const struct rte_flow_action **actions,
542 	struct pedit_parser p_parser;
543 	struct nlattr *na_act_options;
544 	struct nlattr *na_pedit_keys;
546 	memset(&p_parser, 0, sizeof(p_parser));
547 	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
548 	na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
549 	/* all modify header actions should be in one tc-pedit action */
550 	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
551 		switch ((*actions)->type) {
552 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
553 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
554 			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
556 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
557 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
558 			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
560 		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
561 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
562 			flow_tcf_pedit_key_set_tp_port(*actions,
563 						       &p_parser, item_flags);
565 		case RTE_FLOW_ACTION_TYPE_SET_TTL:
566 		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
567 			flow_tcf_pedit_key_set_dec_ttl(*actions,
568 						       &p_parser, item_flags);
570 		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
571 		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
572 			flow_tcf_pedit_key_set_mac(*actions, &p_parser);
			/* First non-pedit action terminates the collection loop. */
575 			goto pedit_mnl_msg_done;
	/* TC_ACT_PIPE: let subsequent tc actions run after the rewrite. */
579 	p_parser.sel.action = TC_ACT_PIPE;
580 	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
581 		     sizeof(p_parser.sel) +
582 		     p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
	/* Extended keys carry the per-key header type and command. */
585 		mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
586 	for (int i = 0; i < p_parser.sel.nkeys; i++) {
587 		struct nlattr *na_pedit_key =
588 			mnl_attr_nest_start(nl,
589 					    TCA_PEDIT_KEY_EX | NLA_F_NESTED);
590 		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
591 				 p_parser.keys_ex[i].htype);
592 		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
593 				 p_parser.keys_ex[i].cmd);
594 		mnl_attr_nest_end(nl, na_pedit_key);
596 	mnl_attr_nest_end(nl, na_pedit_keys);
597 	mnl_attr_nest_end(nl, na_act_options);
/*
 * NOTE(review): truncated extraction — the local declarations of the
 * `pedit_size`, `keys` and `flags` accumulators, the per-case `break;`
 * statements, the `default:` label and the final `return pedit_size;`
 * are not visible here.
 */
602  * Calculate max memory size of one TC-pedit actions.
603  * One TC-pedit action can contain set of keys each defining
604  * a rewrite element (rte_flow action)
606  * @param[in,out] actions
607  * actions specification.
608  * @param[in,out] action_flags
610  * @param[in,out] size
613  * Max memory size of one TC-pedit action
616 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
617 				uint64_t *action_flags)
	/* Fixed overhead: action nest + kind string + options nest. */
623 	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
624 		      SZ_NLATTR_STRZ_OF("pedit") +
625 		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
	/* Count pedit keys per modify-header action and record its flag. */
626 	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
627 		switch ((*actions)->type) {
628 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
629 			keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
630 			flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
632 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
633 			keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
634 			flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
636 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
637 			keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
638 			flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
640 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
641 			keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
642 			flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
644 		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
645 			/* TCP is as same as UDP */
646 			keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
647 			flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
649 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
650 			/* TCP is as same as UDP */
651 			keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
652 			flags |= MLX5_FLOW_ACTION_SET_TP_DST;
654 		case RTE_FLOW_ACTION_TYPE_SET_TTL:
655 			keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
656 			flags |= MLX5_FLOW_ACTION_SET_TTL;
658 		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
659 			keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
660 			flags |= MLX5_FLOW_ACTION_DEC_TTL;
662 		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
663 			keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
664 			flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
666 		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
667 			keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
668 			flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
			/* Any other action type ends the pedit run. */
671 			goto get_pedit_action_size_done;
674 get_pedit_action_size_done:
675 	/* TCA_PEDIT_PARAMS_EX */
677 		      SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
678 					keys * sizeof(struct tc_pedit_key));
679 	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
681 		      /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
682 		      (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
683 		       SZ_NLATTR_DATA_OF(2));
	/* Report which modify-header actions were seen back to the caller. */
684 	(*action_flags) |= flags;
690  * Retrieve mask for pattern item.
692  * This function does basic sanity checks on a pattern item in order to
693  * return the most appropriate mask for it.
696  * Item specification.
697  * @param[in] mask_default
698  * Default mask for pattern item as specified by the flow API.
699  * @param[in] mask_supported
700  * Mask fields supported by the implementation.
701  * @param[in] mask_empty
702  * Empty mask to return when there is no specification.
704  * Perform verbose error reporting if not NULL.
707  * Either @p item->mask or one of the mask parameters on success, NULL
708  * otherwise and rte_errno is set.
/*
 * NOTE(review): truncated extraction — the static return-type line, the
 * local declarations (`mask` byte pointer, loop index), the early
 * `return NULL;` / `return mask_empty;` paths and the final
 * `return mask;` are not visible here.
 */
711 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
712 		   const void *mask_supported, const void *mask_empty,
713 		   size_t mask_size, struct rte_flow_error *error)
718 	/* item->last and item->mask cannot exist without item->spec. */
719 	if (!item->spec && (item->mask || item->last)) {
720 		rte_flow_error_set(error, EINVAL,
721 				   RTE_FLOW_ERROR_TYPE_ITEM, item,
722 				   "\"mask\" or \"last\" field provided without"
723 				   " a corresponding \"spec\"");
726 	/* No spec, no mask, no problem. */
729 	mask = item->mask ? item->mask : mask_default;
732 	 * Single-pass check to make sure that:
733 	 * - Mask is supported, no bits are set outside mask_supported.
734 	 * - Both item->spec and item->last are included in mask.
736 	for (i = 0; i != mask_size; ++i) {
		/* Any mask bit outside the supported mask is rejected. */
739 		if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
740 		    ((const uint8_t *)mask_supported)[i]) {
741 			rte_flow_error_set(error, ENOTSUP,
742 					   RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
743 					   "unsupported field found"
		/* A spec..last range must be fully covered by the mask. */
748 		    (((const uint8_t *)item->spec)[i] & mask[i]) !=
749 		    (((const uint8_t *)item->last)[i] & mask[i])) {
750 			rte_flow_error_set(error, EINVAL,
751 					   RTE_FLOW_ERROR_TYPE_ITEM_LAST,
753 					   "range between \"spec\" and \"last\""
754 					   " not comprised in \"mask\"");
762  * Build a conversion table between port ID and ifindex.
765  * Pointer to Ethernet device.
767  * Pointer to ptoi table.
769  * Size of ptoi table provided.
772  * Size of ptoi table filled.
/*
 * NOTE(review): truncated extraction — the static return-type line, the
 * second parameter line (table length), the capacity checks, the `own`
 * bookkeeping that places the current device first, the terminating
 * zero-ifindex store and the `return` statements are not visible here.
 */
775 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
778 	unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
	/* VLA sized at run time from the port count queried just above. */
779 	uint16_t port_id[n + 1];
781 	unsigned int own = 0;
783 	/* At least one port is needed when no switch domain is present. */
786 		port_id[0] = dev->data->port_id;
788 		n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
	/* Resolve each DPDK port ID to its kernel ifindex. */
792 	for (i = 0; i != n; ++i) {
793 		struct rte_eth_dev_info dev_info;
795 		rte_eth_dev_info_get(port_id[i], &dev_info);
796 		if (port_id[i] == dev->data->port_id)
798 		ptoi[i].port_id = port_id[i];
799 		ptoi[i].ifindex = dev_info.if_index;
801 	/* Ensure first entry of ptoi[] is the current device. */
807 	/* An entry with zero ifindex terminates ptoi[]. */
814  * Verify the @p attr will be correctly understood by the E-switch.
817  * Pointer to flow attributes
819  * Pointer to error structure.
822  * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * NOTE(review): truncated extraction — the static return-type line, the
 * conditions guarding the last two ingress/egress errors (presumably
 * `if (!attr->ingress)` / `if (attr->egress)`; confirm) and the final
 * `return 0;` are not visible here.
 */
825 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
826 			     struct rte_flow_error *error)
829 	 * Supported attributes: groups, some priorities and ingress only.
830 	 * group is supported only if kernel supports chain. Don't care about
831 	 * transfer as it is the caller's problem.
	/* Driver/FW limit on group IDs: see MLX5_TCF_GROUP_ID_MAX above. */
833 	if (attr->group > MLX5_TCF_GROUP_ID_MAX)
834 		return rte_flow_error_set(error, ENOTSUP,
835 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
836 					  "group ID larger than "
837 					  RTE_STR(MLX5_TCF_GROUP_ID_MAX)
	/* Priorities are more restricted when a non-zero group is used. */
839 	else if (attr->group > 0 &&
840 		 attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
841 		return rte_flow_error_set(error, ENOTSUP,
842 					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
844 					  "lowest priority level is "
845 					  RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
846 					  " when group is configured");
847 	else if (attr->priority > 0xfffe)
848 		return rte_flow_error_set(error, ENOTSUP,
849 					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
851 					  "lowest priority level is 0xfffe");
853 		return rte_flow_error_set(error, EINVAL,
854 					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
855 					  attr, "only ingress is supported");
857 		return rte_flow_error_set(error, ENOTSUP,
858 					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
859 					  attr, "egress is not supported");
864 * Validate flow for E-Switch.
867 * Pointer to the priv structure.
869 * Pointer to the flow attributes.
871 * Pointer to the list of items.
873 * Pointer to the list of actions.
875 * Pointer to the error structure.
878 * 0 on success, a negative errno value otherwise and rte_ernno is set.
881 flow_tcf_validate(struct rte_eth_dev *dev,
882 const struct rte_flow_attr *attr,
883 const struct rte_flow_item items[],
884 const struct rte_flow_action actions[],
885 struct rte_flow_error *error)
888 const struct rte_flow_item_port_id *port_id;
889 const struct rte_flow_item_eth *eth;
890 const struct rte_flow_item_vlan *vlan;
891 const struct rte_flow_item_ipv4 *ipv4;
892 const struct rte_flow_item_ipv6 *ipv6;
893 const struct rte_flow_item_tcp *tcp;
894 const struct rte_flow_item_udp *udp;
897 const struct rte_flow_action_port_id *port_id;
898 const struct rte_flow_action_jump *jump;
899 const struct rte_flow_action_of_push_vlan *of_push_vlan;
900 const struct rte_flow_action_of_set_vlan_vid *
902 const struct rte_flow_action_of_set_vlan_pcp *
904 const struct rte_flow_action_set_ipv4 *set_ipv4;
905 const struct rte_flow_action_set_ipv6 *set_ipv6;
907 uint32_t item_flags = 0;
908 uint32_t action_flags = 0;
909 uint8_t next_protocol = -1;
910 unsigned int tcm_ifindex = 0;
911 uint8_t pedit_validated = 0;
912 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
913 struct rte_eth_dev *port_id_dev = NULL;
917 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
918 PTOI_TABLE_SZ_MAX(dev)));
919 ret = flow_tcf_validate_attributes(attr, error);
922 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
925 switch (items->type) {
926 case RTE_FLOW_ITEM_TYPE_VOID:
928 case RTE_FLOW_ITEM_TYPE_PORT_ID:
929 mask.port_id = flow_tcf_item_mask
930 (items, &rte_flow_item_port_id_mask,
931 &flow_tcf_mask_supported.port_id,
932 &flow_tcf_mask_empty.port_id,
933 sizeof(flow_tcf_mask_supported.port_id),
937 if (mask.port_id == &flow_tcf_mask_empty.port_id) {
941 spec.port_id = items->spec;
942 if (mask.port_id->id && mask.port_id->id != 0xffffffff)
943 return rte_flow_error_set
945 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
947 "no support for partial mask on"
949 if (!mask.port_id->id)
952 for (i = 0; ptoi[i].ifindex; ++i)
953 if (ptoi[i].port_id == spec.port_id->id)
955 if (!ptoi[i].ifindex)
956 return rte_flow_error_set
958 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
960 "missing data to convert port ID to"
962 if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
963 return rte_flow_error_set
965 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
967 "cannot match traffic for"
968 " several port IDs through"
969 " a single flow rule");
970 tcm_ifindex = ptoi[i].ifindex;
973 case RTE_FLOW_ITEM_TYPE_ETH:
974 ret = mlx5_flow_validate_item_eth(items, item_flags,
978 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
980 * Redundant check due to different supported mask.
981 * Same for the rest of items.
983 mask.eth = flow_tcf_item_mask
984 (items, &rte_flow_item_eth_mask,
985 &flow_tcf_mask_supported.eth,
986 &flow_tcf_mask_empty.eth,
987 sizeof(flow_tcf_mask_supported.eth),
991 if (mask.eth->type && mask.eth->type !=
993 return rte_flow_error_set
995 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
997 "no support for partial mask on"
1000 case RTE_FLOW_ITEM_TYPE_VLAN:
1001 ret = mlx5_flow_validate_item_vlan(items, item_flags,
1005 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1006 mask.vlan = flow_tcf_item_mask
1007 (items, &rte_flow_item_vlan_mask,
1008 &flow_tcf_mask_supported.vlan,
1009 &flow_tcf_mask_empty.vlan,
1010 sizeof(flow_tcf_mask_supported.vlan),
1014 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
1015 (mask.vlan->tci & RTE_BE16(0xe000)) !=
1016 RTE_BE16(0xe000)) ||
1017 (mask.vlan->tci & RTE_BE16(0x0fff) &&
1018 (mask.vlan->tci & RTE_BE16(0x0fff)) !=
1019 RTE_BE16(0x0fff)) ||
1020 (mask.vlan->inner_type &&
1021 mask.vlan->inner_type != RTE_BE16(0xffff)))
1022 return rte_flow_error_set
1024 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1026 "no support for partial masks on"
1027 " \"tci\" (PCP and VID parts) and"
1028 " \"inner_type\" fields");
1030 case RTE_FLOW_ITEM_TYPE_IPV4:
1031 ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1035 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1036 mask.ipv4 = flow_tcf_item_mask
1037 (items, &rte_flow_item_ipv4_mask,
1038 &flow_tcf_mask_supported.ipv4,
1039 &flow_tcf_mask_empty.ipv4,
1040 sizeof(flow_tcf_mask_supported.ipv4),
1044 if (mask.ipv4->hdr.next_proto_id &&
1045 mask.ipv4->hdr.next_proto_id != 0xff)
1046 return rte_flow_error_set
1048 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1050 "no support for partial mask on"
1051 " \"hdr.next_proto_id\" field");
1052 else if (mask.ipv4->hdr.next_proto_id)
1054 ((const struct rte_flow_item_ipv4 *)
1055 (items->spec))->hdr.next_proto_id;
1057 case RTE_FLOW_ITEM_TYPE_IPV6:
1058 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1062 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1063 mask.ipv6 = flow_tcf_item_mask
1064 (items, &rte_flow_item_ipv6_mask,
1065 &flow_tcf_mask_supported.ipv6,
1066 &flow_tcf_mask_empty.ipv6,
1067 sizeof(flow_tcf_mask_supported.ipv6),
1071 if (mask.ipv6->hdr.proto &&
1072 mask.ipv6->hdr.proto != 0xff)
1073 return rte_flow_error_set
1075 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1077 "no support for partial mask on"
1078 " \"hdr.proto\" field");
1079 else if (mask.ipv6->hdr.proto)
1081 ((const struct rte_flow_item_ipv6 *)
1082 (items->spec))->hdr.proto;
1084 case RTE_FLOW_ITEM_TYPE_UDP:
1085 ret = mlx5_flow_validate_item_udp(items, item_flags,
1086 next_protocol, error);
1089 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1090 mask.udp = flow_tcf_item_mask
1091 (items, &rte_flow_item_udp_mask,
1092 &flow_tcf_mask_supported.udp,
1093 &flow_tcf_mask_empty.udp,
1094 sizeof(flow_tcf_mask_supported.udp),
1099 case RTE_FLOW_ITEM_TYPE_TCP:
1100 ret = mlx5_flow_validate_item_tcp
1103 &flow_tcf_mask_supported.tcp,
1107 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1108 mask.tcp = flow_tcf_item_mask
1109 (items, &rte_flow_item_tcp_mask,
1110 &flow_tcf_mask_supported.tcp,
1111 &flow_tcf_mask_empty.tcp,
1112 sizeof(flow_tcf_mask_supported.tcp),
1118 return rte_flow_error_set(error, ENOTSUP,
1119 RTE_FLOW_ERROR_TYPE_ITEM,
1120 NULL, "item not supported");
1123 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1125 uint32_t current_action_flag = 0;
1127 switch (actions->type) {
1128 case RTE_FLOW_ACTION_TYPE_VOID:
1130 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1131 current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1134 conf.port_id = actions->conf;
1135 if (conf.port_id->original)
1138 for (i = 0; ptoi[i].ifindex; ++i)
1139 if (ptoi[i].port_id == conf.port_id->id)
1141 if (!ptoi[i].ifindex)
1142 return rte_flow_error_set
1144 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1146 "missing data to convert port ID to"
1148 port_id_dev = &rte_eth_devices[conf.port_id->id];
1150 case RTE_FLOW_ACTION_TYPE_JUMP:
1151 current_action_flag = MLX5_FLOW_ACTION_JUMP;
1154 conf.jump = actions->conf;
1155 if (attr->group >= conf.jump->group)
1156 return rte_flow_error_set
1158 RTE_FLOW_ERROR_TYPE_ACTION,
1160 "can jump only to a group forward");
1162 case RTE_FLOW_ACTION_TYPE_DROP:
1163 current_action_flag = MLX5_FLOW_ACTION_DROP;
1165 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1166 current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1168 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1169 current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1171 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1172 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1173 return rte_flow_error_set
1175 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1176 "vlan modify is not supported,"
1177 " set action must follow push action");
1178 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1180 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1181 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1182 return rte_flow_error_set
1184 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1185 "vlan modify is not supported,"
1186 " set action must follow push action");
1187 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1189 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1190 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1192 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1193 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1195 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1196 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1198 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1199 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1201 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1202 current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1204 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1205 current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1207 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1208 current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1210 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1211 current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1213 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1214 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
1216 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1217 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
1220 return rte_flow_error_set(error, ENOTSUP,
1221 RTE_FLOW_ERROR_TYPE_ACTION,
1223 "action not supported");
1225 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1227 return rte_flow_error_set(error, EINVAL,
1228 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1230 "action configuration not set");
1232 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1234 return rte_flow_error_set(error, ENOTSUP,
1235 RTE_FLOW_ERROR_TYPE_ACTION,
1237 "set actions should be "
1238 "listed successively");
1239 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1240 (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1241 pedit_validated = 1;
1242 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1243 (action_flags & MLX5_TCF_FATE_ACTIONS))
1244 return rte_flow_error_set(error, EINVAL,
1245 RTE_FLOW_ERROR_TYPE_ACTION,
1247 "can't have multiple fate"
1249 action_flags |= current_action_flag;
1251 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1252 (action_flags & MLX5_FLOW_ACTION_DROP))
1253 return rte_flow_error_set(error, ENOTSUP,
1254 RTE_FLOW_ERROR_TYPE_ACTION,
1256 "set action is not compatible with "
1258 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1259 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1260 return rte_flow_error_set(error, ENOTSUP,
1261 RTE_FLOW_ERROR_TYPE_ACTION,
1263 "set action must be followed by "
1266 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1267 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1268 return rte_flow_error_set(error, EINVAL,
1269 RTE_FLOW_ERROR_TYPE_ACTION,
1271 "no ipv4 item found in"
1275 (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1276 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1277 return rte_flow_error_set(error, EINVAL,
1278 RTE_FLOW_ERROR_TYPE_ACTION,
1280 "no ipv6 item found in"
1284 (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1286 (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1287 MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1288 return rte_flow_error_set(error, EINVAL,
1289 RTE_FLOW_ERROR_TYPE_ACTION,
1291 "no TCP/UDP item found in"
1295 * FW syndrome (0xA9C090):
1296 * set_flow_table_entry: push vlan action fte in fdb can ONLY be
1297 * forward to the uplink.
1299 if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1300 (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1301 ((struct priv *)port_id_dev->data->dev_private)->representor)
1302 return rte_flow_error_set(error, ENOTSUP,
1303 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1304 "vlan push can only be applied"
1305 " when forwarding to uplink port");
1307 * FW syndrome (0x294609):
1308 * set_flow_table_entry: modify/pop/push actions in fdb flow table
1309 * are supported only while forwarding to vport.
1311 if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1312 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1313 return rte_flow_error_set(error, ENOTSUP,
1314 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1315 "vlan actions are supported"
1316 " only with port_id action");
1317 if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1318 return rte_flow_error_set(error, EINVAL,
1319 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1320 "no fate action is found");
1322 (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
1324 (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
1325 MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
1326 return rte_flow_error_set(error, EINVAL,
1327 RTE_FLOW_ERROR_TYPE_ACTION,
1329 "no IP found in pattern");
1332 (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
1333 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
1334 return rte_flow_error_set(error, ENOTSUP,
1335 RTE_FLOW_ERROR_TYPE_ACTION,
1337 "no ethernet found in"
1344 * Calculate maximum size of memory for flow items of Linux TC flower and
1345 * extract specified items.
1348 * Pointer to the list of items.
1349 * @param[out] item_flags
1350 * Pointer to the detected items.
1353 * Maximum size of memory for items.
1356 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1357 const struct rte_flow_item items[],
1358 uint64_t *item_flags)
1363 size += SZ_NLATTR_STRZ_OF("flower") +
1364 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1365 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
1366 if (attr->group > 0)
1367 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
1368 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1369 switch (items->type) {
1370 case RTE_FLOW_ITEM_TYPE_VOID:
1372 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1374 case RTE_FLOW_ITEM_TYPE_ETH:
1375 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1376 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1377 /* dst/src MAC addr and mask. */
1378 flags |= MLX5_FLOW_LAYER_OUTER_L2;
1380 case RTE_FLOW_ITEM_TYPE_VLAN:
1381 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1382 SZ_NLATTR_TYPE_OF(uint16_t) +
1383 /* VLAN Ether type. */
1384 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1385 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1386 flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1388 case RTE_FLOW_ITEM_TYPE_IPV4:
1389 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1390 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1391 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1392 /* dst/src IP addr and mask. */
1393 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1395 case RTE_FLOW_ITEM_TYPE_IPV6:
1396 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1397 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1398 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1399 /* dst/src IP addr and mask. */
1400 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1402 case RTE_FLOW_ITEM_TYPE_UDP:
1403 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1404 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1405 /* dst/src port and mask. */
1406 flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1408 case RTE_FLOW_ITEM_TYPE_TCP:
1409 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1410 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1411 /* dst/src port and mask. */
1412 flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1416 "unsupported item %p type %d,"
1417 " items must be validated before flow creation",
1418 (const void *)items, items->type);
1422 *item_flags = flags;
1427 * Calculate maximum size of memory for flow actions of Linux TC flower and
1428 * extract specified actions.
1430 * @param[in] actions
1431 * Pointer to the list of actions.
1432 * @param[out] action_flags
1433 * Pointer to the detected actions.
1436 * Maximum size of memory for actions.
1439 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1440 uint64_t *action_flags)
1445 size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1446 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1447 switch (actions->type) {
1448 case RTE_FLOW_ACTION_TYPE_VOID:
1450 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1451 size += SZ_NLATTR_NEST + /* na_act_index. */
1452 SZ_NLATTR_STRZ_OF("mirred") +
1453 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1454 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1455 flags |= MLX5_FLOW_ACTION_PORT_ID;
1457 case RTE_FLOW_ACTION_TYPE_JUMP:
1458 size += SZ_NLATTR_NEST + /* na_act_index. */
1459 SZ_NLATTR_STRZ_OF("gact") +
1460 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1461 SZ_NLATTR_TYPE_OF(struct tc_gact);
1462 flags |= MLX5_FLOW_ACTION_JUMP;
1464 case RTE_FLOW_ACTION_TYPE_DROP:
1465 size += SZ_NLATTR_NEST + /* na_act_index. */
1466 SZ_NLATTR_STRZ_OF("gact") +
1467 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1468 SZ_NLATTR_TYPE_OF(struct tc_gact);
1469 flags |= MLX5_FLOW_ACTION_DROP;
1471 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1472 flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1473 goto action_of_vlan;
1474 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1475 flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1476 goto action_of_vlan;
1477 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1478 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1479 goto action_of_vlan;
1480 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1481 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1482 goto action_of_vlan;
1484 size += SZ_NLATTR_NEST + /* na_act_index. */
1485 SZ_NLATTR_STRZ_OF("vlan") +
1486 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1487 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1488 SZ_NLATTR_TYPE_OF(uint16_t) +
1489 /* VLAN protocol. */
1490 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1491 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
1493 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1494 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1495 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1496 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1497 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1498 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1499 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1500 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1501 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1502 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1503 size += flow_tcf_get_pedit_actions_size(&actions,
1508 "unsupported action %p type %d,"
1509 " items must be validated before flow creation",
1510 (const void *)actions, actions->type);
1514 *action_flags = flags;
1519 * Brand rtnetlink buffer with unique handle.
1521 * This handle should be unique for a given network interface to avoid
1525 * Pointer to Netlink message.
1527 * Unique 32-bit handle to use.
1530 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1532 struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1534 tcm->tcm_handle = handle;
1535 DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1536 (void *)nlh, handle);
1540 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1541 * memory required, allocates the memory, initializes Netlink message headers
1542 * and set unique TC message handle.
1545 * Pointer to the flow attributes.
1547 * Pointer to the list of items.
1548 * @param[in] actions
1549 * Pointer to the list of actions.
1550 * @param[out] item_flags
1551 * Pointer to bit mask of all items detected.
1552 * @param[out] action_flags
1553 * Pointer to bit mask of all actions detected.
1555 * Pointer to the error structure.
1558 * Pointer to mlx5_flow object on success,
1559 * otherwise NULL and rte_ernno is set.
1561 static struct mlx5_flow *
1562 flow_tcf_prepare(const struct rte_flow_attr *attr,
1563 const struct rte_flow_item items[],
1564 const struct rte_flow_action actions[],
1565 uint64_t *item_flags, uint64_t *action_flags,
1566 struct rte_flow_error *error)
1568 size_t size = sizeof(struct mlx5_flow) +
1569 MNL_ALIGN(sizeof(struct nlmsghdr)) +
1570 MNL_ALIGN(sizeof(struct tcmsg));
1571 struct mlx5_flow *dev_flow;
1572 struct nlmsghdr *nlh;
1575 size += flow_tcf_get_items_and_size(attr, items, item_flags);
1576 size += flow_tcf_get_actions_and_size(actions, action_flags);
1577 dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1579 rte_flow_error_set(error, ENOMEM,
1580 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1581 "not enough memory to create E-Switch flow");
1584 nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1585 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1586 *dev_flow = (struct mlx5_flow){
1587 .tcf = (struct mlx5_flow_tcf){
1593 * Generate a reasonably unique handle based on the address of the
1596 * This is straightforward on 32-bit systems where the flow pointer can
1597 * be used directly. Otherwise, its least significant part is taken
1598 * after shifting it by the previous power of two of the pointed buffer
1601 if (sizeof(dev_flow) <= 4)
1602 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1604 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1605 rte_log2_u32(rte_align32prevpow2(size)));
1610 * Translate flow for Linux TC flower and construct Netlink message.
1613 * Pointer to the priv structure.
1614 * @param[in, out] flow
1615 * Pointer to the sub flow.
1617 * Pointer to the flow attributes.
1619 * Pointer to the list of items.
1620 * @param[in] actions
1621 * Pointer to the list of actions.
1623 * Pointer to the error structure.
1626 * 0 on success, a negative errno value otherwise and rte_ernno is set.
1629 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1630 const struct rte_flow_attr *attr,
1631 const struct rte_flow_item items[],
1632 const struct rte_flow_action actions[],
1633 struct rte_flow_error *error)
1636 const struct rte_flow_item_port_id *port_id;
1637 const struct rte_flow_item_eth *eth;
1638 const struct rte_flow_item_vlan *vlan;
1639 const struct rte_flow_item_ipv4 *ipv4;
1640 const struct rte_flow_item_ipv6 *ipv6;
1641 const struct rte_flow_item_tcp *tcp;
1642 const struct rte_flow_item_udp *udp;
1645 const struct rte_flow_action_port_id *port_id;
1646 const struct rte_flow_action_jump *jump;
1647 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1648 const struct rte_flow_action_of_set_vlan_vid *
1650 const struct rte_flow_action_of_set_vlan_pcp *
1653 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1654 struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1655 struct tcmsg *tcm = dev_flow->tcf.tcm;
1656 uint32_t na_act_index_cur;
1657 bool eth_type_set = 0;
1658 bool vlan_present = 0;
1659 bool vlan_eth_type_set = 0;
1660 bool ip_proto_set = 0;
1661 struct nlattr *na_flower;
1662 struct nlattr *na_flower_act;
1663 struct nlattr *na_vlan_id = NULL;
1664 struct nlattr *na_vlan_priority = NULL;
1665 uint64_t item_flags = 0;
1667 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1668 PTOI_TABLE_SZ_MAX(dev)));
1669 nlh = dev_flow->tcf.nlh;
1670 tcm = dev_flow->tcf.tcm;
1671 /* Prepare API must have been called beforehand. */
1672 assert(nlh != NULL && tcm != NULL);
1673 tcm->tcm_family = AF_UNSPEC;
1674 tcm->tcm_ifindex = ptoi[0].ifindex;
1675 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1677 * Priority cannot be zero to prevent the kernel from picking one
1680 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1681 RTE_BE16(ETH_P_ALL));
1682 if (attr->group > 0)
1683 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1684 mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1685 na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1686 mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
1687 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1690 switch (items->type) {
1691 case RTE_FLOW_ITEM_TYPE_VOID:
1693 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1694 mask.port_id = flow_tcf_item_mask
1695 (items, &rte_flow_item_port_id_mask,
1696 &flow_tcf_mask_supported.port_id,
1697 &flow_tcf_mask_empty.port_id,
1698 sizeof(flow_tcf_mask_supported.port_id),
1700 assert(mask.port_id);
1701 if (mask.port_id == &flow_tcf_mask_empty.port_id)
1703 spec.port_id = items->spec;
1704 if (!mask.port_id->id)
1707 for (i = 0; ptoi[i].ifindex; ++i)
1708 if (ptoi[i].port_id == spec.port_id->id)
1710 assert(ptoi[i].ifindex);
1711 tcm->tcm_ifindex = ptoi[i].ifindex;
1713 case RTE_FLOW_ITEM_TYPE_ETH:
1714 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1715 mask.eth = flow_tcf_item_mask
1716 (items, &rte_flow_item_eth_mask,
1717 &flow_tcf_mask_supported.eth,
1718 &flow_tcf_mask_empty.eth,
1719 sizeof(flow_tcf_mask_supported.eth),
1722 if (mask.eth == &flow_tcf_mask_empty.eth)
1724 spec.eth = items->spec;
1725 if (mask.eth->type) {
1726 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1730 if (!is_zero_ether_addr(&mask.eth->dst)) {
1731 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1733 spec.eth->dst.addr_bytes);
1734 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1736 mask.eth->dst.addr_bytes);
1738 if (!is_zero_ether_addr(&mask.eth->src)) {
1739 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1741 spec.eth->src.addr_bytes);
1742 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1744 mask.eth->src.addr_bytes);
1747 case RTE_FLOW_ITEM_TYPE_VLAN:
1748 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1749 mask.vlan = flow_tcf_item_mask
1750 (items, &rte_flow_item_vlan_mask,
1751 &flow_tcf_mask_supported.vlan,
1752 &flow_tcf_mask_empty.vlan,
1753 sizeof(flow_tcf_mask_supported.vlan),
1757 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1758 RTE_BE16(ETH_P_8021Q));
1761 if (mask.vlan == &flow_tcf_mask_empty.vlan)
1763 spec.vlan = items->spec;
1764 if (mask.vlan->inner_type) {
1765 mnl_attr_put_u16(nlh,
1766 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1767 spec.vlan->inner_type);
1768 vlan_eth_type_set = 1;
1770 if (mask.vlan->tci & RTE_BE16(0xe000))
1771 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1773 (spec.vlan->tci) >> 13) & 0x7);
1774 if (mask.vlan->tci & RTE_BE16(0x0fff))
1775 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1780 case RTE_FLOW_ITEM_TYPE_IPV4:
1781 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1782 mask.ipv4 = flow_tcf_item_mask
1783 (items, &rte_flow_item_ipv4_mask,
1784 &flow_tcf_mask_supported.ipv4,
1785 &flow_tcf_mask_empty.ipv4,
1786 sizeof(flow_tcf_mask_supported.ipv4),
1789 if (!eth_type_set || !vlan_eth_type_set)
1790 mnl_attr_put_u16(nlh,
1792 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1793 TCA_FLOWER_KEY_ETH_TYPE,
1794 RTE_BE16(ETH_P_IP));
1796 vlan_eth_type_set = 1;
1797 if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1799 spec.ipv4 = items->spec;
1800 if (mask.ipv4->hdr.next_proto_id) {
1801 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1802 spec.ipv4->hdr.next_proto_id);
1805 if (mask.ipv4->hdr.src_addr) {
1806 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1807 spec.ipv4->hdr.src_addr);
1808 mnl_attr_put_u32(nlh,
1809 TCA_FLOWER_KEY_IPV4_SRC_MASK,
1810 mask.ipv4->hdr.src_addr);
1812 if (mask.ipv4->hdr.dst_addr) {
1813 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1814 spec.ipv4->hdr.dst_addr);
1815 mnl_attr_put_u32(nlh,
1816 TCA_FLOWER_KEY_IPV4_DST_MASK,
1817 mask.ipv4->hdr.dst_addr);
1820 case RTE_FLOW_ITEM_TYPE_IPV6:
1821 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1822 mask.ipv6 = flow_tcf_item_mask
1823 (items, &rte_flow_item_ipv6_mask,
1824 &flow_tcf_mask_supported.ipv6,
1825 &flow_tcf_mask_empty.ipv6,
1826 sizeof(flow_tcf_mask_supported.ipv6),
1829 if (!eth_type_set || !vlan_eth_type_set)
1830 mnl_attr_put_u16(nlh,
1832 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1833 TCA_FLOWER_KEY_ETH_TYPE,
1834 RTE_BE16(ETH_P_IPV6));
1836 vlan_eth_type_set = 1;
1837 if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1839 spec.ipv6 = items->spec;
1840 if (mask.ipv6->hdr.proto) {
1841 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1842 spec.ipv6->hdr.proto);
1845 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1846 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1847 sizeof(spec.ipv6->hdr.src_addr),
1848 spec.ipv6->hdr.src_addr);
1849 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1850 sizeof(mask.ipv6->hdr.src_addr),
1851 mask.ipv6->hdr.src_addr);
1853 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1854 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1855 sizeof(spec.ipv6->hdr.dst_addr),
1856 spec.ipv6->hdr.dst_addr);
1857 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1858 sizeof(mask.ipv6->hdr.dst_addr),
1859 mask.ipv6->hdr.dst_addr);
1862 case RTE_FLOW_ITEM_TYPE_UDP:
1863 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1864 mask.udp = flow_tcf_item_mask
1865 (items, &rte_flow_item_udp_mask,
1866 &flow_tcf_mask_supported.udp,
1867 &flow_tcf_mask_empty.udp,
1868 sizeof(flow_tcf_mask_supported.udp),
1872 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1874 if (mask.udp == &flow_tcf_mask_empty.udp)
1876 spec.udp = items->spec;
1877 if (mask.udp->hdr.src_port) {
1878 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1879 spec.udp->hdr.src_port);
1880 mnl_attr_put_u16(nlh,
1881 TCA_FLOWER_KEY_UDP_SRC_MASK,
1882 mask.udp->hdr.src_port);
1884 if (mask.udp->hdr.dst_port) {
1885 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1886 spec.udp->hdr.dst_port);
1887 mnl_attr_put_u16(nlh,
1888 TCA_FLOWER_KEY_UDP_DST_MASK,
1889 mask.udp->hdr.dst_port);
1892 case RTE_FLOW_ITEM_TYPE_TCP:
1893 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1894 mask.tcp = flow_tcf_item_mask
1895 (items, &rte_flow_item_tcp_mask,
1896 &flow_tcf_mask_supported.tcp,
1897 &flow_tcf_mask_empty.tcp,
1898 sizeof(flow_tcf_mask_supported.tcp),
1902 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1904 if (mask.tcp == &flow_tcf_mask_empty.tcp)
1906 spec.tcp = items->spec;
1907 if (mask.tcp->hdr.src_port) {
1908 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
1909 spec.tcp->hdr.src_port);
1910 mnl_attr_put_u16(nlh,
1911 TCA_FLOWER_KEY_TCP_SRC_MASK,
1912 mask.tcp->hdr.src_port);
1914 if (mask.tcp->hdr.dst_port) {
1915 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
1916 spec.tcp->hdr.dst_port);
1917 mnl_attr_put_u16(nlh,
1918 TCA_FLOWER_KEY_TCP_DST_MASK,
1919 mask.tcp->hdr.dst_port);
1921 if (mask.tcp->hdr.tcp_flags) {
1924 TCA_FLOWER_KEY_TCP_FLAGS,
1926 (spec.tcp->hdr.tcp_flags));
1929 TCA_FLOWER_KEY_TCP_FLAGS_MASK,
1931 (mask.tcp->hdr.tcp_flags));
1935 return rte_flow_error_set(error, ENOTSUP,
1936 RTE_FLOW_ERROR_TYPE_ITEM,
1937 NULL, "item not supported");
1940 na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
1941 na_act_index_cur = 1;
1942 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1943 struct nlattr *na_act_index;
1944 struct nlattr *na_act;
1945 unsigned int vlan_act;
1948 switch (actions->type) {
1949 case RTE_FLOW_ACTION_TYPE_VOID:
1951 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1952 conf.port_id = actions->conf;
1953 if (conf.port_id->original)
1956 for (i = 0; ptoi[i].ifindex; ++i)
1957 if (ptoi[i].port_id == conf.port_id->id)
1959 assert(ptoi[i].ifindex);
1961 mnl_attr_nest_start(nlh, na_act_index_cur++);
1962 assert(na_act_index);
1963 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
1964 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1966 mnl_attr_put(nlh, TCA_MIRRED_PARMS,
1967 sizeof(struct tc_mirred),
1968 &(struct tc_mirred){
1969 .action = TC_ACT_STOLEN,
1970 .eaction = TCA_EGRESS_REDIR,
1971 .ifindex = ptoi[i].ifindex,
1973 mnl_attr_nest_end(nlh, na_act);
1974 mnl_attr_nest_end(nlh, na_act_index);
1976 case RTE_FLOW_ACTION_TYPE_JUMP:
1977 conf.jump = actions->conf;
1979 mnl_attr_nest_start(nlh, na_act_index_cur++);
1980 assert(na_act_index);
1981 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1982 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1984 mnl_attr_put(nlh, TCA_GACT_PARMS,
1985 sizeof(struct tc_gact),
1987 .action = TC_ACT_GOTO_CHAIN |
1990 mnl_attr_nest_end(nlh, na_act);
1991 mnl_attr_nest_end(nlh, na_act_index);
1993 case RTE_FLOW_ACTION_TYPE_DROP:
1995 mnl_attr_nest_start(nlh, na_act_index_cur++);
1996 assert(na_act_index);
1997 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1998 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2000 mnl_attr_put(nlh, TCA_GACT_PARMS,
2001 sizeof(struct tc_gact),
2003 .action = TC_ACT_SHOT,
2005 mnl_attr_nest_end(nlh, na_act);
2006 mnl_attr_nest_end(nlh, na_act_index);
2008 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
2009 conf.of_push_vlan = NULL;
2010 vlan_act = TCA_VLAN_ACT_POP;
2011 goto action_of_vlan;
2012 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
2013 conf.of_push_vlan = actions->conf;
2014 vlan_act = TCA_VLAN_ACT_PUSH;
2015 goto action_of_vlan;
2016 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
2017 conf.of_set_vlan_vid = actions->conf;
2019 goto override_na_vlan_id;
2020 vlan_act = TCA_VLAN_ACT_MODIFY;
2021 goto action_of_vlan;
2022 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
2023 conf.of_set_vlan_pcp = actions->conf;
2024 if (na_vlan_priority)
2025 goto override_na_vlan_priority;
2026 vlan_act = TCA_VLAN_ACT_MODIFY;
2027 goto action_of_vlan;
2030 mnl_attr_nest_start(nlh, na_act_index_cur++);
2031 assert(na_act_index);
2032 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
2033 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2035 mnl_attr_put(nlh, TCA_VLAN_PARMS,
2036 sizeof(struct tc_vlan),
2038 .action = TC_ACT_PIPE,
2039 .v_action = vlan_act,
2041 if (vlan_act == TCA_VLAN_ACT_POP) {
2042 mnl_attr_nest_end(nlh, na_act);
2043 mnl_attr_nest_end(nlh, na_act_index);
2046 if (vlan_act == TCA_VLAN_ACT_PUSH)
2047 mnl_attr_put_u16(nlh,
2048 TCA_VLAN_PUSH_VLAN_PROTOCOL,
2049 conf.of_push_vlan->ethertype);
2050 na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
2051 mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
2052 na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
2053 mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
2054 mnl_attr_nest_end(nlh, na_act);
2055 mnl_attr_nest_end(nlh, na_act_index);
2056 if (actions->type ==
2057 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
2058 override_na_vlan_id:
2059 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
2060 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
2062 (conf.of_set_vlan_vid->vlan_vid);
2063 } else if (actions->type ==
2064 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
2065 override_na_vlan_priority:
2066 na_vlan_priority->nla_type =
2067 TCA_VLAN_PUSH_VLAN_PRIORITY;
2068 *(uint8_t *)mnl_attr_get_payload
2069 (na_vlan_priority) =
2070 conf.of_set_vlan_pcp->vlan_pcp;
2073 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
2074 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
2075 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
2076 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
2077 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
2078 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
2079 case RTE_FLOW_ACTION_TYPE_SET_TTL:
2080 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
2081 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
2082 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
2084 mnl_attr_nest_start(nlh, na_act_index_cur++);
2085 flow_tcf_create_pedit_mnl_msg(nlh,
2086 &actions, item_flags);
2087 mnl_attr_nest_end(nlh, na_act_index);
2090 return rte_flow_error_set(error, ENOTSUP,
2091 RTE_FLOW_ERROR_TYPE_ACTION,
2093 "action not supported");
2097 assert(na_flower_act);
2098 mnl_attr_nest_end(nlh, na_flower_act);
2099 mnl_attr_nest_end(nlh, na_flower);
2104 * Send Netlink message with acknowledgment.
2107 * Libmnl socket to use.
2109 * Message to send. This function always raises the NLM_F_ACK flag before
2113 * 0 on success, a negative errno value otherwise and rte_errno is set.
2116 flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
2118 alignas(struct nlmsghdr)
2119 uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
2120 nlh->nlmsg_len - sizeof(*nlh)];
2121 uint32_t seq = random();
2124 nlh->nlmsg_flags |= NLM_F_ACK;
2125 nlh->nlmsg_seq = seq;
2126 ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
2128 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
2131 (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
2139 * Apply flow to E-Switch by sending Netlink message.
2142 * Pointer to Ethernet device.
2143 * @param[in, out] flow
2144 * Pointer to the sub flow.
2146 * Pointer to the error structure.
2149 * 0 on success, a negative errno value otherwise and rte_ernno is set.
2152 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2153 struct rte_flow_error *error)
2155 struct priv *priv = dev->data->dev_private;
2156 struct mnl_socket *nl = priv->mnl_socket;
2157 struct mlx5_flow *dev_flow;
2158 struct nlmsghdr *nlh;
2160 dev_flow = LIST_FIRST(&flow->dev_flows);
2161 /* E-Switch flow can't be expanded. */
2162 assert(!LIST_NEXT(dev_flow, next));
2163 nlh = dev_flow->tcf.nlh;
2164 nlh->nlmsg_type = RTM_NEWTFILTER;
2165 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2166 if (!flow_tcf_nl_ack(nl, nlh))
2168 return rte_flow_error_set(error, rte_errno,
2169 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2170 "netlink: failed to create TC flow rule");
2174 * Remove flow from E-Switch by sending Netlink message.
2177 * Pointer to Ethernet device.
2178 * @param[in, out] flow
2179 * Pointer to the sub flow.
2182 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2184 struct priv *priv = dev->data->dev_private;
2185 struct mnl_socket *nl = priv->mnl_socket;
2186 struct mlx5_flow *dev_flow;
2187 struct nlmsghdr *nlh;
2191 dev_flow = LIST_FIRST(&flow->dev_flows);
2194 /* E-Switch flow can't be expanded. */
2195 assert(!LIST_NEXT(dev_flow, next));
2196 nlh = dev_flow->tcf.nlh;
2197 nlh->nlmsg_type = RTM_DELTFILTER;
2198 nlh->nlmsg_flags = NLM_F_REQUEST;
2199 flow_tcf_nl_ack(nl, nlh);
2203 * Remove flow from E-Switch and release resources of the device flow.
2206 * Pointer to Ethernet device.
2207 * @param[in, out] flow
2208 * Pointer to the sub flow.
2211 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2213 struct mlx5_flow *dev_flow;
2217 flow_tcf_remove(dev, flow);
2218 dev_flow = LIST_FIRST(&flow->dev_flows);
2221 /* E-Switch flow can't be expanded. */
2222 assert(!LIST_NEXT(dev_flow, next));
2223 LIST_REMOVE(dev_flow, next);
2227 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2228 .validate = flow_tcf_validate,
2229 .prepare = flow_tcf_prepare,
2230 .translate = flow_tcf_translate,
2231 .apply = flow_tcf_apply,
2232 .remove = flow_tcf_remove,
2233 .destroy = flow_tcf_destroy,
2237 * Initialize ingress qdisc of a given network interface.
2240 * Libmnl socket of the @p NETLINK_ROUTE kind.
2242 * Index of network interface to initialize.
2244 * Perform verbose error reporting if not NULL.
2247 * 0 on success, a negative errno value otherwise and rte_errno is set.
2250 mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
2251 struct rte_flow_error *error)
2253 struct nlmsghdr *nlh;
2255 alignas(struct nlmsghdr)
2256 uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2258 /* Destroy existing ingress qdisc and everything attached to it. */
2259 nlh = mnl_nlmsg_put_header(buf);
2260 nlh->nlmsg_type = RTM_DELQDISC;
2261 nlh->nlmsg_flags = NLM_F_REQUEST;
2262 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2263 tcm->tcm_family = AF_UNSPEC;
2264 tcm->tcm_ifindex = ifindex;
2265 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2266 tcm->tcm_parent = TC_H_INGRESS;
2267 /* Ignore errors when qdisc is already absent. */
2268 if (flow_tcf_nl_ack(nl, nlh) &&
2269 rte_errno != EINVAL && rte_errno != ENOENT)
2270 return rte_flow_error_set(error, rte_errno,
2271 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2272 "netlink: failed to remove ingress"
2274 /* Create fresh ingress qdisc. */
2275 nlh = mnl_nlmsg_put_header(buf);
2276 nlh->nlmsg_type = RTM_NEWQDISC;
2277 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2278 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2279 tcm->tcm_family = AF_UNSPEC;
2280 tcm->tcm_ifindex = ifindex;
2281 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2282 tcm->tcm_parent = TC_H_INGRESS;
2283 mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2284 if (flow_tcf_nl_ack(nl, nlh))
2285 return rte_flow_error_set(error, rte_errno,
2286 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2287 "netlink: failed to create ingress"
2293 * Create and configure a libmnl socket for Netlink flow rules.
2296 * A valid libmnl socket object pointer on success, NULL otherwise and
2300 mlx5_flow_tcf_socket_create(void)
2302 struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
2305 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2307 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2312 mnl_socket_close(nl);
/**
 * Destroy a libmnl socket.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind.
 */
void
mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
{
	mnl_socket_close(nl);
}