1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
22 #include <sys/socket.h>
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
28 #include <rte_malloc.h>
31 #include "mlx5_flow.h"
32 #include "mlx5_autoconf.h"
34 #ifdef HAVE_TC_ACT_VLAN
36 #include <linux/tc_act/tc_vlan.h>
38 #else /* HAVE_TC_ACT_VLAN */
40 #define TCA_VLAN_ACT_POP 1
41 #define TCA_VLAN_ACT_PUSH 2
42 #define TCA_VLAN_ACT_MODIFY 3
43 #define TCA_VLAN_PARMS 2
44 #define TCA_VLAN_PUSH_VLAN_ID 3
45 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
46 #define TCA_VLAN_PAD 5
47 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
54 #endif /* HAVE_TC_ACT_VLAN */
56 #ifdef HAVE_TC_ACT_PEDIT
58 #include <linux/tc_act/tc_pedit.h>
60 #else /* HAVE_TC_ACT_PEDIT */
/* Fallback definitions for kernels lacking <linux/tc_act/tc_pedit.h>. */
74 TCA_PEDIT_KEY_EX_HTYPE = 1,
75 TCA_PEDIT_KEY_EX_CMD = 2,
76 __TCA_PEDIT_KEY_EX_MAX
79 enum pedit_header_type {
80 TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
81 TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
82 TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
83 TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
84 TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
85 TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
90 TCA_PEDIT_KEY_EX_CMD_SET = 0,
91 TCA_PEDIT_KEY_EX_CMD_ADD = 1,
98 __u32 off; /* Byte offset of the 32-bit word to rewrite. */
105 struct tc_pedit_sel {
109 struct tc_pedit_key keys[0];
112 #endif /* HAVE_TC_ACT_PEDIT */
114 /* Normally found in linux/netlink.h. */
115 #ifndef NETLINK_CAP_ACK
116 #define NETLINK_CAP_ACK 10
119 /* Normally found in linux/pkt_sched.h. */
120 #ifndef TC_H_MIN_INGRESS
121 #define TC_H_MIN_INGRESS 0xfff2u
124 /* Normally found in linux/pkt_cls.h. */
125 #ifndef TCA_CLS_FLAGS_SKIP_SW
126 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
128 #ifndef HAVE_TCA_CHAIN
131 #ifndef HAVE_TCA_FLOWER_ACT
132 #define TCA_FLOWER_ACT 3
134 #ifndef HAVE_TCA_FLOWER_FLAGS
135 #define TCA_FLOWER_FLAGS 22
137 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
138 #define TCA_FLOWER_KEY_ETH_TYPE 8
140 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
141 #define TCA_FLOWER_KEY_ETH_DST 4
143 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
144 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
146 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
147 #define TCA_FLOWER_KEY_ETH_SRC 6
149 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
150 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
152 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
153 #define TCA_FLOWER_KEY_IP_PROTO 9
155 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
156 #define TCA_FLOWER_KEY_IPV4_SRC 10
158 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
159 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
161 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
162 #define TCA_FLOWER_KEY_IPV4_DST 12
164 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
165 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
167 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
168 #define TCA_FLOWER_KEY_IPV6_SRC 14
170 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
171 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
173 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
174 #define TCA_FLOWER_KEY_IPV6_DST 16
176 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
177 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
179 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
180 #define TCA_FLOWER_KEY_TCP_SRC 18
182 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
183 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
185 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
186 #define TCA_FLOWER_KEY_TCP_DST 19
188 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
189 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
191 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
192 #define TCA_FLOWER_KEY_UDP_SRC 20
194 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
195 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
197 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
198 #define TCA_FLOWER_KEY_UDP_DST 21
200 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
201 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
203 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
204 #define TCA_FLOWER_KEY_VLAN_ID 23
206 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
207 #define TCA_FLOWER_KEY_VLAN_PRIO 24
209 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
210 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
212 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
213 #define TCA_FLOWER_KEY_TCP_FLAGS 71
215 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
216 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
218 #ifndef HAVE_TC_ACT_GOTO_CHAIN
219 #define TC_ACT_GOTO_CHAIN 0x20000000
222 #ifndef IPV6_ADDR_LEN
223 #define IPV6_ADDR_LEN 16
226 #ifndef IPV4_ADDR_LEN
227 #define IPV4_ADDR_LEN 4
231 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
239 * Structure for holding netlink context.
240 * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
241 * Using this (8KB) buffer size ensures that netlink messages will never be
244 struct mlx5_flow_tcf_context {
245 struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
/* NOTE(review): presumably incremented per request to match kernel acks
 * against outstanding messages — confirm at the send/recv call sites. */
246 uint32_t seq; /* Message sequence number. */
247 uint32_t buf_size; /* Message buffer size. */
248 uint8_t *buf; /* Message buffer. */
251 /** Empty masks for known item types. */
/* All-zero masks: flow_tcf_item_mask() returns one of these when an item
 * carries no specification, so callers can detect "match anything". */
253 struct rte_flow_item_port_id port_id;
254 struct rte_flow_item_eth eth;
255 struct rte_flow_item_vlan vlan;
256 struct rte_flow_item_ipv4 ipv4;
257 struct rte_flow_item_ipv6 ipv6;
258 struct rte_flow_item_tcp tcp;
259 struct rte_flow_item_udp udp;
260 } flow_tcf_mask_empty;
262 /** Supported masks for known item types. */
/* Upper bound on match fields this backend can offload; validation rejects
 * any item mask with bits set outside these (see flow_tcf_item_mask()). */
263 static const struct {
264 struct rte_flow_item_port_id port_id;
265 struct rte_flow_item_eth eth;
266 struct rte_flow_item_vlan vlan;
267 struct rte_flow_item_ipv4 ipv4;
268 struct rte_flow_item_ipv6 ipv6;
269 struct rte_flow_item_tcp tcp;
270 struct rte_flow_item_udp udp;
271 } flow_tcf_mask_supported = {
276 .type = RTE_BE16(0xffff),
277 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
278 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
281 /* PCP and VID only, no DEI. */
282 .tci = RTE_BE16(0xefff),
283 .inner_type = RTE_BE16(0xffff),
286 .next_proto_id = 0xff,
287 .src_addr = RTE_BE32(0xffffffff),
288 .dst_addr = RTE_BE32(0xffffffff),
293 "\xff\xff\xff\xff\xff\xff\xff\xff"
294 "\xff\xff\xff\xff\xff\xff\xff\xff",
296 "\xff\xff\xff\xff\xff\xff\xff\xff"
297 "\xff\xff\xff\xff\xff\xff\xff\xff",
300 .src_port = RTE_BE16(0xffff),
301 .dst_port = RTE_BE16(0xffff),
305 .src_port = RTE_BE16(0xffff),
306 .dst_port = RTE_BE16(0xffff),
310 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
311 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
312 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
313 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
314 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
316 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
318 /** DPDK port to network interface index (ifindex) conversion. */
/* Table entries are terminated by a zero ifindex (see
 * flow_tcf_build_ptoi_table()). */
319 struct flow_tcf_ptoi {
320 uint16_t port_id; /**< DPDK port ID. */
321 unsigned int ifindex; /**< Network interface index. */
324 /* Due to a limitation on driver/FW. */
325 #define MLX5_TCF_GROUP_ID_MAX 3
326 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
328 #define MLX5_TCF_FATE_ACTIONS \
329 (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
330 MLX5_FLOW_ACTION_JUMP)
332 #define MLX5_TCF_VLAN_ACTIONS \
333 (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
334 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
336 #define MLX5_TCF_PEDIT_ACTIONS \
337 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
338 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
339 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
340 MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
341 MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
343 #define MLX5_TCF_CONFIG_ACTIONS \
344 (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
345 MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
346 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
347 (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
349 #define MAX_PEDIT_KEYS 128
350 #define SZ_PEDIT_KEY_VAL 4
352 #define NUM_OF_PEDIT_KEYS(sz) \
353 (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
/* Extended key attributes sent alongside each tc_pedit_key
 * (TCA_PEDIT_KEY_EX nest): header type and SET/ADD command. */
355 struct pedit_key_ex {
356 enum pedit_header_type htype;
/* Accumulator for translating rte_flow modify-header actions into a
 * single TC pedit action; sel.nkeys tracks how many keys are filled. */
360 struct pedit_parser {
361 struct tc_pedit_sel sel;
362 struct tc_pedit_key keys[MAX_PEDIT_KEYS];
363 struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
368 * Set pedit key of MAC address
371 * pointer to action specification
372 * @param[in,out] p_parser
373 * pointer to pedit_parser
376 flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
377 struct pedit_parser *p_parser)
379 int idx = p_parser->sel.nkeys;
/* Source vs. destination MAC selects the offset within the L2 header. */
380 uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
381 offsetof(struct ether_hdr, s_addr) :
382 offsetof(struct ether_hdr, d_addr);
383 const struct rte_flow_action_set_mac *conf =
384 (const struct rte_flow_action_set_mac *)actions->conf;
/* A 6-byte MAC needs two 32-bit pedit keys: first the leading 4 bytes. */
386 p_parser->keys[idx].off = off;
/* ~UINT32_MAX == 0: no bits preserved, whole 32-bit word is rewritten
 * (NOTE(review): pedit mask semantics assumed — confirm vs. kernel act_pedit). */
387 p_parser->keys[idx].mask = ~UINT32_MAX;
388 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
389 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
390 memcpy(&p_parser->keys[idx].val,
391 conf->mac_addr, SZ_PEDIT_KEY_VAL);
/* Second key: remaining 2 bytes of the MAC; upper half of word kept. */
393 p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
394 p_parser->keys[idx].mask = 0xFFFF0000;
395 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
396 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
397 memcpy(&p_parser->keys[idx].val,
398 conf->mac_addr + SZ_PEDIT_KEY_VAL,
399 ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
400 p_parser->sel.nkeys = (++idx);
404 * Set pedit key of decrease/set ttl
407 * pointer to action specification
408 * @param[in,out] p_parser
409 * pointer to pedit_parser
410 * @param[in] item_flags
411 * flags of all items presented
414 flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
415 struct pedit_parser *p_parser,
418 int idx = p_parser->sel.nkeys;
/* Only the low byte of the 32-bit word (TTL/hop-limit) is modified. */
420 p_parser->keys[idx].mask = 0xFFFFFF00;
/* The matched L3 item decides which header type/offset the key targets. */
421 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
422 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
423 p_parser->keys[idx].off =
424 offsetof(struct ipv4_hdr, time_to_live);
426 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
427 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
428 p_parser->keys[idx].off =
429 offsetof(struct ipv6_hdr, hop_limits);
/* DEC_TTL: ADD 0xFF to the byte == decrement by one modulo 256
 * (NOTE(review): assumes kernel pedit ADD wraps per masked field — confirm). */
431 if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
432 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
433 p_parser->keys[idx].val = 0x000000FF;
/* SET_TTL: write the requested value directly. */
435 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
436 p_parser->keys[idx].val =
437 (__u32)((const struct rte_flow_action_set_ttl *)
438 actions->conf)->ttl_value;
440 p_parser->sel.nkeys = (++idx);
444 * Set pedit key of transport (TCP/UDP) port value
447 * pointer to action specification
448 * @param[in,out] p_parser
449 * pointer to pedit_parser
450 * @param[in] item_flags
451 * flags of all items presented
454 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
455 struct pedit_parser *p_parser,
458 int idx = p_parser->sel.nkeys;
/* Header type follows whichever L4 item the pattern matched. */
460 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
461 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
462 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
463 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
464 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
465 /* offset of src/dst port is same for TCP and UDP */
466 p_parser->keys[idx].off =
467 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
468 offsetof(struct tcp_hdr, src_port) :
469 offsetof(struct tcp_hdr, dst_port);
/* Rewrite only the 16-bit port; the other half of the word is kept. */
470 p_parser->keys[idx].mask = 0xFFFF0000;
471 p_parser->keys[idx].val =
472 (__u32)((const struct rte_flow_action_set_tp *)
473 actions->conf)->port;
474 p_parser->sel.nkeys = (++idx);
478 * Set pedit key of ipv6 address
481 * pointer to action specification
482 * @param[in,out] p_parser
483 * pointer to pedit_parser
486 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
487 struct pedit_parser *p_parser)
489 int idx = p_parser->sel.nkeys;
/* A 16-byte IPv6 address spans four 32-bit pedit keys. */
490 int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
492 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
493 offsetof(struct ipv6_hdr, src_addr) :
494 offsetof(struct ipv6_hdr, dst_addr);
495 const struct rte_flow_action_set_ipv6 *conf =
496 (const struct rte_flow_action_set_ipv6 *)actions->conf;
/* One key per 4-byte chunk, each replacing its word entirely (mask 0). */
498 for (int i = 0; i < keys; i++, idx++) {
499 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
500 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
501 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
502 p_parser->keys[idx].mask = ~UINT32_MAX;
503 memcpy(&p_parser->keys[idx].val,
504 conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
507 p_parser->sel.nkeys += keys;
511 * Set pedit key of ipv4 address
514 * pointer to action specification
515 * @param[in,out] p_parser
516 * pointer to pedit_parser
519 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
520 struct pedit_parser *p_parser)
522 int idx = p_parser->sel.nkeys;
524 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
525 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
526 p_parser->keys[idx].off =
527 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
528 offsetof(struct ipv4_hdr, src_addr) :
529 offsetof(struct ipv4_hdr, dst_addr);
/* ~UINT32_MAX == 0: the whole 4-byte address word is replaced. */
530 p_parser->keys[idx].mask = ~UINT32_MAX;
/* ipv4_addr is already big-endian per rte_flow; stored as-is. */
531 p_parser->keys[idx].val =
532 ((const struct rte_flow_action_set_ipv4 *)
533 actions->conf)->ipv4_addr;
534 p_parser->sel.nkeys = (++idx);
538 * Create the pedit's na attribute in netlink message
539 * on pre-allocate message buffer
542 * pointer to pre-allocated netlink message buffer
543 * @param[in,out] actions
544 * pointer to pointer of actions specification.
545 * @param[in,out] action_flags
546 * pointer to actions flags
547 * @param[in] item_flags
548 * flags of all item presented
551 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
552 const struct rte_flow_action **actions,
555 struct pedit_parser p_parser;
556 struct nlattr *na_act_options;
557 struct nlattr *na_pedit_keys;
559 memset(&p_parser, 0, sizeof(p_parser));
560 mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
561 na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
562 /* all modify header actions should be in one tc-pedit action */
/* Advances *actions past every consecutive modify-header action so the
 * caller resumes at the first non-pedit action. */
563 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
564 switch ((*actions)->type) {
565 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
566 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
567 flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
569 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
570 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
571 flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
573 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
574 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
575 flow_tcf_pedit_key_set_tp_port(*actions,
576 &p_parser, item_flags);
578 case RTE_FLOW_ACTION_TYPE_SET_TTL:
579 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
580 flow_tcf_pedit_key_set_dec_ttl(*actions,
581 &p_parser, item_flags);
583 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
584 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
585 flow_tcf_pedit_key_set_mac(*actions, &p_parser);
/* First non-modify action ends the pedit run. */
588 goto pedit_mnl_msg_done;
/* TC_ACT_PIPE: continue to the following action after the rewrite. */
592 p_parser.sel.action = TC_ACT_PIPE;
/* Selector plus only the keys actually filled are serialized. */
593 mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
594 sizeof(p_parser.sel) +
595 p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
598 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
/* One nested TCA_PEDIT_KEY_EX (htype + cmd) per key. */
599 for (int i = 0; i < p_parser.sel.nkeys; i++) {
600 struct nlattr *na_pedit_key =
601 mnl_attr_nest_start(nl,
602 TCA_PEDIT_KEY_EX | NLA_F_NESTED);
603 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
604 p_parser.keys_ex[i].htype);
605 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
606 p_parser.keys_ex[i].cmd);
607 mnl_attr_nest_end(nl, na_pedit_key);
609 mnl_attr_nest_end(nl, na_pedit_keys);
610 mnl_attr_nest_end(nl, na_act_options);
615 * Calculate max memory size of one TC-pedit actions.
616 * One TC-pedit action can contain set of keys each defining
617 * a rewrite element (rte_flow action)
619 * @param[in,out] actions
620 * actions specification.
621 * @param[in,out] action_flags
623 * @param[in,out] size
626 * Max memory size of one TC-pedit action
629 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
630 uint64_t *action_flags)
/* Fixed overhead: action index nest + "pedit" kind + options nest. */
636 pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
637 SZ_NLATTR_STRZ_OF("pedit") +
638 SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
/* Count pedit keys for each consecutive modify-header action; loop must
 * mirror the cases handled by flow_tcf_create_pedit_mnl_msg(). */
639 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
640 switch ((*actions)->type) {
641 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
642 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
643 flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
645 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
646 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
647 flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
649 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
650 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
651 flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
653 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
654 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
655 flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
657 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
658 /* TCP is as same as UDP */
659 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
660 flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
662 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
663 /* TCP is as same as UDP */
664 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
665 flags |= MLX5_FLOW_ACTION_SET_TP_DST;
667 case RTE_FLOW_ACTION_TYPE_SET_TTL:
668 keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
669 flags |= MLX5_FLOW_ACTION_SET_TTL;
671 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
672 keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
673 flags |= MLX5_FLOW_ACTION_DEC_TTL;
675 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
676 keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
677 flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
679 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
680 keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
681 flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
684 goto get_pedit_action_size_done;
687 get_pedit_action_size_done:
688 /* TCA_PEDIT_PARAMS_EX */
689 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
691 keys * sizeof(struct tc_pedit_key));
692 pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
694 /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
695 (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
696 SZ_NLATTR_DATA_OF(2));
/* Report which pedit actions were consumed back to the caller. */
697 (*action_flags) |= flags;
703 * Retrieve mask for pattern item.
705 * This function does basic sanity checks on a pattern item in order to
706 * return the most appropriate mask for it.
709 * Item specification.
710 * @param[in] mask_default
711 * Default mask for pattern item as specified by the flow API.
712 * @param[in] mask_supported
713 * Mask fields supported by the implementation.
714 * @param[in] mask_empty
715 * Empty mask to return when there is no specification.
717 * Perform verbose error reporting if not NULL.
720 * Either @p item->mask or one of the mask parameters on success, NULL
721 * otherwise and rte_errno is set.
724 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
725 const void *mask_supported, const void *mask_empty,
726 size_t mask_size, struct rte_flow_error *error)
731 /* item->last and item->mask cannot exist without item->spec. */
732 if (!item->spec && (item->mask || item->last)) {
733 rte_flow_error_set(error, EINVAL,
734 RTE_FLOW_ERROR_TYPE_ITEM, item,
735 "\"mask\" or \"last\" field provided without"
736 " a corresponding \"spec\"");
739 /* No spec, no mask, no problem. */
/* No explicit mask: fall back to the flow API default for this item. */
742 mask = item->mask ? item->mask : mask_default;
745 * Single-pass check to make sure that:
746 * - Mask is supported, no bits are set outside mask_supported.
747 * - Both item->spec and item->last are included in mask.
749 for (i = 0; i != mask_size; ++i) {
/* Byte-wise test: mask must be a subset of mask_supported. */
752 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
753 ((const uint8_t *)mask_supported)[i]) {
754 rte_flow_error_set(error, ENOTSUP,
755 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
756 "unsupported field found"
/* spec..last ranges cannot be expressed: masked bytes must agree. */
761 (((const uint8_t *)item->spec)[i] & mask[i]) !=
762 (((const uint8_t *)item->last)[i] & mask[i])) {
763 rte_flow_error_set(error, EINVAL,
764 RTE_FLOW_ERROR_TYPE_ITEM_LAST,
766 "range between \"spec\" and \"last\""
767 " not comprised in \"mask\"");
775 * Build a conversion table between port ID and ifindex.
778 * Pointer to Ethernet device.
780 * Pointer to ptoi table.
782 * Size of ptoi table provided.
785 * Size of ptoi table filled.
788 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
791 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
/* VLA sized by the first mlx5_dev_to_port_id() probe (+1 for self). */
792 uint16_t port_id[n + 1];
794 unsigned int own = 0;
796 /* At least one port is needed when no switch domain is present. */
799 port_id[0] = dev->data->port_id;
/* Second call fills port_id[]; clamp in case the count changed. */
801 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
805 for (i = 0; i != n; ++i) {
806 struct rte_eth_dev_info dev_info;
808 rte_eth_dev_info_get(port_id[i], &dev_info);
809 if (port_id[i] == dev->data->port_id)
811 ptoi[i].port_id = port_id[i];
812 ptoi[i].ifindex = dev_info.if_index;
814 /* Ensure first entry of ptoi[] is the current device. */
820 /* An entry with zero ifindex terminates ptoi[]. */
827 * Verify the @p attr will be correctly understood by the E-switch.
830 * Pointer to flow attributes
832 * Pointer to error structure.
835 * 0 on success, a negative errno value otherwise and rte_errno is set.
838 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
839 struct rte_flow_error *error)
842 * Supported attributes: groups, some priorities and ingress only.
843 * group is supported only if kernel supports chain. Don't care about
844 * transfer as it is the caller's problem.
/* Group IDs are capped by a driver/FW limitation (MLX5_TCF_GROUP_ID_MAX). */
846 if (attr->group > MLX5_TCF_GROUP_ID_MAX)
847 return rte_flow_error_set(error, ENOTSUP,
848 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
849 "group ID larger than "
850 RTE_STR(MLX5_TCF_GROUP_ID_MAX)
/* Non-zero groups have a tighter priority cap than group 0. */
852 else if (attr->group > 0 &&
853 attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
854 return rte_flow_error_set(error, ENOTSUP,
855 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
857 "lowest priority level is "
858 RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
859 " when group is configured");
860 else if (attr->priority > 0xfffe)
861 return rte_flow_error_set(error, ENOTSUP,
862 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
864 "lowest priority level is 0xfffe");
/* Ingress is mandatory, egress unsupported for E-Switch TC offload. */
866 return rte_flow_error_set(error, EINVAL,
867 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
868 attr, "only ingress is supported");
870 return rte_flow_error_set(error, ENOTSUP,
871 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
872 attr, "egress is not supported");
877 * Validate flow for E-Switch.
880 * Pointer to the priv structure.
882 * Pointer to the flow attributes.
884 * Pointer to the list of items.
886 * Pointer to the list of actions.
888 * Pointer to the error structure.
891 * 0 on success, a negative errno value otherwise and rte_ernno is set.
894 flow_tcf_validate(struct rte_eth_dev *dev,
895 const struct rte_flow_attr *attr,
896 const struct rte_flow_item items[],
897 const struct rte_flow_action actions[],
898 struct rte_flow_error *error)
901 const struct rte_flow_item_port_id *port_id;
902 const struct rte_flow_item_eth *eth;
903 const struct rte_flow_item_vlan *vlan;
904 const struct rte_flow_item_ipv4 *ipv4;
905 const struct rte_flow_item_ipv6 *ipv6;
906 const struct rte_flow_item_tcp *tcp;
907 const struct rte_flow_item_udp *udp;
910 const struct rte_flow_action_port_id *port_id;
911 const struct rte_flow_action_jump *jump;
912 const struct rte_flow_action_of_push_vlan *of_push_vlan;
913 const struct rte_flow_action_of_set_vlan_vid *
915 const struct rte_flow_action_of_set_vlan_pcp *
917 const struct rte_flow_action_set_ipv4 *set_ipv4;
918 const struct rte_flow_action_set_ipv6 *set_ipv6;
920 uint32_t item_flags = 0;
921 uint32_t action_flags = 0;
922 uint8_t next_protocol = -1;
923 unsigned int tcm_ifindex = 0;
924 uint8_t pedit_validated = 0;
925 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
926 struct rte_eth_dev *port_id_dev = NULL;
930 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
931 PTOI_TABLE_SZ_MAX(dev)));
932 ret = flow_tcf_validate_attributes(attr, error);
935 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
938 switch (items->type) {
939 case RTE_FLOW_ITEM_TYPE_VOID:
941 case RTE_FLOW_ITEM_TYPE_PORT_ID:
942 mask.port_id = flow_tcf_item_mask
943 (items, &rte_flow_item_port_id_mask,
944 &flow_tcf_mask_supported.port_id,
945 &flow_tcf_mask_empty.port_id,
946 sizeof(flow_tcf_mask_supported.port_id),
950 if (mask.port_id == &flow_tcf_mask_empty.port_id) {
954 spec.port_id = items->spec;
955 if (mask.port_id->id && mask.port_id->id != 0xffffffff)
956 return rte_flow_error_set
958 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
960 "no support for partial mask on"
962 if (!mask.port_id->id)
965 for (i = 0; ptoi[i].ifindex; ++i)
966 if (ptoi[i].port_id == spec.port_id->id)
968 if (!ptoi[i].ifindex)
969 return rte_flow_error_set
971 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
973 "missing data to convert port ID to"
975 if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
976 return rte_flow_error_set
978 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
980 "cannot match traffic for"
981 " several port IDs through"
982 " a single flow rule");
983 tcm_ifindex = ptoi[i].ifindex;
986 case RTE_FLOW_ITEM_TYPE_ETH:
987 ret = mlx5_flow_validate_item_eth(items, item_flags,
991 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
993 * Redundant check due to different supported mask.
994 * Same for the rest of items.
996 mask.eth = flow_tcf_item_mask
997 (items, &rte_flow_item_eth_mask,
998 &flow_tcf_mask_supported.eth,
999 &flow_tcf_mask_empty.eth,
1000 sizeof(flow_tcf_mask_supported.eth),
1004 if (mask.eth->type && mask.eth->type !=
1006 return rte_flow_error_set
1008 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1010 "no support for partial mask on"
1013 case RTE_FLOW_ITEM_TYPE_VLAN:
1014 ret = mlx5_flow_validate_item_vlan(items, item_flags,
1018 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1019 mask.vlan = flow_tcf_item_mask
1020 (items, &rte_flow_item_vlan_mask,
1021 &flow_tcf_mask_supported.vlan,
1022 &flow_tcf_mask_empty.vlan,
1023 sizeof(flow_tcf_mask_supported.vlan),
1027 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
1028 (mask.vlan->tci & RTE_BE16(0xe000)) !=
1029 RTE_BE16(0xe000)) ||
1030 (mask.vlan->tci & RTE_BE16(0x0fff) &&
1031 (mask.vlan->tci & RTE_BE16(0x0fff)) !=
1032 RTE_BE16(0x0fff)) ||
1033 (mask.vlan->inner_type &&
1034 mask.vlan->inner_type != RTE_BE16(0xffff)))
1035 return rte_flow_error_set
1037 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1039 "no support for partial masks on"
1040 " \"tci\" (PCP and VID parts) and"
1041 " \"inner_type\" fields");
1043 case RTE_FLOW_ITEM_TYPE_IPV4:
1044 ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1048 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1049 mask.ipv4 = flow_tcf_item_mask
1050 (items, &rte_flow_item_ipv4_mask,
1051 &flow_tcf_mask_supported.ipv4,
1052 &flow_tcf_mask_empty.ipv4,
1053 sizeof(flow_tcf_mask_supported.ipv4),
1057 if (mask.ipv4->hdr.next_proto_id &&
1058 mask.ipv4->hdr.next_proto_id != 0xff)
1059 return rte_flow_error_set
1061 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1063 "no support for partial mask on"
1064 " \"hdr.next_proto_id\" field");
1065 else if (mask.ipv4->hdr.next_proto_id)
1067 ((const struct rte_flow_item_ipv4 *)
1068 (items->spec))->hdr.next_proto_id;
1070 case RTE_FLOW_ITEM_TYPE_IPV6:
1071 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1075 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1076 mask.ipv6 = flow_tcf_item_mask
1077 (items, &rte_flow_item_ipv6_mask,
1078 &flow_tcf_mask_supported.ipv6,
1079 &flow_tcf_mask_empty.ipv6,
1080 sizeof(flow_tcf_mask_supported.ipv6),
1084 if (mask.ipv6->hdr.proto &&
1085 mask.ipv6->hdr.proto != 0xff)
1086 return rte_flow_error_set
1088 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1090 "no support for partial mask on"
1091 " \"hdr.proto\" field");
1092 else if (mask.ipv6->hdr.proto)
1094 ((const struct rte_flow_item_ipv6 *)
1095 (items->spec))->hdr.proto;
1097 case RTE_FLOW_ITEM_TYPE_UDP:
1098 ret = mlx5_flow_validate_item_udp(items, item_flags,
1099 next_protocol, error);
1102 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1103 mask.udp = flow_tcf_item_mask
1104 (items, &rte_flow_item_udp_mask,
1105 &flow_tcf_mask_supported.udp,
1106 &flow_tcf_mask_empty.udp,
1107 sizeof(flow_tcf_mask_supported.udp),
1112 case RTE_FLOW_ITEM_TYPE_TCP:
1113 ret = mlx5_flow_validate_item_tcp
1116 &flow_tcf_mask_supported.tcp,
1120 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1121 mask.tcp = flow_tcf_item_mask
1122 (items, &rte_flow_item_tcp_mask,
1123 &flow_tcf_mask_supported.tcp,
1124 &flow_tcf_mask_empty.tcp,
1125 sizeof(flow_tcf_mask_supported.tcp),
1131 return rte_flow_error_set(error, ENOTSUP,
1132 RTE_FLOW_ERROR_TYPE_ITEM,
1133 NULL, "item not supported");
1136 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1138 uint32_t current_action_flag = 0;
1140 switch (actions->type) {
1141 case RTE_FLOW_ACTION_TYPE_VOID:
1143 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1144 current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1147 conf.port_id = actions->conf;
1148 if (conf.port_id->original)
1151 for (i = 0; ptoi[i].ifindex; ++i)
1152 if (ptoi[i].port_id == conf.port_id->id)
1154 if (!ptoi[i].ifindex)
1155 return rte_flow_error_set
1157 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1159 "missing data to convert port ID to"
1161 port_id_dev = &rte_eth_devices[conf.port_id->id];
1163 case RTE_FLOW_ACTION_TYPE_JUMP:
1164 current_action_flag = MLX5_FLOW_ACTION_JUMP;
1167 conf.jump = actions->conf;
1168 if (attr->group >= conf.jump->group)
1169 return rte_flow_error_set
1171 RTE_FLOW_ERROR_TYPE_ACTION,
1173 "can jump only to a group forward");
1175 case RTE_FLOW_ACTION_TYPE_DROP:
1176 current_action_flag = MLX5_FLOW_ACTION_DROP;
1178 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1179 current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1181 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1182 current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1184 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1185 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1186 return rte_flow_error_set
1188 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1189 "vlan modify is not supported,"
1190 " set action must follow push action");
1191 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1193 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1194 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1195 return rte_flow_error_set
1197 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1198 "vlan modify is not supported,"
1199 " set action must follow push action");
1200 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1202 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1203 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1205 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1206 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1208 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1209 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1211 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1212 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1214 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1215 current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1217 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1218 current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1220 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1221 current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1223 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1224 current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1226 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1227 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
1229 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1230 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
1233 return rte_flow_error_set(error, ENOTSUP,
1234 RTE_FLOW_ERROR_TYPE_ACTION,
1236 "action not supported");
1238 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1240 return rte_flow_error_set(error, EINVAL,
1241 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1243 "action configuration not set");
1245 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1247 return rte_flow_error_set(error, ENOTSUP,
1248 RTE_FLOW_ERROR_TYPE_ACTION,
1250 "set actions should be "
1251 "listed successively");
1252 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1253 (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1254 pedit_validated = 1;
1255 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1256 (action_flags & MLX5_TCF_FATE_ACTIONS))
1257 return rte_flow_error_set(error, EINVAL,
1258 RTE_FLOW_ERROR_TYPE_ACTION,
1260 "can't have multiple fate"
1262 action_flags |= current_action_flag;
1264 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1265 (action_flags & MLX5_FLOW_ACTION_DROP))
1266 return rte_flow_error_set(error, ENOTSUP,
1267 RTE_FLOW_ERROR_TYPE_ACTION,
1269 "set action is not compatible with "
1271 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1272 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1273 return rte_flow_error_set(error, ENOTSUP,
1274 RTE_FLOW_ERROR_TYPE_ACTION,
1276 "set action must be followed by "
1279 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1280 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1281 return rte_flow_error_set(error, EINVAL,
1282 RTE_FLOW_ERROR_TYPE_ACTION,
1284 "no ipv4 item found in"
1288 (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1289 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1290 return rte_flow_error_set(error, EINVAL,
1291 RTE_FLOW_ERROR_TYPE_ACTION,
1293 "no ipv6 item found in"
1297 (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1299 (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1300 MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1301 return rte_flow_error_set(error, EINVAL,
1302 RTE_FLOW_ERROR_TYPE_ACTION,
1304 "no TCP/UDP item found in"
1308 * FW syndrome (0xA9C090):
1309 * set_flow_table_entry: push vlan action fte in fdb can ONLY be
1310 * forward to the uplink.
1312 if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1313 (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1314 ((struct priv *)port_id_dev->data->dev_private)->representor)
1315 return rte_flow_error_set(error, ENOTSUP,
1316 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1317 "vlan push can only be applied"
1318 " when forwarding to uplink port");
1320 * FW syndrome (0x294609):
1321 * set_flow_table_entry: modify/pop/push actions in fdb flow table
1322 * are supported only while forwarding to vport.
1324 if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1325 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1326 return rte_flow_error_set(error, ENOTSUP,
1327 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1328 "vlan actions are supported"
1329 " only with port_id action");
1330 if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1331 return rte_flow_error_set(error, EINVAL,
1332 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1333 "no fate action is found");
1335 (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
1337 (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
1338 MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
1339 return rte_flow_error_set(error, EINVAL,
1340 RTE_FLOW_ERROR_TYPE_ACTION,
1342 "no IP found in pattern");
1345 (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
1346 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
1347 return rte_flow_error_set(error, ENOTSUP,
1348 RTE_FLOW_ERROR_TYPE_ACTION,
1350 "no ethernet found in"
1357 * Calculate maximum size of memory for flow items of Linux TC flower and
1358 * extract specified items.
1361 * Pointer to the list of items.
1362 * @param[out] item_flags
1363 * Pointer to the detected items.
1366 * Maximum size of memory for items.
/*
 * NOTE(review): this listing is a sampled excerpt -- the return type line,
 * local declarations, per-case `break;` statements, the default-case body
 * and the final `return size;` are not visible here.  Comments describe
 * only the visible code.
 *
 * Walks the item list once, ORing a MLX5_FLOW_LAYER_* bit into `flags`
 * for each recognized item and accumulating a worst-case Netlink
 * attribute size for the flower classifier message.
 */
1369 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1370 const struct rte_flow_item items[],
1371 uint64_t *item_flags)
/* Fixed overhead: "flower" kind string, options nest, SKIP_SW flags. */
1376 size += SZ_NLATTR_STRZ_OF("flower") +
1377 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1378 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
/* Non-zero group maps to a TC chain attribute. */
1379 if (attr->group > 0)
1380 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
1381 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1382 switch (items->type) {
/* VOID and PORT_ID items contribute no flower attributes. */
1383 case RTE_FLOW_ITEM_TYPE_VOID:
1385 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1387 case RTE_FLOW_ITEM_TYPE_ETH:
1388 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1389 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1390 /* dst/src MAC addr and mask. */
1391 flags |= MLX5_FLOW_LAYER_OUTER_L2;
1393 case RTE_FLOW_ITEM_TYPE_VLAN:
1394 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1395 SZ_NLATTR_TYPE_OF(uint16_t) +
1396 /* VLAN Ether type. */
1397 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1398 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1399 flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1401 case RTE_FLOW_ITEM_TYPE_IPV4:
1402 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1403 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1404 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1405 /* dst/src IP addr and mask. */
1406 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1408 case RTE_FLOW_ITEM_TYPE_IPV6:
1409 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1410 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1411 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1412 /* dst/src IP addr and mask. */
1413 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1415 case RTE_FLOW_ITEM_TYPE_UDP:
1416 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1417 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1418 /* dst/src port and mask. */
1419 flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1421 case RTE_FLOW_ITEM_TYPE_TCP:
1422 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1423 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1424 /* dst/src port and mask. */
1425 flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
/* Unknown items should have been rejected by validate(); log only. */
1429 "unsupported item %p type %d,"
1430 " items must be validated before flow creation",
1431 (const void *)items, items->type);
1435 *item_flags = flags;
1440 * Calculate maximum size of memory for flow actions of Linux TC flower and
1441 * extract specified actions.
1443 * @param[in] actions
1444 * Pointer to the list of actions.
1445 * @param[out] action_flags
1446 * Pointer to the detected actions.
1449 * Maximum size of memory for actions.
/*
 * NOTE(review): sampled excerpt -- return type, locals, `break;`
 * statements, the shared `action_of_vlan:` label and `return size;`
 * are not visible.  Comments cover only the visible code.
 *
 * Mirrors flow_tcf_get_items_and_size() for the action list: ORs a
 * MLX5_FLOW_ACTION_* bit per action and accumulates the worst-case
 * Netlink size of the corresponding TC action attributes.
 */
1452 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1453 uint64_t *action_flags)
1458 size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1459 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1460 switch (actions->type) {
1461 case RTE_FLOW_ACTION_TYPE_VOID:
/* PORT_ID translates to a "mirred" redirect action. */
1463 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1464 size += SZ_NLATTR_NEST + /* na_act_index. */
1465 SZ_NLATTR_STRZ_OF("mirred") +
1466 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1467 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1468 flags |= MLX5_FLOW_ACTION_PORT_ID;
/* JUMP and DROP both translate to a "gact" action. */
1470 case RTE_FLOW_ACTION_TYPE_JUMP:
1471 size += SZ_NLATTR_NEST + /* na_act_index. */
1472 SZ_NLATTR_STRZ_OF("gact") +
1473 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1474 SZ_NLATTR_TYPE_OF(struct tc_gact);
1475 flags |= MLX5_FLOW_ACTION_JUMP;
1477 case RTE_FLOW_ACTION_TYPE_DROP:
1478 size += SZ_NLATTR_NEST + /* na_act_index. */
1479 SZ_NLATTR_STRZ_OF("gact") +
1480 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1481 SZ_NLATTR_TYPE_OF(struct tc_gact);
1482 flags |= MLX5_FLOW_ACTION_DROP;
/* All four VLAN actions share the "vlan" TC action sizing below. */
1484 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1485 flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1486 goto action_of_vlan;
1487 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1488 flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1489 goto action_of_vlan;
1490 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1491 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1492 goto action_of_vlan;
1493 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1494 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1495 goto action_of_vlan;
/* Shared sizing for the "vlan" action (label not visible here). */
1497 size += SZ_NLATTR_NEST + /* na_act_index. */
1498 SZ_NLATTR_STRZ_OF("vlan") +
1499 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1500 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1501 SZ_NLATTR_TYPE_OF(uint16_t) +
1502 /* VLAN protocol. */
1503 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1504 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
/* All header-rewrite (pedit) actions are sized by a helper that may
 * consume several consecutive actions (it takes &actions). */
1506 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1507 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1508 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1509 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1510 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1511 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1512 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1513 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1514 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1515 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1516 size += flow_tcf_get_pedit_actions_size(&actions,
/* Unknown actions should have been rejected by validate(); log only. */
1521 "unsupported action %p type %d,"
1522 " items must be validated before flow creation",
1523 (const void *)actions, actions->type);
1527 *action_flags = flags;
1532 * Brand rtnetlink buffer with unique handle.
1534 * This handle should be unique for a given network interface to avoid
1538 * Pointer to Netlink message.
1540 * Unique 32-bit handle to use.
/*
 * Stamp the TC message payload with a caller-chosen 32-bit handle so the
 * kernel does not auto-assign one; the handle later identifies the rule.
 */
1543 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1545 struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1547 tcm->tcm_handle = handle;
1548 DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1549 (void *)nlh, handle);
1553 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1554 * memory required, allocates the memory, initializes Netlink message headers
1555 * and set unique TC message handle.
1558 * Pointer to the flow attributes.
1560 * Pointer to the list of items.
1561 * @param[in] actions
1562 * Pointer to the list of actions.
1563 * @param[out] item_flags
1564 * Pointer to bit mask of all items detected.
1565 * @param[out] action_flags
1566 * Pointer to bit mask of all actions detected.
1568 * Pointer to the error structure.
1571 * Pointer to mlx5_flow object on success,
1572 * otherwise NULL and rte_errno is set.
/*
 * NOTE(review): sampled excerpt -- the allocation-failure early return,
 * the tcm local declaration, parts of the struct initializer and the
 * final `return dev_flow;` are not visible here.
 */
1574 static struct mlx5_flow *
1575 flow_tcf_prepare(const struct rte_flow_attr *attr,
1576 const struct rte_flow_item items[],
1577 const struct rte_flow_action actions[],
1578 uint64_t *item_flags, uint64_t *action_flags,
1579 struct rte_flow_error *error)
/* One allocation holds the flow object plus the Netlink message buffer. */
1581 size_t size = sizeof(struct mlx5_flow) +
1582 MNL_ALIGN(sizeof(struct nlmsghdr)) +
1583 MNL_ALIGN(sizeof(struct tcmsg));
1584 struct mlx5_flow *dev_flow;
1585 struct nlmsghdr *nlh;
/* Sizing passes also report detected item/action flags to the caller. */
1588 size += flow_tcf_get_items_and_size(attr, items, item_flags);
1589 size += flow_tcf_get_actions_and_size(actions, action_flags);
1590 dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1592 rte_flow_error_set(error, ENOMEM,
1593 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1594 "not enough memory to create E-Switch flow");
/* Netlink headers are laid out immediately after the flow object. */
1597 nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1598 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1599 *dev_flow = (struct mlx5_flow){
1600 .tcf = (struct mlx5_flow_tcf){
1606 * Generate a reasonably unique handle based on the address of the
1609 * This is straightforward on 32-bit systems where the flow pointer can
1610 * be used directly. Otherwise, its least significant part is taken
1611 * after shifting it by the previous power of two of the pointed buffer
/* sizeof(dev_flow) is the POINTER size: selects 32- vs 64-bit strategy. */
1614 if (sizeof(dev_flow) <= 4)
1615 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1617 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1618 rte_log2_u32(rte_align32prevpow2(size)));
1623 * Translate flow for Linux TC flower and construct Netlink message.
1626 * Pointer to the priv structure.
1627 * @param[in, out] flow
1628 * Pointer to the sub flow.
1630 * Pointer to the flow attributes.
1632 * Pointer to the list of items.
1633 * @param[in] actions
1634 * Pointer to the list of actions.
1636 * Pointer to the error structure.
1639 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * NOTE(review): sampled excerpt -- the `spec`/`mask`/`conf` union
 * declarations' enclosing braces, many `break;`/`if` continuations and
 * closing braces are not visible.  Comments describe only visible code.
 *
 * Fills the Netlink message prepared by flow_tcf_prepare() with flower
 * match attributes (items loop) and TC actions (actions loop).
 */
1642 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1643 const struct rte_flow_attr *attr,
1644 const struct rte_flow_item items[],
1645 const struct rte_flow_action actions[],
1646 struct rte_flow_error *error)
/* Union members alias one pointer per supported item spec/mask type. */
1649 const struct rte_flow_item_port_id *port_id;
1650 const struct rte_flow_item_eth *eth;
1651 const struct rte_flow_item_vlan *vlan;
1652 const struct rte_flow_item_ipv4 *ipv4;
1653 const struct rte_flow_item_ipv6 *ipv6;
1654 const struct rte_flow_item_tcp *tcp;
1655 const struct rte_flow_item_udp *udp;
/* Same aliasing idea for per-action configuration pointers. */
1658 const struct rte_flow_action_port_id *port_id;
1659 const struct rte_flow_action_jump *jump;
1660 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1661 const struct rte_flow_action_of_set_vlan_vid *
1663 const struct rte_flow_action_of_set_vlan_pcp *
1666 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1667 struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1668 struct tcmsg *tcm = dev_flow->tcf.tcm;
1669 uint32_t na_act_index_cur;
1670 bool eth_type_set = 0;
1671 bool vlan_present = 0;
1672 bool vlan_eth_type_set = 0;
1673 bool ip_proto_set = 0;
1674 struct nlattr *na_flower;
1675 struct nlattr *na_flower_act;
/* Saved attribute positions for late VID/PCP patch-up (see labels). */
1676 struct nlattr *na_vlan_id = NULL;
1677 struct nlattr *na_vlan_priority = NULL;
1678 uint64_t item_flags = 0;
/* Build port-id -> ifindex translation table; entry 0 is this device. */
1680 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1681 PTOI_TABLE_SZ_MAX(dev)));
1682 nlh = dev_flow->tcf.nlh;
1683 tcm = dev_flow->tcf.tcm;
1684 /* Prepare API must have been called beforehand. */
1685 assert(nlh != NULL && tcm != NULL);
1686 tcm->tcm_family = AF_UNSPEC;
1687 tcm->tcm_ifindex = ptoi[0].ifindex;
1688 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1690 * Priority cannot be zero to prevent the kernel from picking one
1693 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1694 RTE_BE16(ETH_P_ALL));
1695 if (attr->group > 0)
1696 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1697 mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1698 na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1699 mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
/* ---- Match items: emit flower key attributes. ---- */
1700 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1703 switch (items->type) {
1704 case RTE_FLOW_ITEM_TYPE_VOID:
/* PORT_ID only retargets the rule's ifindex, no key attribute. */
1706 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1707 mask.port_id = flow_tcf_item_mask
1708 (items, &rte_flow_item_port_id_mask,
1709 &flow_tcf_mask_supported.port_id,
1710 &flow_tcf_mask_empty.port_id,
1711 sizeof(flow_tcf_mask_supported.port_id),
1713 assert(mask.port_id);
1714 if (mask.port_id == &flow_tcf_mask_empty.port_id)
1716 spec.port_id = items->spec;
1717 if (!mask.port_id->id)
1720 for (i = 0; ptoi[i].ifindex; ++i)
1721 if (ptoi[i].port_id == spec.port_id->id)
1723 assert(ptoi[i].ifindex);
1724 tcm->tcm_ifindex = ptoi[i].ifindex;
1726 case RTE_FLOW_ITEM_TYPE_ETH:
1727 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1728 mask.eth = flow_tcf_item_mask
1729 (items, &rte_flow_item_eth_mask,
1730 &flow_tcf_mask_supported.eth,
1731 &flow_tcf_mask_empty.eth,
1732 sizeof(flow_tcf_mask_supported.eth),
/* Empty mask: nothing to match on this item. */
1735 if (mask.eth == &flow_tcf_mask_empty.eth)
1737 spec.eth = items->spec;
1738 if (mask.eth->type) {
1739 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1743 if (!is_zero_ether_addr(&mask.eth->dst)) {
1744 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1746 spec.eth->dst.addr_bytes);
1747 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1749 mask.eth->dst.addr_bytes);
1751 if (!is_zero_ether_addr(&mask.eth->src)) {
1752 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1754 spec.eth->src.addr_bytes);
1755 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1757 mask.eth->src.addr_bytes);
1760 case RTE_FLOW_ITEM_TYPE_VLAN:
1761 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1762 mask.vlan = flow_tcf_item_mask
1763 (items, &rte_flow_item_vlan_mask,
1764 &flow_tcf_mask_supported.vlan,
1765 &flow_tcf_mask_empty.vlan,
1766 sizeof(flow_tcf_mask_supported.vlan),
/* VLAN presence forces the outer ether type to 802.1Q. */
1770 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1771 RTE_BE16(ETH_P_8021Q));
1774 if (mask.vlan == &flow_tcf_mask_empty.vlan)
1776 spec.vlan = items->spec;
1777 if (mask.vlan->inner_type) {
1778 mnl_attr_put_u16(nlh,
1779 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1780 spec.vlan->inner_type);
1781 vlan_eth_type_set = 1;
/* TCI: top 3 bits are priority (PCP), low 12 bits are VLAN ID. */
1783 if (mask.vlan->tci & RTE_BE16(0xe000))
1784 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1786 (spec.vlan->tci) >> 13) & 0x7);
1787 if (mask.vlan->tci & RTE_BE16(0x0fff))
1788 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1793 case RTE_FLOW_ITEM_TYPE_IPV4:
1794 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1795 mask.ipv4 = flow_tcf_item_mask
1796 (items, &rte_flow_item_ipv4_mask,
1797 &flow_tcf_mask_supported.ipv4,
1798 &flow_tcf_mask_empty.ipv4,
1799 sizeof(flow_tcf_mask_supported.ipv4),
/* Emit ether type only if not already set by ETH/VLAN items. */
1802 if (!eth_type_set || !vlan_eth_type_set)
1803 mnl_attr_put_u16(nlh,
1805 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1806 TCA_FLOWER_KEY_ETH_TYPE,
1807 RTE_BE16(ETH_P_IP));
1809 vlan_eth_type_set = 1;
1810 if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1812 spec.ipv4 = items->spec;
1813 if (mask.ipv4->hdr.next_proto_id) {
1814 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1815 spec.ipv4->hdr.next_proto_id);
1818 if (mask.ipv4->hdr.src_addr) {
1819 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1820 spec.ipv4->hdr.src_addr);
1821 mnl_attr_put_u32(nlh,
1822 TCA_FLOWER_KEY_IPV4_SRC_MASK,
1823 mask.ipv4->hdr.src_addr);
1825 if (mask.ipv4->hdr.dst_addr) {
1826 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1827 spec.ipv4->hdr.dst_addr);
1828 mnl_attr_put_u32(nlh,
1829 TCA_FLOWER_KEY_IPV4_DST_MASK,
1830 mask.ipv4->hdr.dst_addr);
1833 case RTE_FLOW_ITEM_TYPE_IPV6:
1834 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1835 mask.ipv6 = flow_tcf_item_mask
1836 (items, &rte_flow_item_ipv6_mask,
1837 &flow_tcf_mask_supported.ipv6,
1838 &flow_tcf_mask_empty.ipv6,
1839 sizeof(flow_tcf_mask_supported.ipv6),
1842 if (!eth_type_set || !vlan_eth_type_set)
1843 mnl_attr_put_u16(nlh,
1845 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1846 TCA_FLOWER_KEY_ETH_TYPE,
1847 RTE_BE16(ETH_P_IPV6));
1849 vlan_eth_type_set = 1;
1850 if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1852 spec.ipv6 = items->spec;
1853 if (mask.ipv6->hdr.proto) {
1854 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1855 spec.ipv6->hdr.proto);
1858 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1859 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1860 sizeof(spec.ipv6->hdr.src_addr),
1861 spec.ipv6->hdr.src_addr);
1862 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1863 sizeof(mask.ipv6->hdr.src_addr),
1864 mask.ipv6->hdr.src_addr);
1866 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1867 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1868 sizeof(spec.ipv6->hdr.dst_addr),
1869 spec.ipv6->hdr.dst_addr);
1870 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1871 sizeof(mask.ipv6->hdr.dst_addr),
1872 mask.ipv6->hdr.dst_addr);
1875 case RTE_FLOW_ITEM_TYPE_UDP:
1876 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1877 mask.udp = flow_tcf_item_mask
1878 (items, &rte_flow_item_udp_mask,
1879 &flow_tcf_mask_supported.udp,
1880 &flow_tcf_mask_empty.udp,
1881 sizeof(flow_tcf_mask_supported.udp),
1885 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1887 if (mask.udp == &flow_tcf_mask_empty.udp)
1889 spec.udp = items->spec;
1890 if (mask.udp->hdr.src_port) {
1891 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1892 spec.udp->hdr.src_port);
1893 mnl_attr_put_u16(nlh,
1894 TCA_FLOWER_KEY_UDP_SRC_MASK,
1895 mask.udp->hdr.src_port);
1897 if (mask.udp->hdr.dst_port) {
1898 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1899 spec.udp->hdr.dst_port);
1900 mnl_attr_put_u16(nlh,
1901 TCA_FLOWER_KEY_UDP_DST_MASK,
1902 mask.udp->hdr.dst_port);
1905 case RTE_FLOW_ITEM_TYPE_TCP:
1906 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1907 mask.tcp = flow_tcf_item_mask
1908 (items, &rte_flow_item_tcp_mask,
1909 &flow_tcf_mask_supported.tcp,
1910 &flow_tcf_mask_empty.tcp,
1911 sizeof(flow_tcf_mask_supported.tcp),
1915 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1917 if (mask.tcp == &flow_tcf_mask_empty.tcp)
1919 spec.tcp = items->spec;
1920 if (mask.tcp->hdr.src_port) {
1921 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
1922 spec.tcp->hdr.src_port);
1923 mnl_attr_put_u16(nlh,
1924 TCA_FLOWER_KEY_TCP_SRC_MASK,
1925 mask.tcp->hdr.src_port);
1927 if (mask.tcp->hdr.dst_port) {
1928 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
1929 spec.tcp->hdr.dst_port);
1930 mnl_attr_put_u16(nlh,
1931 TCA_FLOWER_KEY_TCP_DST_MASK,
1932 mask.tcp->hdr.dst_port);
1934 if (mask.tcp->hdr.tcp_flags) {
1937 TCA_FLOWER_KEY_TCP_FLAGS,
1939 (spec.tcp->hdr.tcp_flags));
1942 TCA_FLOWER_KEY_TCP_FLAGS_MASK,
1944 (mask.tcp->hdr.tcp_flags));
1948 return rte_flow_error_set(error, ENOTSUP,
1949 RTE_FLOW_ERROR_TYPE_ITEM,
1950 NULL, "item not supported");
/* ---- Actions: each gets a 1-based index nest inside TCA_FLOWER_ACT. */
1953 na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
1954 na_act_index_cur = 1;
1955 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1956 struct nlattr *na_act_index;
1957 struct nlattr *na_act;
1958 unsigned int vlan_act;
1961 switch (actions->type) {
1962 case RTE_FLOW_ACTION_TYPE_VOID:
/* PORT_ID -> mirred redirect to the resolved ifindex. */
1964 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1965 conf.port_id = actions->conf;
1966 if (conf.port_id->original)
1969 for (i = 0; ptoi[i].ifindex; ++i)
1970 if (ptoi[i].port_id == conf.port_id->id)
1972 assert(ptoi[i].ifindex);
1974 mnl_attr_nest_start(nlh, na_act_index_cur++);
1975 assert(na_act_index);
1976 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
1977 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1979 mnl_attr_put(nlh, TCA_MIRRED_PARMS,
1980 sizeof(struct tc_mirred),
1981 &(struct tc_mirred){
1982 .action = TC_ACT_STOLEN,
1983 .eaction = TCA_EGRESS_REDIR,
1984 .ifindex = ptoi[i].ifindex,
1986 mnl_attr_nest_end(nlh, na_act);
1987 mnl_attr_nest_end(nlh, na_act_index);
/* JUMP -> gact goto-chain; group number is the target chain. */
1989 case RTE_FLOW_ACTION_TYPE_JUMP:
1990 conf.jump = actions->conf;
1992 mnl_attr_nest_start(nlh, na_act_index_cur++);
1993 assert(na_act_index);
1994 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1995 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1997 mnl_attr_put(nlh, TCA_GACT_PARMS,
1998 sizeof(struct tc_gact),
2000 .action = TC_ACT_GOTO_CHAIN |
2003 mnl_attr_nest_end(nlh, na_act);
2004 mnl_attr_nest_end(nlh, na_act_index);
/* DROP -> gact shot. */
2006 case RTE_FLOW_ACTION_TYPE_DROP:
2008 mnl_attr_nest_start(nlh, na_act_index_cur++);
2009 assert(na_act_index);
2010 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2011 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2013 mnl_attr_put(nlh, TCA_GACT_PARMS,
2014 sizeof(struct tc_gact),
2016 .action = TC_ACT_SHOT,
2018 mnl_attr_nest_end(nlh, na_act);
2019 mnl_attr_nest_end(nlh, na_act_index);
2021 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
2022 conf.of_push_vlan = NULL;
2023 vlan_act = TCA_VLAN_ACT_POP;
2024 goto action_of_vlan;
2025 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
2026 conf.of_push_vlan = actions->conf;
2027 vlan_act = TCA_VLAN_ACT_PUSH;
2028 goto action_of_vlan;
/* SET_VLAN_VID/PCP after PUSH patch the saved attribute in place
 * via the override labels instead of emitting a new action. */
2029 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
2030 conf.of_set_vlan_vid = actions->conf;
2032 goto override_na_vlan_id;
2033 vlan_act = TCA_VLAN_ACT_MODIFY;
2034 goto action_of_vlan;
2035 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
2036 conf.of_set_vlan_pcp = actions->conf;
2037 if (na_vlan_priority)
2038 goto override_na_vlan_priority;
2039 vlan_act = TCA_VLAN_ACT_MODIFY;
2040 goto action_of_vlan;
/* Shared "vlan" action emission (label not visible in excerpt). */
2043 mnl_attr_nest_start(nlh, na_act_index_cur++);
2044 assert(na_act_index);
2045 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
2046 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2048 mnl_attr_put(nlh, TCA_VLAN_PARMS,
2049 sizeof(struct tc_vlan),
2051 .action = TC_ACT_PIPE,
2052 .v_action = vlan_act,
2054 if (vlan_act == TCA_VLAN_ACT_POP) {
2055 mnl_attr_nest_end(nlh, na_act);
2056 mnl_attr_nest_end(nlh, na_act_index);
2059 if (vlan_act == TCA_VLAN_ACT_PUSH)
2060 mnl_attr_put_u16(nlh,
2061 TCA_VLAN_PUSH_VLAN_PROTOCOL,
2062 conf.of_push_vlan->ethertype);
/* Reserve VID/PRIO slots as PAD; retyped later by override labels. */
2063 na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
2064 mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
2065 na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
2066 mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
2067 mnl_attr_nest_end(nlh, na_act);
2068 mnl_attr_nest_end(nlh, na_act_index);
2069 if (actions->type ==
2070 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
2071 override_na_vlan_id:
2072 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
2073 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
2075 (conf.of_set_vlan_vid->vlan_vid);
2076 } else if (actions->type ==
2077 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
2078 override_na_vlan_priority:
2079 na_vlan_priority->nla_type =
2080 TCA_VLAN_PUSH_VLAN_PRIORITY;
2081 *(uint8_t *)mnl_attr_get_payload
2082 (na_vlan_priority) =
2083 conf.of_set_vlan_pcp->vlan_pcp;
/* Header-rewrite actions: helper emits pedit and may consume
 * several consecutive actions (it takes &actions). */
2086 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
2087 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
2088 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
2089 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
2090 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
2091 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
2092 case RTE_FLOW_ACTION_TYPE_SET_TTL:
2093 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
2094 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
2095 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
2097 mnl_attr_nest_start(nlh, na_act_index_cur++);
2098 flow_tcf_create_pedit_mnl_msg(nlh,
2099 &actions, item_flags);
2100 mnl_attr_nest_end(nlh, na_act_index);
2103 return rte_flow_error_set(error, ENOTSUP,
2104 RTE_FLOW_ERROR_TYPE_ACTION,
2106 "action not supported");
2110 assert(na_flower_act);
2111 mnl_attr_nest_end(nlh, na_flower_act);
2112 mnl_attr_nest_end(nlh, na_flower);
2117 * Send Netlink message with acknowledgment.
2120 * Flow context to use.
2122 * Message to send. This function always raises the NLM_F_ACK flag before
2126 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Send @p nlh with NLM_F_ACK set and wait for the kernel's reply,
 * matching it by the per-context sequence number.
 * NOTE(review): error-path lines (send failure check, final return)
 * are not visible in this excerpt.
 */
2129 flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
/* Answer buffer sized for nlmsgerr plus the echoed request payload. */
2131 alignas(struct nlmsghdr)
2132 uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
2133 nlh->nlmsg_len - sizeof(*nlh)];
2134 uint32_t seq = ctx->seq++;
2135 struct mnl_socket *nl = ctx->nl;
2138 nlh->nlmsg_flags |= NLM_F_ACK;
2139 nlh->nlmsg_seq = seq;
2140 ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
2142 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
/* Parse the reply (ACK or error) for our sequence/port id. */
2145 (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
2153 * Apply flow to E-Switch by sending Netlink message.
2156 * Pointer to Ethernet device.
2157 * @param[in, out] flow
2158 * Pointer to the sub flow.
2160 * Pointer to the error structure.
2163 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Install the flow in the kernel by sending the prepared RTM_NEWTFILTER
 * message; NLM_F_EXCL makes an already-existing rule an error.
 * NOTE(review): the success `return 0;` line is not visible here.
 */
2166 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2167 struct rte_flow_error *error)
2169 struct priv *priv = dev->data->dev_private;
2170 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2171 struct mlx5_flow *dev_flow;
2172 struct nlmsghdr *nlh;
2174 dev_flow = LIST_FIRST(&flow->dev_flows);
2175 /* E-Switch flow can't be expanded. */
2176 assert(!LIST_NEXT(dev_flow, next));
2177 nlh = dev_flow->tcf.nlh;
2178 nlh->nlmsg_type = RTM_NEWTFILTER;
2179 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2180 if (!flow_tcf_nl_ack(ctx, nlh))
2182 return rte_flow_error_set(error, rte_errno,
2183 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2184 "netlink: failed to create TC flow rule");
2188 * Remove flow from E-Switch by sending Netlink message.
2191 * Pointer to Ethernet device.
2192 * @param[in, out] flow
2193 * Pointer to the sub flow.
/*
 * Remove the flow's TC filter via RTM_DELTFILTER.  The ACK result is
 * deliberately ignored: the rule may already be gone.
 * NOTE(review): early-return guard lines are not visible in this excerpt.
 */
2196 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2198 struct priv *priv = dev->data->dev_private;
2199 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2200 struct mlx5_flow *dev_flow;
2201 struct nlmsghdr *nlh;
2205 dev_flow = LIST_FIRST(&flow->dev_flows);
2208 /* E-Switch flow can't be expanded. */
2209 assert(!LIST_NEXT(dev_flow, next));
2210 nlh = dev_flow->tcf.nlh;
2211 nlh->nlmsg_type = RTM_DELTFILTER;
2212 nlh->nlmsg_flags = NLM_F_REQUEST;
2213 flow_tcf_nl_ack(ctx, nlh);
2217 * Remove flow from E-Switch and release resources of the device flow.
2220 * Pointer to Ethernet device.
2221 * @param[in, out] flow
2222 * Pointer to the sub flow.
/*
 * Tear down the kernel rule, unlink the single device flow and free it.
 * NOTE(review): guard lines and the rte_free() call are not visible in
 * this excerpt.
 */
2225 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2227 struct mlx5_flow *dev_flow;
2231 flow_tcf_remove(dev, flow);
2232 dev_flow = LIST_FIRST(&flow->dev_flows);
2235 /* E-Switch flow can't be expanded. */
2236 assert(!LIST_NEXT(dev_flow, next));
2237 LIST_REMOVE(dev_flow, next);
2244 * @see rte_flow_query()
/*
 * Query is not implemented for E-Switch (TC flower) flows; always
 * fails with ENOTSUP.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
2248 flow_tcf_query(struct rte_eth_dev *dev __rte_unused,
2249 struct rte_flow *flow __rte_unused,
2250 const struct rte_flow_action *actions __rte_unused,
2251 void *data __rte_unused,
2252 struct rte_flow_error *error __rte_unused)
2254 rte_errno = ENOTSUP;
/* Driver-ops vtable wiring the TC-flower backend into the generic
 * mlx5 flow engine (closing brace not visible in this excerpt). */
2258 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2259 .validate = flow_tcf_validate,
2260 .prepare = flow_tcf_prepare,
2261 .translate = flow_tcf_translate,
2262 .apply = flow_tcf_apply,
2263 .remove = flow_tcf_remove,
2264 .destroy = flow_tcf_destroy,
2265 .query = flow_tcf_query,
2269 * Create and configure a libmnl socket for Netlink flow rules.
2272 * A valid libmnl socket object pointer on success, NULL otherwise and
/*
 * Open and bind a NETLINK_ROUTE libmnl socket with CAP_ACK enabled
 * (kernel sends abbreviated ACKs, saving buffer space).
 * NOTE(review): success return and rte_errno handling lines are not
 * visible in this excerpt; on bind failure the socket is closed.
 */
2275 static struct mnl_socket *
2276 flow_tcf_mnl_socket_create(void)
2278 struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
2281 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2283 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2288 mnl_socket_close(nl);
2293 * Destroy a libmnl socket.
2296 * Libmnl socket of the @p NETLINK_ROUTE kind.
/* Close the libmnl socket (NULL-guard line not visible in excerpt). */
2299 flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
2302 mnl_socket_close(nl);
2306 * Initialize ingress qdisc of a given network interface.
2309 * Pointer to tc-flower context to use.
2311 * Index of network interface to initialize.
2313 * Perform verbose error reporting if not NULL.
2316 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Reset the interface's ingress qdisc: delete any existing one (errors
 * EINVAL/ENOENT mean "not present" and are ignored), then create a
 * fresh "ingress" qdisc to attach flower filters to.
 * NOTE(review): the final success `return 0;` is not visible here.
 */
2319 mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
2320 unsigned int ifindex, struct rte_flow_error *error)
2322 struct nlmsghdr *nlh;
2324 alignas(struct nlmsghdr)
2325 uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2327 /* Destroy existing ingress qdisc and everything attached to it. */
2328 nlh = mnl_nlmsg_put_header(buf);
2329 nlh->nlmsg_type = RTM_DELQDISC;
2330 nlh->nlmsg_flags = NLM_F_REQUEST;
2331 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2332 tcm->tcm_family = AF_UNSPEC;
2333 tcm->tcm_ifindex = ifindex;
2334 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2335 tcm->tcm_parent = TC_H_INGRESS;
2336 /* Ignore errors when qdisc is already absent. */
2337 if (flow_tcf_nl_ack(ctx, nlh) &&
2338 rte_errno != EINVAL && rte_errno != ENOENT)
2339 return rte_flow_error_set(error, rte_errno,
2340 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2341 "netlink: failed to remove ingress"
2343 /* Create fresh ingress qdisc. */
2344 nlh = mnl_nlmsg_put_header(buf);
2345 nlh->nlmsg_type = RTM_NEWQDISC;
2346 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2347 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2348 tcm->tcm_family = AF_UNSPEC;
2349 tcm->tcm_ifindex = ifindex;
2350 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2351 tcm->tcm_parent = TC_H_INGRESS;
2352 mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2353 if (flow_tcf_nl_ack(ctx, nlh))
2354 return rte_flow_error_set(error, rte_errno,
2355 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2356 "netlink: failed to create ingress"
2362 * Create libmnl context for Netlink flow rules.
2365 * A valid libmnl socket object pointer on success, NULL otherwise and
/*
 * Allocate the tc-flower context: libmnl socket, reusable message
 * buffer, and a randomized starting Netlink sequence number.
 * NOTE(review): allocation-failure checks, the success return and the
 * error label are not visible in this excerpt; on failure the partially
 * built context is destroyed.
 */
2368 struct mlx5_flow_tcf_context *
2369 mlx5_flow_tcf_context_create(void)
2371 struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
2376 ctx->nl = flow_tcf_mnl_socket_create();
2379 ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
2380 ctx->buf = rte_zmalloc(__func__,
2381 ctx->buf_size, sizeof(uint32_t));
/* Random start makes sequence numbers unlikely to collide across runs. */
2384 ctx->seq = random();
2387 mlx5_flow_tcf_context_destroy(ctx);
2392 * Destroy a libmnl context.
2395 * Libmnl socket of the @p NETLINK_ROUTE kind.
2398 mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
2402 flow_tcf_mnl_socket_destroy(ctx->nl);