1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/gen_stats.h>
10 #include <linux/if_ether.h>
11 #include <linux/netlink.h>
12 #include <linux/pkt_cls.h>
13 #include <linux/pkt_sched.h>
14 #include <linux/rtnetlink.h>
15 #include <linux/tc_act/tc_gact.h>
16 #include <linux/tc_act/tc_mirred.h>
17 #include <netinet/in.h>
23 #include <sys/socket.h>
25 #include <rte_byteorder.h>
26 #include <rte_errno.h>
27 #include <rte_ether.h>
29 #include <rte_malloc.h>
30 #include <rte_common.h>
33 #include "mlx5_flow.h"
34 #include "mlx5_autoconf.h"
36 #ifdef HAVE_TC_ACT_VLAN
38 #include <linux/tc_act/tc_vlan.h>
40 #else /* HAVE_TC_ACT_VLAN */
42 #define TCA_VLAN_ACT_POP 1
43 #define TCA_VLAN_ACT_PUSH 2
44 #define TCA_VLAN_ACT_MODIFY 3
45 #define TCA_VLAN_PARMS 2
46 #define TCA_VLAN_PUSH_VLAN_ID 3
47 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
48 #define TCA_VLAN_PAD 5
49 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
56 #endif /* HAVE_TC_ACT_VLAN */
58 #ifdef HAVE_TC_ACT_PEDIT
60 #include <linux/tc_act/tc_pedit.h>
62 #else /* HAVE_TC_ACT_PEDIT */
76 TCA_PEDIT_KEY_EX_HTYPE = 1,
77 TCA_PEDIT_KEY_EX_CMD = 2,
78 __TCA_PEDIT_KEY_EX_MAX
81 enum pedit_header_type {
82 TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
83 TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
84 TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
85 TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
86 TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
87 TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
92 TCA_PEDIT_KEY_EX_CMD_SET = 0,
93 TCA_PEDIT_KEY_EX_CMD_ADD = 1,
100 __u32 off; /* offset */
107 struct tc_pedit_sel {
111 struct tc_pedit_key keys[0];
114 #endif /* HAVE_TC_ACT_PEDIT */
116 /* Normally found in linux/netlink.h. */
117 #ifndef NETLINK_CAP_ACK
118 #define NETLINK_CAP_ACK 10
121 /* Normally found in linux/pkt_sched.h. */
122 #ifndef TC_H_MIN_INGRESS
123 #define TC_H_MIN_INGRESS 0xfff2u
126 /* Normally found in linux/pkt_cls.h. */
127 #ifndef TCA_CLS_FLAGS_SKIP_SW
128 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
130 #ifndef HAVE_TCA_CHAIN
133 #ifndef HAVE_TCA_FLOWER_ACT
134 #define TCA_FLOWER_ACT 3
136 #ifndef HAVE_TCA_FLOWER_FLAGS
137 #define TCA_FLOWER_FLAGS 22
139 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
140 #define TCA_FLOWER_KEY_ETH_TYPE 8
142 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
143 #define TCA_FLOWER_KEY_ETH_DST 4
145 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
146 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
148 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
149 #define TCA_FLOWER_KEY_ETH_SRC 6
151 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
152 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
154 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
155 #define TCA_FLOWER_KEY_IP_PROTO 9
157 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
158 #define TCA_FLOWER_KEY_IPV4_SRC 10
160 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
161 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
163 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
164 #define TCA_FLOWER_KEY_IPV4_DST 12
166 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
167 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
169 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
170 #define TCA_FLOWER_KEY_IPV6_SRC 14
172 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
173 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
175 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
176 #define TCA_FLOWER_KEY_IPV6_DST 16
178 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
179 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
181 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
182 #define TCA_FLOWER_KEY_TCP_SRC 18
184 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
185 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
187 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
188 #define TCA_FLOWER_KEY_TCP_DST 19
190 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
191 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
193 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
194 #define TCA_FLOWER_KEY_UDP_SRC 20
196 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
197 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
199 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
200 #define TCA_FLOWER_KEY_UDP_DST 21
202 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
203 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
205 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
206 #define TCA_FLOWER_KEY_VLAN_ID 23
208 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
209 #define TCA_FLOWER_KEY_VLAN_PRIO 24
211 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
212 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
214 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
215 #define TCA_FLOWER_KEY_TCP_FLAGS 71
217 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
218 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
220 #ifndef HAVE_TC_ACT_GOTO_CHAIN
221 #define TC_ACT_GOTO_CHAIN 0x20000000
224 #ifndef IPV6_ADDR_LEN
225 #define IPV6_ADDR_LEN 16
228 #ifndef IPV4_ADDR_LEN
229 #define IPV4_ADDR_LEN 4
233 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
240 #ifndef TCA_ACT_MAX_PRIO
241 #define TCA_ACT_MAX_PRIO 32
245 * Structure for holding netlink context.
246 * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
247 * Using this (8KB) buffer size ensures that netlink messages will never be
250 struct mlx5_flow_tcf_context {
251 struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
252 uint32_t seq; /* Message sequence number. */
253 uint32_t buf_size; /* Message buffer size. */
254 uint8_t *buf; /* Message buffer. */
/* NOTE(review): excerpt is gapped here; the closing brace and any further
 * members of this struct are not visible — verify against the full file. */
257 /** Structure used when extracting the values of a flow counters
258 * from a netlink message.
260 struct flow_tcf_stats_basic {
/* Packet/byte counters as decoded from the kernel's basic statistics
 * attribute. NOTE(review): line 261 (likely a validity flag) and the
 * closing brace are not visible in this excerpt — confirm in full source. */
262 struct gnet_stats_basic counters;
265 /** Empty masks for known item types. */
/* All-zero masks; flow_tcf_item_mask() returns a pointer into this object
 * when an item carries neither spec nor mask (i.e. "match anything").
 * NOTE(review): the opening "static const struct {" line (266) is not
 * visible in this excerpt. */
267 struct rte_flow_item_port_id port_id;
268 struct rte_flow_item_eth eth;
269 struct rte_flow_item_vlan vlan;
270 struct rte_flow_item_ipv4 ipv4;
271 struct rte_flow_item_ipv6 ipv6;
272 struct rte_flow_item_tcp tcp;
273 struct rte_flow_item_udp udp;
274 } flow_tcf_mask_empty;
276 /** Supported masks for known item types. */
/* Widest masks the TC-flower translation supports; validation rejects any
 * mask bit not set here (see the mask_supported check in
 * flow_tcf_item_mask()). All supported fields use full-field masks only.
 * NOTE(review): the per-item designators (.eth = {, .vlan = {, ...) fall on
 * lines missing from this excerpt — the groupings below are inferred from
 * field names; confirm in full source. */
277 static const struct {
278 struct rte_flow_item_port_id port_id;
279 struct rte_flow_item_eth eth;
280 struct rte_flow_item_vlan vlan;
281 struct rte_flow_item_ipv4 ipv4;
282 struct rte_flow_item_ipv6 ipv6;
283 struct rte_flow_item_tcp tcp;
284 struct rte_flow_item_udp udp;
285 } flow_tcf_mask_supported = {
290 .type = RTE_BE16(0xffff),
291 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
292 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
295 /* PCP and VID only, no DEI. */
296 .tci = RTE_BE16(0xefff),
297 .inner_type = RTE_BE16(0xffff),
300 .next_proto_id = 0xff,
301 .src_addr = RTE_BE32(0xffffffff),
302 .dst_addr = RTE_BE32(0xffffffff),
307 "\xff\xff\xff\xff\xff\xff\xff\xff"
308 "\xff\xff\xff\xff\xff\xff\xff\xff",
310 "\xff\xff\xff\xff\xff\xff\xff\xff"
311 "\xff\xff\xff\xff\xff\xff\xff\xff",
314 .src_port = RTE_BE16(0xffff),
315 .dst_port = RTE_BE16(0xffff),
319 .src_port = RTE_BE16(0xffff),
320 .dst_port = RTE_BE16(0xffff),
324 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
325 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
326 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
327 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
328 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
330 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
332 /** DPDK port to network interface index (ifindex) conversion. */
333 struct flow_tcf_ptoi {
334 uint16_t port_id; /**< DPDK port ID. */
335 unsigned int ifindex; /**< Network interface index. */
/* Arrays of flow_tcf_ptoi are terminated by an entry with ifindex == 0
 * (see flow_tcf_build_ptoi_table); PTOI_TABLE_SZ_MAX reserves the slot. */
338 /* Due to a limitation on driver/FW. */
339 #define MLX5_TCF_GROUP_ID_MAX 3
340 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
342 #define MLX5_TCF_FATE_ACTIONS \
343 (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
344 MLX5_FLOW_ACTION_JUMP)
346 #define MLX5_TCF_VLAN_ACTIONS \
347 (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
348 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
350 #define MLX5_TCF_PEDIT_ACTIONS \
351 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
352 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
353 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
354 MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
355 MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
357 #define MLX5_TCF_CONFIG_ACTIONS \
358 (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
359 MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
360 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
361 (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
363 #define MAX_PEDIT_KEYS 128
364 #define SZ_PEDIT_KEY_VAL 4
366 #define NUM_OF_PEDIT_KEYS(sz) \
367 (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
369 struct pedit_key_ex {
/* Extended attributes for one pedit key: header type plus command.
 * NOTE(review): the `cmd` member (enum pedit_key_cmd, around line 371)
 * and the closing brace are not visible in this excerpt. */
370 enum pedit_header_type htype;
/* Accumulator used while translating rte_flow SET_*/DEC_TTL actions into a
 * single tc-pedit action; keys_ex[i] carries the extended (HTYPE/CMD)
 * attributes for keys[i] — the two arrays are kept in lockstep by the
 * flow_tcf_pedit_key_set_* helpers. */
374 struct pedit_parser {
375 struct tc_pedit_sel sel;
376 struct tc_pedit_key keys[MAX_PEDIT_KEYS];
377 struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
381 * Create space for using the implicitly created TC flow counter.
384 * Pointer to the Ethernet device structure.
387 * A pointer to the counter data structure, NULL otherwise and
390 static struct mlx5_flow_counter *
391 flow_tcf_counter_new(void)
/* Allocate a zero-id, implicit (non-shared) counter object for a TC flow. */
393 struct mlx5_flow_counter *cnt;
396 * eswitch counter cannot be shared and its id is unknown.
397 * currently returning all with id 0.
398 * in the future maybe better to switch to unique numbers.
400 struct mlx5_flow_counter tmpl = {
/* NOTE(review): tmpl's initializers (lines 401-402) are not visible here. */
403 cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
/* NOTE(review): the rte_calloc() NULL check and error path (lines 404-408)
 * are not visible in this excerpt — confirm allocation failure handling. */
409 /* Implicit counter, do not add to list. */
414 * Set pedit key of MAC address
417 * pointer to action specification
418 * @param[in,out] p_parser
419 * pointer to pedit_parser
422 flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
423 struct pedit_parser *p_parser)
/* A MAC address is ETHER_ADDR_LEN (6) bytes while each tc_pedit_key value
 * is 4 bytes, so the rewrite is encoded as two keys: bytes 0-3 and 4-5. */
425 int idx = p_parser->sel.nkeys;
426 uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
427 offsetof(struct ether_hdr, s_addr) :
428 offsetof(struct ether_hdr, d_addr);
429 const struct rte_flow_action_set_mac *conf =
430 (const struct rte_flow_action_set_mac *)actions->conf;
/* First key: rewrite the first 4 address bytes.
 * ~UINT32_MAX evaluates to 0 — presumably a pedit mask of 0 means "replace
 * every bit of this 32-bit word"; confirm against tc pedit semantics. */
432 p_parser->keys[idx].off = off;
433 p_parser->keys[idx].mask = ~UINT32_MAX;
434 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
435 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
436 memcpy(&p_parser->keys[idx].val,
437 conf->mac_addr, SZ_PEDIT_KEY_VAL);
/* NOTE(review): the idx increment between the two keys (around line 438)
 * is not visible in this gapped excerpt — verify against the full source. */
439 p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
/* Second key: only the remaining 2 address bytes; mask 0xFFFF0000
 * presumably preserves the adjacent 16 bits — confirm pedit mask polarity. */
440 p_parser->keys[idx].mask = 0xFFFF0000;
441 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
442 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
443 memcpy(&p_parser->keys[idx].val,
444 conf->mac_addr + SZ_PEDIT_KEY_VAL,
445 ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
446 p_parser->sel.nkeys = (++idx);
450 * Set pedit key of decrease/set ttl
453 * pointer to action specification
454 * @param[in,out] p_parser
455 * pointer to pedit_parser
456 * @param[in] item_flags
457 * flags of all items presented
460 flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
461 struct pedit_parser *p_parser,
/* NOTE(review): the third parameter (item_flags, lines 462-463) is not
 * visible in this excerpt. */
464 int idx = p_parser->sel.nkeys;
/* Mask 0xFFFFFF00 targets only the single TTL/hop-limit byte within the
 * 32-bit pedit word — presumably masked bits are preserved; confirm. */
466 p_parser->keys[idx].mask = 0xFFFFFF00;
467 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
468 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
469 p_parser->keys[idx].off =
470 offsetof(struct ipv4_hdr, time_to_live);
472 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
473 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
474 p_parser->keys[idx].off =
475 offsetof(struct ipv6_hdr, hop_limits);
/* DEC_TTL: pedit ADD of 0xFF to the TTL byte, i.e. subtract 1 modulo 256. */
477 if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
478 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
479 p_parser->keys[idx].val = 0x000000FF;
/* SET_TTL: pedit SET with the caller-supplied TTL value. */
481 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
482 p_parser->keys[idx].val =
483 (__u32)((const struct rte_flow_action_set_ttl *)
484 actions->conf)->ttl_value;
486 p_parser->sel.nkeys = (++idx);
490 * Set pedit key of transport (TCP/UDP) port value
493 * pointer to action specification
494 * @param[in,out] p_parser
495 * pointer to pedit_parser
496 * @param[in] item_flags
497 * flags of all items presented
500 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
501 struct pedit_parser *p_parser,
/* NOTE(review): the third parameter (item_flags, lines 502-503) is not
 * visible in this excerpt. */
504 int idx = p_parser->sel.nkeys;
/* Pick the pedit header type from the matched L4 item; if both UDP and TCP
 * flags were somehow set, the later TCP assignment would win. */
506 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
507 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
508 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
509 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
510 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
511 /* offset of src/dst port is same for TCP and UDP */
512 p_parser->keys[idx].off =
513 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
514 offsetof(struct tcp_hdr, src_port) :
515 offsetof(struct tcp_hdr, dst_port);
/* Ports are 16-bit: mask 0xFFFF0000 confines the rewrite to one port. */
516 p_parser->keys[idx].mask = 0xFFFF0000;
517 p_parser->keys[idx].val =
518 (__u32)((const struct rte_flow_action_set_tp *)
519 actions->conf)->port;
520 p_parser->sel.nkeys = (++idx);
524 * Set pedit key of ipv6 address
527 * pointer to action specification
528 * @param[in,out] p_parser
529 * pointer to pedit_parser
532 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
533 struct pedit_parser *p_parser)
/* A 16-byte IPv6 address is split into NUM_OF_PEDIT_KEYS(16) == 4 keys of
 * SZ_PEDIT_KEY_VAL (4) bytes each, written at consecutive offsets. */
535 int idx = p_parser->sel.nkeys;
536 int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
/* NOTE(review): the declaration of the base-offset variable (line 537,
 * presumably `int off_base =`) is not visible in this excerpt. */
538 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
539 offsetof(struct ipv6_hdr, src_addr) :
540 offsetof(struct ipv6_hdr, dst_addr);
541 const struct rte_flow_action_set_ipv6 *conf =
542 (const struct rte_flow_action_set_ipv6 *)actions->conf;
544 for (int i = 0; i < keys; i++, idx++) {
545 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
546 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
547 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
/* ~UINT32_MAX == 0: rewrite the full 32-bit word (see set_mac note). */
548 p_parser->keys[idx].mask = ~UINT32_MAX;
549 memcpy(&p_parser->keys[idx].val,
550 conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
553 p_parser->sel.nkeys += keys;
557 * Set pedit key of ipv4 address
560 * pointer to action specification
561 * @param[in,out] p_parser
562 * pointer to pedit_parser
565 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
566 struct pedit_parser *p_parser)
/* An IPv4 address fits exactly one 4-byte pedit key. */
568 int idx = p_parser->sel.nkeys;
570 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
571 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
572 p_parser->keys[idx].off =
573 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
574 offsetof(struct ipv4_hdr, src_addr) :
575 offsetof(struct ipv4_hdr, dst_addr);
/* ~UINT32_MAX == 0: rewrite the full 32-bit word (see set_mac note). */
576 p_parser->keys[idx].mask = ~UINT32_MAX;
577 p_parser->keys[idx].val =
578 ((const struct rte_flow_action_set_ipv4 *)
579 actions->conf)->ipv4_addr;
580 p_parser->sel.nkeys = (++idx);
584 * Create the pedit's na attribute in netlink message
585 * on pre-allocate message buffer
588 * pointer to pre-allocated netlink message buffer
589 * @param[in,out] actions
590 * pointer to pointer of actions specification.
591 * @param[in,out] action_flags
592 * pointer to actions flags
593 * @param[in] item_flags
594 * flags of all item presented
597 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
598 const struct rte_flow_action **actions,
/* NOTE(review): the item_flags parameter and opening brace (lines 599-600)
 * are not visible in this excerpt. */
601 struct pedit_parser p_parser;
602 struct nlattr *na_act_options;
603 struct nlattr *na_pedit_keys;
605 memset(&p_parser, 0, sizeof(p_parser));
606 mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
607 na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
608 /* all modify header actions should be in one tc-pedit action */
/* Consume consecutive header-rewrite actions, accumulating pedit keys;
 * *actions is advanced so the caller resumes at the first non-pedit action.
 * NOTE(review): the `break;` after each case and the `default:` label are
 * on lines missing from this excerpt. */
609 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
610 switch ((*actions)->type) {
611 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
612 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
613 flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
615 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
616 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
617 flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
619 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
620 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
621 flow_tcf_pedit_key_set_tp_port(*actions,
622 &p_parser, item_flags);
624 case RTE_FLOW_ACTION_TYPE_SET_TTL:
625 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
626 flow_tcf_pedit_key_set_dec_ttl(*actions,
627 &p_parser, item_flags);
629 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
630 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
631 flow_tcf_pedit_key_set_mac(*actions, &p_parser);
/* First non-pedit action terminates the accumulation loop. */
634 goto pedit_mnl_msg_done;
/* NOTE(review): the pedit_mnl_msg_done label (line 637) is not visible. */
638 p_parser.sel.action = TC_ACT_PIPE;
/* Emit the selector plus the variable-length key array as one attribute. */
639 mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
640 sizeof(p_parser.sel) +
641 p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
/* NOTE(review): the data argument (line 642/643, presumably &p_parser.sel
 * and the na_pedit_keys assignment) is not visible in this excerpt. */
644 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
/* One nested TCA_PEDIT_KEY_EX (HTYPE + CMD) per accumulated key. */
645 for (int i = 0; i < p_parser.sel.nkeys; i++) {
646 struct nlattr *na_pedit_key =
647 mnl_attr_nest_start(nl,
648 TCA_PEDIT_KEY_EX | NLA_F_NESTED);
649 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
650 p_parser.keys_ex[i].htype);
651 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
652 p_parser.keys_ex[i].cmd);
653 mnl_attr_nest_end(nl, na_pedit_key);
655 mnl_attr_nest_end(nl, na_pedit_keys);
656 mnl_attr_nest_end(nl, na_act_options);
661 * Calculate max memory size of one TC-pedit actions.
662 * One TC-pedit action can contain set of keys each defining
663 * a rewrite element (rte_flow action)
665 * @param[in,out] actions
666 * actions specification.
667 * @param[in,out] action_flags
669 * @param[in,out] size
672 * Max memory size of one TC-pedit action
675 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
676 uint64_t *action_flags)
/* Worst-case netlink buffer accounting for one tc-pedit action; mirrors the
 * attribute layout emitted by flow_tcf_create_pedit_mnl_msg(). Also ORs the
 * encountered MLX5_FLOW_ACTION_* bits into *action_flags.
 * NOTE(review): local declarations (pedit_size, keys, flags; lines 677-681)
 * and the per-case `break;` lines are not visible in this excerpt. */
682 pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
683 SZ_NLATTR_STRZ_OF("pedit") +
684 SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
685 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
686 switch ((*actions)->type) {
687 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
688 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
689 flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
691 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
692 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
693 flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
695 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
696 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
697 flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
699 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
700 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
701 flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
703 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
704 /* TCP and UDP use the same port field size. */
705 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
706 flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
708 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
709 /* TCP and UDP use the same port field size. */
710 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
711 flags |= MLX5_FLOW_ACTION_SET_TP_DST;
713 case RTE_FLOW_ACTION_TYPE_SET_TTL:
714 keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
715 flags |= MLX5_FLOW_ACTION_SET_TTL;
717 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
718 keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
719 flags |= MLX5_FLOW_ACTION_DEC_TTL;
721 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
722 keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
723 flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
725 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
726 keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
727 flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
/* Any other action type terminates the pedit run. */
730 goto get_pedit_action_size_done;
733 get_pedit_action_size_done:
734 /* TCA_PEDIT_PARMS_EX: selector plus the key array. */
736 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
737 keys * sizeof(struct tc_pedit_key));
738 pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
740 /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
741 (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
742 SZ_NLATTR_DATA_OF(2));
743 (*action_flags) |= flags;
/* NOTE(review): the `return pedit_size;` and closing brace (lines 744-746)
 * are not visible in this excerpt. */
749 * Retrieve mask for pattern item.
751 * This function does basic sanity checks on a pattern item in order to
752 * return the most appropriate mask for it.
755 * Item specification.
756 * @param[in] mask_default
757 * Default mask for pattern item as specified by the flow API.
758 * @param[in] mask_supported
759 * Mask fields supported by the implementation.
760 * @param[in] mask_empty
761 * Empty mask to return when there is no specification.
763 * Perform verbose error reporting if not NULL.
766 * Either @p item->mask or one of the mask parameters on success, NULL
767 * otherwise and rte_errno is set.
770 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
771 const void *mask_supported, const void *mask_empty,
772 size_t mask_size, struct rte_flow_error *error)
/* Sanity-check an item's mask/spec/last and pick the effective mask:
 * item->mask if present, else mask_default; mask_empty is returned when the
 * item has no spec at all (return paths fall on lines missing from this
 * excerpt — verify against the full source). */
777 /* item->last and item->mask cannot exist without item->spec. */
778 if (!item->spec && (item->mask || item->last)) {
779 rte_flow_error_set(error, EINVAL,
780 RTE_FLOW_ERROR_TYPE_ITEM, item,
781 "\"mask\" or \"last\" field provided without"
782 " a corresponding \"spec\"");
785 /* No spec, no mask, no problem. */
788 mask = item->mask ? item->mask : mask_default;
791 * Single-pass check to make sure that:
792 * - Mask is supported, no bits are set outside mask_supported.
793 * - Both item->spec and item->last are included in mask.
795 for (i = 0; i != mask_size; ++i) {
/* Any mask bit outside mask_supported makes the OR differ. */
798 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
799 ((const uint8_t *)mask_supported)[i]) {
800 rte_flow_error_set(error, ENOTSUP,
801 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
802 "unsupported field found"
/* spec and last must agree on every masked byte, otherwise the requested
 * range cannot be expressed through this mask. */
807 (((const uint8_t *)item->spec)[i] & mask[i]) !=
808 (((const uint8_t *)item->last)[i] & mask[i])) {
809 rte_flow_error_set(error, EINVAL,
810 RTE_FLOW_ERROR_TYPE_ITEM_LAST,
812 "range between \"spec\" and \"last\""
813 " not comprised in \"mask\"");
821 * Build a conversion table between port ID and ifindex.
824 * Pointer to Ethernet device.
826 * Pointer to ptoi table.
828 * Size of ptoi table provided.
831 * Size of ptoi table filled.
834 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
/* Fill ptoi[] with (DPDK port ID, ifindex) pairs for every port sharing
 * this device; the VLA below reserves n + 1 slots so a zero-ifindex
 * terminator always fits. */
837 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
838 uint16_t port_id[n + 1];
840 unsigned int own = 0;
842 /* At least one port is needed when no switch domain is present. */
845 port_id[0] = dev->data->port_id;
847 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
851 for (i = 0; i != n; ++i) {
852 struct rte_eth_dev_info dev_info;
854 rte_eth_dev_info_get(port_id[i], &dev_info);
/* Remember our own slot so it can be swapped to the front below.
 * NOTE(review): the statement under this condition (line 856, presumably
 * `own = i;`) is not visible in this excerpt. */
855 if (port_id[i] == dev->data->port_id)
857 ptoi[i].port_id = port_id[i];
858 ptoi[i].ifindex = dev_info.if_index;
860 /* Ensure first entry of ptoi[] is the current device. */
866 /* An entry with zero ifindex terminates ptoi[]. */
873 * Verify the @p attr will be correctly understood by the E-switch.
876 * Pointer to flow attributes
878 * Pointer to error structure.
881 * 0 on success, a negative errno value otherwise and rte_errno is set.
884 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
885 struct rte_flow_error *error)
888 * Supported attributes: groups, some priorities and ingress only.
889 * group is supported only if kernel supports chain. Don't care about
890 * transfer as it is the caller's problem.
/* Group IDs above MLX5_TCF_GROUP_ID_MAX (3) are rejected outright. */
892 if (attr->group > MLX5_TCF_GROUP_ID_MAX)
893 return rte_flow_error_set(error, ENOTSUP,
894 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
895 "group ID larger than "
896 RTE_STR(MLX5_TCF_GROUP_ID_MAX)
/* Non-root groups get a tighter priority cap (MLX5_TCF_GROUP_PRIORITY_MAX,
 * 14) than group 0 (0xfffe) — a driver/FW limitation, per the defines. */
898 else if (attr->group > 0 &&
899 attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
900 return rte_flow_error_set(error, ENOTSUP,
901 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
903 "lowest priority level is "
904 RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
905 " when group is configured");
906 else if (attr->priority > 0xfffe)
907 return rte_flow_error_set(error, ENOTSUP,
908 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
910 "lowest priority level is 0xfffe");
/* NOTE(review): the conditions guarding the two returns below (likely
 * `if (!attr->ingress)` and `if (attr->egress)`, lines 911 and 915) are
 * not visible in this excerpt — verify against the full source. */
912 return rte_flow_error_set(error, EINVAL,
913 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
914 attr, "only ingress is supported");
916 return rte_flow_error_set(error, ENOTSUP,
917 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
918 attr, "egress is not supported");
923 * Validate flow for E-Switch.
926 * Pointer to the priv structure.
928 * Pointer to the flow attributes.
930 * Pointer to the list of items.
932 * Pointer to the list of actions.
934 * Pointer to the error structure.
937 * 0 on success, a negative errno value otherwise and rte_errno is set.
940 flow_tcf_validate(struct rte_eth_dev *dev,
941 const struct rte_flow_attr *attr,
942 const struct rte_flow_item items[],
943 const struct rte_flow_action actions[],
944 struct rte_flow_error *error)
947 const struct rte_flow_item_port_id *port_id;
948 const struct rte_flow_item_eth *eth;
949 const struct rte_flow_item_vlan *vlan;
950 const struct rte_flow_item_ipv4 *ipv4;
951 const struct rte_flow_item_ipv6 *ipv6;
952 const struct rte_flow_item_tcp *tcp;
953 const struct rte_flow_item_udp *udp;
956 const struct rte_flow_action_port_id *port_id;
957 const struct rte_flow_action_jump *jump;
958 const struct rte_flow_action_of_push_vlan *of_push_vlan;
959 const struct rte_flow_action_of_set_vlan_vid *
961 const struct rte_flow_action_of_set_vlan_pcp *
963 const struct rte_flow_action_set_ipv4 *set_ipv4;
964 const struct rte_flow_action_set_ipv6 *set_ipv6;
966 uint32_t item_flags = 0;
967 uint32_t action_flags = 0;
968 uint8_t next_protocol = -1;
969 unsigned int tcm_ifindex = 0;
970 uint8_t pedit_validated = 0;
971 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
972 struct rte_eth_dev *port_id_dev = NULL;
976 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
977 PTOI_TABLE_SZ_MAX(dev)));
978 ret = flow_tcf_validate_attributes(attr, error);
981 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
984 switch (items->type) {
985 case RTE_FLOW_ITEM_TYPE_VOID:
987 case RTE_FLOW_ITEM_TYPE_PORT_ID:
988 mask.port_id = flow_tcf_item_mask
989 (items, &rte_flow_item_port_id_mask,
990 &flow_tcf_mask_supported.port_id,
991 &flow_tcf_mask_empty.port_id,
992 sizeof(flow_tcf_mask_supported.port_id),
996 if (mask.port_id == &flow_tcf_mask_empty.port_id) {
1000 spec.port_id = items->spec;
1001 if (mask.port_id->id && mask.port_id->id != 0xffffffff)
1002 return rte_flow_error_set
1004 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1006 "no support for partial mask on"
1008 if (!mask.port_id->id)
1011 for (i = 0; ptoi[i].ifindex; ++i)
1012 if (ptoi[i].port_id == spec.port_id->id)
1014 if (!ptoi[i].ifindex)
1015 return rte_flow_error_set
1017 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1019 "missing data to convert port ID to"
1021 if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
1022 return rte_flow_error_set
1024 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1026 "cannot match traffic for"
1027 " several port IDs through"
1028 " a single flow rule");
1029 tcm_ifindex = ptoi[i].ifindex;
1032 case RTE_FLOW_ITEM_TYPE_ETH:
1033 ret = mlx5_flow_validate_item_eth(items, item_flags,
1037 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1039 * Redundant check due to different supported mask.
1040 * Same for the rest of items.
1042 mask.eth = flow_tcf_item_mask
1043 (items, &rte_flow_item_eth_mask,
1044 &flow_tcf_mask_supported.eth,
1045 &flow_tcf_mask_empty.eth,
1046 sizeof(flow_tcf_mask_supported.eth),
1050 if (mask.eth->type && mask.eth->type !=
1052 return rte_flow_error_set
1054 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1056 "no support for partial mask on"
1059 case RTE_FLOW_ITEM_TYPE_VLAN:
1060 ret = mlx5_flow_validate_item_vlan(items, item_flags,
1064 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1065 mask.vlan = flow_tcf_item_mask
1066 (items, &rte_flow_item_vlan_mask,
1067 &flow_tcf_mask_supported.vlan,
1068 &flow_tcf_mask_empty.vlan,
1069 sizeof(flow_tcf_mask_supported.vlan),
1073 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
1074 (mask.vlan->tci & RTE_BE16(0xe000)) !=
1075 RTE_BE16(0xe000)) ||
1076 (mask.vlan->tci & RTE_BE16(0x0fff) &&
1077 (mask.vlan->tci & RTE_BE16(0x0fff)) !=
1078 RTE_BE16(0x0fff)) ||
1079 (mask.vlan->inner_type &&
1080 mask.vlan->inner_type != RTE_BE16(0xffff)))
1081 return rte_flow_error_set
1083 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1085 "no support for partial masks on"
1086 " \"tci\" (PCP and VID parts) and"
1087 " \"inner_type\" fields");
1089 case RTE_FLOW_ITEM_TYPE_IPV4:
1090 ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1094 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1095 mask.ipv4 = flow_tcf_item_mask
1096 (items, &rte_flow_item_ipv4_mask,
1097 &flow_tcf_mask_supported.ipv4,
1098 &flow_tcf_mask_empty.ipv4,
1099 sizeof(flow_tcf_mask_supported.ipv4),
1103 if (mask.ipv4->hdr.next_proto_id &&
1104 mask.ipv4->hdr.next_proto_id != 0xff)
1105 return rte_flow_error_set
1107 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1109 "no support for partial mask on"
1110 " \"hdr.next_proto_id\" field");
1111 else if (mask.ipv4->hdr.next_proto_id)
1113 ((const struct rte_flow_item_ipv4 *)
1114 (items->spec))->hdr.next_proto_id;
1116 case RTE_FLOW_ITEM_TYPE_IPV6:
1117 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1121 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1122 mask.ipv6 = flow_tcf_item_mask
1123 (items, &rte_flow_item_ipv6_mask,
1124 &flow_tcf_mask_supported.ipv6,
1125 &flow_tcf_mask_empty.ipv6,
1126 sizeof(flow_tcf_mask_supported.ipv6),
1130 if (mask.ipv6->hdr.proto &&
1131 mask.ipv6->hdr.proto != 0xff)
1132 return rte_flow_error_set
1134 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1136 "no support for partial mask on"
1137 " \"hdr.proto\" field");
1138 else if (mask.ipv6->hdr.proto)
1140 ((const struct rte_flow_item_ipv6 *)
1141 (items->spec))->hdr.proto;
1143 case RTE_FLOW_ITEM_TYPE_UDP:
1144 ret = mlx5_flow_validate_item_udp(items, item_flags,
1145 next_protocol, error);
1148 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1149 mask.udp = flow_tcf_item_mask
1150 (items, &rte_flow_item_udp_mask,
1151 &flow_tcf_mask_supported.udp,
1152 &flow_tcf_mask_empty.udp,
1153 sizeof(flow_tcf_mask_supported.udp),
1158 case RTE_FLOW_ITEM_TYPE_TCP:
1159 ret = mlx5_flow_validate_item_tcp
1162 &flow_tcf_mask_supported.tcp,
1166 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1167 mask.tcp = flow_tcf_item_mask
1168 (items, &rte_flow_item_tcp_mask,
1169 &flow_tcf_mask_supported.tcp,
1170 &flow_tcf_mask_empty.tcp,
1171 sizeof(flow_tcf_mask_supported.tcp),
1177 return rte_flow_error_set(error, ENOTSUP,
1178 RTE_FLOW_ERROR_TYPE_ITEM,
1179 NULL, "item not supported");
1182 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1184 uint32_t current_action_flag = 0;
1186 switch (actions->type) {
1187 case RTE_FLOW_ACTION_TYPE_VOID:
1189 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1190 current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1193 conf.port_id = actions->conf;
1194 if (conf.port_id->original)
1197 for (i = 0; ptoi[i].ifindex; ++i)
1198 if (ptoi[i].port_id == conf.port_id->id)
1200 if (!ptoi[i].ifindex)
1201 return rte_flow_error_set
1203 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1205 "missing data to convert port ID to"
1207 port_id_dev = &rte_eth_devices[conf.port_id->id];
1209 case RTE_FLOW_ACTION_TYPE_JUMP:
1210 current_action_flag = MLX5_FLOW_ACTION_JUMP;
1213 conf.jump = actions->conf;
1214 if (attr->group >= conf.jump->group)
1215 return rte_flow_error_set
1217 RTE_FLOW_ERROR_TYPE_ACTION,
1219 "can jump only to a group forward");
1221 case RTE_FLOW_ACTION_TYPE_DROP:
1222 current_action_flag = MLX5_FLOW_ACTION_DROP;
1224 case RTE_FLOW_ACTION_TYPE_COUNT:
1226 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1227 current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1229 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1230 current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1232 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1233 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1234 return rte_flow_error_set
1236 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1237 "vlan modify is not supported,"
1238 " set action must follow push action");
1239 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1241 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1242 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1243 return rte_flow_error_set
1245 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1246 "vlan modify is not supported,"
1247 " set action must follow push action");
1248 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1250 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1251 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1253 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1254 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1256 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1257 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1259 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1260 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1262 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1263 current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1265 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1266 current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1268 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1269 current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1271 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1272 current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1274 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1275 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
1277 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1278 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
1281 return rte_flow_error_set(error, ENOTSUP,
1282 RTE_FLOW_ERROR_TYPE_ACTION,
1284 "action not supported");
1286 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1288 return rte_flow_error_set(error, EINVAL,
1289 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1291 "action configuration not set");
1293 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1295 return rte_flow_error_set(error, ENOTSUP,
1296 RTE_FLOW_ERROR_TYPE_ACTION,
1298 "set actions should be "
1299 "listed successively");
1300 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1301 (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1302 pedit_validated = 1;
1303 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1304 (action_flags & MLX5_TCF_FATE_ACTIONS))
1305 return rte_flow_error_set(error, EINVAL,
1306 RTE_FLOW_ERROR_TYPE_ACTION,
1308 "can't have multiple fate"
1310 action_flags |= current_action_flag;
1312 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1313 (action_flags & MLX5_FLOW_ACTION_DROP))
1314 return rte_flow_error_set(error, ENOTSUP,
1315 RTE_FLOW_ERROR_TYPE_ACTION,
1317 "set action is not compatible with "
1319 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1320 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1321 return rte_flow_error_set(error, ENOTSUP,
1322 RTE_FLOW_ERROR_TYPE_ACTION,
1324 "set action must be followed by "
1327 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1328 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1329 return rte_flow_error_set(error, EINVAL,
1330 RTE_FLOW_ERROR_TYPE_ACTION,
1332 "no ipv4 item found in"
1336 (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1337 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1338 return rte_flow_error_set(error, EINVAL,
1339 RTE_FLOW_ERROR_TYPE_ACTION,
1341 "no ipv6 item found in"
1345 (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1347 (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1348 MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1349 return rte_flow_error_set(error, EINVAL,
1350 RTE_FLOW_ERROR_TYPE_ACTION,
1352 "no TCP/UDP item found in"
1356 * FW syndrome (0xA9C090):
1357 * set_flow_table_entry: push vlan action fte in fdb can ONLY be
1358 * forward to the uplink.
1360 if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1361 (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1362 ((struct priv *)port_id_dev->data->dev_private)->representor)
1363 return rte_flow_error_set(error, ENOTSUP,
1364 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1365 "vlan push can only be applied"
1366 " when forwarding to uplink port");
1368 * FW syndrome (0x294609):
1369 * set_flow_table_entry: modify/pop/push actions in fdb flow table
1370 * are supported only while forwarding to vport.
1372 if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1373 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1374 return rte_flow_error_set(error, ENOTSUP,
1375 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1376 "vlan actions are supported"
1377 " only with port_id action");
1378 if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1379 return rte_flow_error_set(error, EINVAL,
1380 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1381 "no fate action is found");
1383 (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
1385 (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
1386 MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
1387 return rte_flow_error_set(error, EINVAL,
1388 RTE_FLOW_ERROR_TYPE_ACTION,
1390 "no IP found in pattern");
1393 (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
1394 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
1395 return rte_flow_error_set(error, ENOTSUP,
1396 RTE_FLOW_ERROR_TYPE_ACTION,
1398 "no ethernet found in"
1405 * Calculate maximum size of memory for flow items of Linux TC flower and
1406 * extract specified items.
1409 * Pointer to the list of items.
1410 * @param[out] item_flags
1411 * Pointer to the detected items.
1414 * Maximum size of memory for items.
1417 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1418 const struct rte_flow_item items[],
1419 uint64_t *item_flags)
1424 size += SZ_NLATTR_STRZ_OF("flower") +
1425 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1426 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
1427 if (attr->group > 0)
1428 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
1429 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1430 switch (items->type) {
1431 case RTE_FLOW_ITEM_TYPE_VOID:
1433 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1435 case RTE_FLOW_ITEM_TYPE_ETH:
1436 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1437 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1438 /* dst/src MAC addr and mask. */
1439 flags |= MLX5_FLOW_LAYER_OUTER_L2;
1441 case RTE_FLOW_ITEM_TYPE_VLAN:
1442 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1443 SZ_NLATTR_TYPE_OF(uint16_t) +
1444 /* VLAN Ether type. */
1445 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1446 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1447 flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1449 case RTE_FLOW_ITEM_TYPE_IPV4:
1450 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1451 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1452 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1453 /* dst/src IP addr and mask. */
1454 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1456 case RTE_FLOW_ITEM_TYPE_IPV6:
1457 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1458 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1459 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1460 /* dst/src IP addr and mask. */
1461 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1463 case RTE_FLOW_ITEM_TYPE_UDP:
1464 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1465 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1466 /* dst/src port and mask. */
1467 flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1469 case RTE_FLOW_ITEM_TYPE_TCP:
1470 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1471 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1472 /* dst/src port and mask. */
1473 flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1477 "unsupported item %p type %d,"
1478 " items must be validated before flow creation",
1479 (const void *)items, items->type);
1483 *item_flags = flags;
1488 * Calculate maximum size of memory for flow actions of Linux TC flower and
1489 * extract specified actions.
1491 * @param[in] actions
1492 * Pointer to the list of actions.
1493 * @param[out] action_flags
1494 * Pointer to the detected actions.
1497 * Maximum size of memory for actions.
/*
 * Companion of flow_tcf_get_items_and_size() for the action list: sums the
 * worst-case Netlink space per supported action and reports the detected
 * MLX5_FLOW_ACTION_* bits. VLAN actions share one sizing path reached via
 * the action_of_vlan label (label itself elided from this view).
 */
1500 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1501 uint64_t *action_flags)
1506 size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1507 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1508 switch (actions->type) {
1509 case RTE_FLOW_ACTION_TYPE_VOID:
1511 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1512 size += SZ_NLATTR_NEST + /* na_act_index. */
1513 SZ_NLATTR_STRZ_OF("mirred") +
1514 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1515 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1516 flags |= MLX5_FLOW_ACTION_PORT_ID;
1518 case RTE_FLOW_ACTION_TYPE_JUMP:
1519 size += SZ_NLATTR_NEST + /* na_act_index. */
1520 SZ_NLATTR_STRZ_OF("gact") +
1521 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1522 SZ_NLATTR_TYPE_OF(struct tc_gact);
1523 flags |= MLX5_FLOW_ACTION_JUMP;
1525 case RTE_FLOW_ACTION_TYPE_DROP:
1526 size += SZ_NLATTR_NEST + /* na_act_index. */
1527 SZ_NLATTR_STRZ_OF("gact") +
1528 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1529 SZ_NLATTR_TYPE_OF(struct tc_gact);
1530 flags |= MLX5_FLOW_ACTION_DROP;
1532 case RTE_FLOW_ACTION_TYPE_COUNT:
/* All four OF VLAN actions funnel into the shared sizing block below. */
1534 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1535 flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1536 goto action_of_vlan;
1537 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1538 flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1539 goto action_of_vlan;
1540 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1541 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1542 goto action_of_vlan;
1543 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1544 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1545 goto action_of_vlan;
1547 size += SZ_NLATTR_NEST + /* na_act_index. */
1548 SZ_NLATTR_STRZ_OF("vlan") +
1549 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1550 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1551 SZ_NLATTR_TYPE_OF(uint16_t) +
1552 /* VLAN protocol. */
1553 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1554 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
/* All pedit-style modify actions are sized by a common helper. */
1556 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1557 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1558 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1559 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1560 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1561 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1562 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1563 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1564 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1565 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1566 size += flow_tcf_get_pedit_actions_size(&actions,
/* Default case: unexpected actions were supposed to be rejected earlier. */
1571 "unsupported action %p type %d,"
1572 " items must be validated before flow creation",
1573 (const void *)actions, actions->type);
1577 *action_flags = flags;
1582 * Brand rtnetlink buffer with unique handle.
1584 * This handle should be unique for a given network interface to avoid
1588 * Pointer to Netlink message.
1590 * Unique 32-bit handle to use.
/* Stamps the TC message payload with the caller-chosen unique handle. */
1593 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1595 struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1597 tcm->tcm_handle = handle;
1598 DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1599 (void *)nlh, handle);
1603 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1604 * memory required, allocates the memory, initializes Netlink message headers
1605 * and set unique TC message handle.
1608 * Pointer to the flow attributes.
1610 * Pointer to the list of items.
1611 * @param[in] actions
1612 * Pointer to the list of actions.
1613 * @param[out] item_flags
1614 * Pointer to bit mask of all items detected.
1615 * @param[out] action_flags
1616 * Pointer to bit mask of all actions detected.
1618 * Pointer to the error structure.
1621 * Pointer to mlx5_flow object on success,
1622 * otherwise NULL and rte_ernno is set.
/*
 * Allocates one zeroed buffer holding the mlx5_flow object followed by the
 * Netlink header and tcmsg payload, sized by the item/action size helpers,
 * then brands the message with a handle derived from the buffer address.
 */
1624 static struct mlx5_flow *
1625 flow_tcf_prepare(const struct rte_flow_attr *attr,
1626 const struct rte_flow_item items[],
1627 const struct rte_flow_action actions[],
1628 uint64_t *item_flags, uint64_t *action_flags,
1629 struct rte_flow_error *error)
1631 size_t size = sizeof(struct mlx5_flow) +
1632 MNL_ALIGN(sizeof(struct nlmsghdr)) +
1633 MNL_ALIGN(sizeof(struct tcmsg));
1634 struct mlx5_flow *dev_flow;
1635 struct nlmsghdr *nlh;
/* Size helpers also report the detected item/action flags to the caller. */
1638 size += flow_tcf_get_items_and_size(attr, items, item_flags);
1639 size += flow_tcf_get_actions_and_size(actions, action_flags);
1640 dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1642 rte_flow_error_set(error, ENOMEM,
1643 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1644 "not enough memory to create E-Switch flow");
/* Netlink header lives immediately after the mlx5_flow structure. */
1647 nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1648 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1649 *dev_flow = (struct mlx5_flow){
1650 .tcf = (struct mlx5_flow_tcf){
1656 * Generate a reasonably unique handle based on the address of the
1659 * This is straightforward on 32-bit systems where the flow pointer can
1660 * be used directly. Otherwise, its least significant part is taken
1661 * after shifting it by the previous power of two of the pointed buffer
1664 if (sizeof(dev_flow) <= 4)
1665 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1667 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1668 rte_log2_u32(rte_align32prevpow2(size)));
1673 * Make adjustments for supporting count actions.
1676 * Pointer to the Ethernet device structure.
1677 * @param[in] dev_flow
1678 * Pointer to mlx5_flow.
1680 * Pointer to error structure.
1683 * 0 On success else a negative errno value is returned and rte_errno is set.
/*
 * Lazily attaches a counter object to the rte_flow; reuses an existing one
 * if the flow already has it, otherwise allocates via flow_tcf_counter_new().
 */
1686 flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
1687 struct mlx5_flow *dev_flow,
1688 struct rte_flow_error *error)
1690 struct rte_flow *flow = dev_flow->flow;
1692 if (!flow->counter) {
1693 flow->counter = flow_tcf_counter_new();
/* Counter allocation failure propagates rte_errno to the caller. */
1695 return rte_flow_error_set(error, rte_errno,
1696 RTE_FLOW_ERROR_TYPE_ACTION,
1698 "cannot get counter"
1705 * Translate flow for Linux TC flower and construct Netlink message.
1708 * Pointer to the priv structure.
1709 * @param[in, out] flow
1710 * Pointer to the sub flow.
1712 * Pointer to the flow attributes.
1714 * Pointer to the list of items.
1715 * @param[in] actions
1716 * Pointer to the list of actions.
1718 * Pointer to the error structure.
1721 * 0 on success, a negative errno value otherwise and rte_ernno is set.
/*
 * Fills the pre-allocated Netlink message (from flow_tcf_prepare()) with a
 * flower classifier: TC header fields, one flower key attribute per pattern
 * item, then a TCA_FLOWER_ACT nest with one indexed action per rte_flow
 * action. The cross-case goto labels (action_of_vlan, override_na_vlan_id,
 * override_na_vlan_priority) let SET_VLAN_VID/PCP patch attributes emitted
 * earlier by a PUSH/POP/MODIFY vlan action. Several interior lines are
 * elided from this view; the visible code is kept byte-identical.
 */
1724 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1725 const struct rte_flow_attr *attr,
1726 const struct rte_flow_item items[],
1727 const struct rte_flow_action actions[],
1728 struct rte_flow_error *error)
/* Union of typed views over items->spec/mask (declaration head elided). */
1731 const struct rte_flow_item_port_id *port_id;
1732 const struct rte_flow_item_eth *eth;
1733 const struct rte_flow_item_vlan *vlan;
1734 const struct rte_flow_item_ipv4 *ipv4;
1735 const struct rte_flow_item_ipv6 *ipv6;
1736 const struct rte_flow_item_tcp *tcp;
1737 const struct rte_flow_item_udp *udp;
/* Union of typed views over actions->conf (declaration head elided). */
1740 const struct rte_flow_action_port_id *port_id;
1741 const struct rte_flow_action_jump *jump;
1742 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1743 const struct rte_flow_action_of_set_vlan_vid *
1745 const struct rte_flow_action_of_set_vlan_pcp *
1748 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1749 struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1750 struct tcmsg *tcm = dev_flow->tcf.tcm;
1751 uint32_t na_act_index_cur;
1752 bool eth_type_set = 0;
1753 bool vlan_present = 0;
1754 bool vlan_eth_type_set = 0;
1755 bool ip_proto_set = 0;
1756 struct nlattr *na_flower;
1757 struct nlattr *na_flower_act;
/* Saved positions of the VLAN id/prio pad attributes for later patching. */
1758 struct nlattr *na_vlan_id = NULL;
1759 struct nlattr *na_vlan_priority = NULL;
1760 uint64_t item_flags = 0;
/* Build the DPDK port-id -> kernel ifindex translation table. */
1763 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1764 PTOI_TABLE_SZ_MAX(dev)));
1765 nlh = dev_flow->tcf.nlh;
1766 tcm = dev_flow->tcf.tcm;
1767 /* Prepare API must have been called beforehand. */
1768 assert(nlh != NULL && tcm != NULL);
1769 tcm->tcm_family = AF_UNSPEC;
1770 tcm->tcm_ifindex = ptoi[0].ifindex;
1771 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1773 * Priority cannot be zero to prevent the kernel from picking one
1776 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1777 RTE_BE16(ETH_P_ALL));
1778 if (attr->group > 0)
1779 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1780 mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1781 na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1782 mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
/* ---- Pattern items -> flower match keys ---- */
1783 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1786 switch (items->type) {
1787 case RTE_FLOW_ITEM_TYPE_VOID:
1789 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1790 mask.port_id = flow_tcf_item_mask
1791 (items, &rte_flow_item_port_id_mask,
1792 &flow_tcf_mask_supported.port_id,
1793 &flow_tcf_mask_empty.port_id,
1794 sizeof(flow_tcf_mask_supported.port_id),
1796 assert(mask.port_id);
1797 if (mask.port_id == &flow_tcf_mask_empty.port_id)
1799 spec.port_id = items->spec;
1800 if (!mask.port_id->id)
/* Redirect the rule to the ifindex matching the requested port id. */
1803 for (i = 0; ptoi[i].ifindex; ++i)
1804 if (ptoi[i].port_id == spec.port_id->id)
1806 assert(ptoi[i].ifindex);
1807 tcm->tcm_ifindex = ptoi[i].ifindex;
1809 case RTE_FLOW_ITEM_TYPE_ETH:
1810 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1811 mask.eth = flow_tcf_item_mask
1812 (items, &rte_flow_item_eth_mask,
1813 &flow_tcf_mask_supported.eth,
1814 &flow_tcf_mask_empty.eth,
1815 sizeof(flow_tcf_mask_supported.eth),
1818 if (mask.eth == &flow_tcf_mask_empty.eth)
1820 spec.eth = items->spec;
1821 if (mask.eth->type) {
1822 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1826 if (!is_zero_ether_addr(&mask.eth->dst)) {
1827 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1829 spec.eth->dst.addr_bytes);
1830 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1832 mask.eth->dst.addr_bytes);
1834 if (!is_zero_ether_addr(&mask.eth->src)) {
1835 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1837 spec.eth->src.addr_bytes);
1838 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1840 mask.eth->src.addr_bytes);
1843 case RTE_FLOW_ITEM_TYPE_VLAN:
1844 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1845 mask.vlan = flow_tcf_item_mask
1846 (items, &rte_flow_item_vlan_mask,
1847 &flow_tcf_mask_supported.vlan,
1848 &flow_tcf_mask_empty.vlan,
1849 sizeof(flow_tcf_mask_supported.vlan),
/* VLAN presence forces the outer ethertype to 802.1Q. */
1853 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1854 RTE_BE16(ETH_P_8021Q));
1857 if (mask.vlan == &flow_tcf_mask_empty.vlan)
1859 spec.vlan = items->spec;
1860 if (mask.vlan->inner_type) {
1861 mnl_attr_put_u16(nlh,
1862 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1863 spec.vlan->inner_type);
1864 vlan_eth_type_set = 1;
/* TCI: top 3 bits are PCP, low 12 bits are the VLAN id. */
1866 if (mask.vlan->tci & RTE_BE16(0xe000))
1867 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1869 (spec.vlan->tci) >> 13) & 0x7);
1870 if (mask.vlan->tci & RTE_BE16(0x0fff))
1871 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1876 case RTE_FLOW_ITEM_TYPE_IPV4:
1877 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1878 mask.ipv4 = flow_tcf_item_mask
1879 (items, &rte_flow_item_ipv4_mask,
1880 &flow_tcf_mask_supported.ipv4,
1881 &flow_tcf_mask_empty.ipv4,
1882 sizeof(flow_tcf_mask_supported.ipv4),
/* Emit ETH_P_IP only if no explicit (vlan) ethertype was set yet. */
1885 if (!eth_type_set || !vlan_eth_type_set)
1886 mnl_attr_put_u16(nlh,
1888 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1889 TCA_FLOWER_KEY_ETH_TYPE,
1890 RTE_BE16(ETH_P_IP));
1892 vlan_eth_type_set = 1;
1893 if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1895 spec.ipv4 = items->spec;
1896 if (mask.ipv4->hdr.next_proto_id) {
1897 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1898 spec.ipv4->hdr.next_proto_id);
1901 if (mask.ipv4->hdr.src_addr) {
1902 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1903 spec.ipv4->hdr.src_addr);
1904 mnl_attr_put_u32(nlh,
1905 TCA_FLOWER_KEY_IPV4_SRC_MASK,
1906 mask.ipv4->hdr.src_addr);
1908 if (mask.ipv4->hdr.dst_addr) {
1909 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1910 spec.ipv4->hdr.dst_addr);
1911 mnl_attr_put_u32(nlh,
1912 TCA_FLOWER_KEY_IPV4_DST_MASK,
1913 mask.ipv4->hdr.dst_addr);
1916 case RTE_FLOW_ITEM_TYPE_IPV6:
1917 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1918 mask.ipv6 = flow_tcf_item_mask
1919 (items, &rte_flow_item_ipv6_mask,
1920 &flow_tcf_mask_supported.ipv6,
1921 &flow_tcf_mask_empty.ipv6,
1922 sizeof(flow_tcf_mask_supported.ipv6),
1925 if (!eth_type_set || !vlan_eth_type_set)
1926 mnl_attr_put_u16(nlh,
1928 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1929 TCA_FLOWER_KEY_ETH_TYPE,
1930 RTE_BE16(ETH_P_IPV6));
1932 vlan_eth_type_set = 1;
1933 if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1935 spec.ipv6 = items->spec;
1936 if (mask.ipv6->hdr.proto) {
1937 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1938 spec.ipv6->hdr.proto);
1941 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1942 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1943 sizeof(spec.ipv6->hdr.src_addr),
1944 spec.ipv6->hdr.src_addr);
1945 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1946 sizeof(mask.ipv6->hdr.src_addr),
1947 mask.ipv6->hdr.src_addr);
1949 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1950 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1951 sizeof(spec.ipv6->hdr.dst_addr),
1952 spec.ipv6->hdr.dst_addr);
1953 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1954 sizeof(mask.ipv6->hdr.dst_addr),
1955 mask.ipv6->hdr.dst_addr);
1958 case RTE_FLOW_ITEM_TYPE_UDP:
1959 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1960 mask.udp = flow_tcf_item_mask
1961 (items, &rte_flow_item_udp_mask,
1962 &flow_tcf_mask_supported.udp,
1963 &flow_tcf_mask_empty.udp,
1964 sizeof(flow_tcf_mask_supported.udp),
1968 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1970 if (mask.udp == &flow_tcf_mask_empty.udp)
1972 spec.udp = items->spec;
1973 if (mask.udp->hdr.src_port) {
1974 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1975 spec.udp->hdr.src_port);
1976 mnl_attr_put_u16(nlh,
1977 TCA_FLOWER_KEY_UDP_SRC_MASK,
1978 mask.udp->hdr.src_port);
1980 if (mask.udp->hdr.dst_port) {
1981 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1982 spec.udp->hdr.dst_port);
1983 mnl_attr_put_u16(nlh,
1984 TCA_FLOWER_KEY_UDP_DST_MASK,
1985 mask.udp->hdr.dst_port);
1988 case RTE_FLOW_ITEM_TYPE_TCP:
1989 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1990 mask.tcp = flow_tcf_item_mask
1991 (items, &rte_flow_item_tcp_mask,
1992 &flow_tcf_mask_supported.tcp,
1993 &flow_tcf_mask_empty.tcp,
1994 sizeof(flow_tcf_mask_supported.tcp),
1998 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2000 if (mask.tcp == &flow_tcf_mask_empty.tcp)
2002 spec.tcp = items->spec;
2003 if (mask.tcp->hdr.src_port) {
2004 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
2005 spec.tcp->hdr.src_port);
2006 mnl_attr_put_u16(nlh,
2007 TCA_FLOWER_KEY_TCP_SRC_MASK,
2008 mask.tcp->hdr.src_port);
2010 if (mask.tcp->hdr.dst_port) {
2011 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
2012 spec.tcp->hdr.dst_port);
2013 mnl_attr_put_u16(nlh,
2014 TCA_FLOWER_KEY_TCP_DST_MASK,
2015 mask.tcp->hdr.dst_port);
2017 if (mask.tcp->hdr.tcp_flags) {
2020 TCA_FLOWER_KEY_TCP_FLAGS,
2022 (spec.tcp->hdr.tcp_flags));
2025 TCA_FLOWER_KEY_TCP_FLAGS_MASK,
2027 (mask.tcp->hdr.tcp_flags));
2031 return rte_flow_error_set(error, ENOTSUP,
2032 RTE_FLOW_ERROR_TYPE_ITEM,
2033 NULL, "item not supported");
/* ---- Actions -> TCA_FLOWER_ACT nest, one indexed entry per action ---- */
2036 na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
2037 na_act_index_cur = 1;
2038 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2039 struct nlattr *na_act_index;
2040 struct nlattr *na_act;
2041 unsigned int vlan_act;
2044 switch (actions->type) {
2045 case RTE_FLOW_ACTION_TYPE_VOID:
2047 case RTE_FLOW_ACTION_TYPE_PORT_ID:
2048 conf.port_id = actions->conf;
2049 if (conf.port_id->original)
2052 for (i = 0; ptoi[i].ifindex; ++i)
2053 if (ptoi[i].port_id == conf.port_id->id)
2055 assert(ptoi[i].ifindex);
2057 mnl_attr_nest_start(nlh, na_act_index_cur++);
2058 assert(na_act_index);
2059 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
2060 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
/* port_id -> mirred egress redirect to the resolved ifindex. */
2062 mnl_attr_put(nlh, TCA_MIRRED_PARMS,
2063 sizeof(struct tc_mirred),
2064 &(struct tc_mirred){
2065 .action = TC_ACT_STOLEN,
2066 .eaction = TCA_EGRESS_REDIR,
2067 .ifindex = ptoi[i].ifindex,
2069 mnl_attr_nest_end(nlh, na_act);
2070 mnl_attr_nest_end(nlh, na_act_index);
2072 case RTE_FLOW_ACTION_TYPE_JUMP:
2073 conf.jump = actions->conf;
2075 mnl_attr_nest_start(nlh, na_act_index_cur++);
2076 assert(na_act_index);
2077 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2078 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
/* jump -> gact goto-chain (target group OR-ed in, line elided). */
2080 mnl_attr_put(nlh, TCA_GACT_PARMS,
2081 sizeof(struct tc_gact),
2083 .action = TC_ACT_GOTO_CHAIN |
2086 mnl_attr_nest_end(nlh, na_act);
2087 mnl_attr_nest_end(nlh, na_act_index);
2089 case RTE_FLOW_ACTION_TYPE_DROP:
2091 mnl_attr_nest_start(nlh, na_act_index_cur++);
2092 assert(na_act_index);
2093 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2094 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
/* drop -> gact shot. */
2096 mnl_attr_put(nlh, TCA_GACT_PARMS,
2097 sizeof(struct tc_gact),
2099 .action = TC_ACT_SHOT,
2101 mnl_attr_nest_end(nlh, na_act);
2102 mnl_attr_nest_end(nlh, na_act_index);
2104 case RTE_FLOW_ACTION_TYPE_COUNT:
2106 * Driver adds the count action implicitly for
2107 * each rule it creates.
2109 ret = flow_tcf_translate_action_count(dev,
2114 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
2115 conf.of_push_vlan = NULL;
2116 vlan_act = TCA_VLAN_ACT_POP;
2117 goto action_of_vlan;
2118 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
2119 conf.of_push_vlan = actions->conf;
2120 vlan_act = TCA_VLAN_ACT_PUSH;
2121 goto action_of_vlan;
2122 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
2123 conf.of_set_vlan_vid = actions->conf;
/* If a VLAN attr slot already exists, just patch it in place. */
2125 goto override_na_vlan_id;
2126 vlan_act = TCA_VLAN_ACT_MODIFY;
2127 goto action_of_vlan;
2128 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
2129 conf.of_set_vlan_pcp = actions->conf;
2130 if (na_vlan_priority)
2131 goto override_na_vlan_priority;
2132 vlan_act = TCA_VLAN_ACT_MODIFY;
2133 goto action_of_vlan;
/* Shared emission path for all VLAN variants (label line elided). */
2136 mnl_attr_nest_start(nlh, na_act_index_cur++);
2137 assert(na_act_index);
2138 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
2139 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2141 mnl_attr_put(nlh, TCA_VLAN_PARMS,
2142 sizeof(struct tc_vlan),
2144 .action = TC_ACT_PIPE,
2145 .v_action = vlan_act,
2147 if (vlan_act == TCA_VLAN_ACT_POP) {
2148 mnl_attr_nest_end(nlh, na_act);
2149 mnl_attr_nest_end(nlh, na_act_index);
2152 if (vlan_act == TCA_VLAN_ACT_PUSH)
2153 mnl_attr_put_u16(nlh,
2154 TCA_VLAN_PUSH_VLAN_PROTOCOL,
2155 conf.of_push_vlan->ethertype);
/* Reserve padded id/prio attributes so later SET actions can patch them. */
2156 na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
2157 mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
2158 na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
2159 mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
2160 mnl_attr_nest_end(nlh, na_act);
2161 mnl_attr_nest_end(nlh, na_act_index);
2162 if (actions->type ==
2163 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
2164 override_na_vlan_id:
2165 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
2166 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
2168 (conf.of_set_vlan_vid->vlan_vid);
2169 } else if (actions->type ==
2170 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
2171 override_na_vlan_priority:
2172 na_vlan_priority->nla_type =
2173 TCA_VLAN_PUSH_VLAN_PRIORITY;
2174 *(uint8_t *)mnl_attr_get_payload
2175 (na_vlan_priority) =
2176 conf.of_set_vlan_pcp->vlan_pcp;
/* All modify-field actions are encoded as a single pedit action. */
2179 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
2180 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
2181 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
2182 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
2183 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
2184 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
2185 case RTE_FLOW_ACTION_TYPE_SET_TTL:
2186 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
2187 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
2188 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
2190 mnl_attr_nest_start(nlh, na_act_index_cur++);
2191 flow_tcf_create_pedit_mnl_msg(nlh,
2192 &actions, item_flags);
2193 mnl_attr_nest_end(nlh, na_act_index);
2196 return rte_flow_error_set(error, ENOTSUP,
2197 RTE_FLOW_ERROR_TYPE_ACTION,
2199 "action not supported");
2203 assert(na_flower_act);
2204 mnl_attr_nest_end(nlh, na_flower_act);
2205 mnl_attr_nest_end(nlh, na_flower);
2210 * Send Netlink message with acknowledgment.
2213 * Flow context to use.
2215 * Message to send. This function always raises the NLM_F_ACK flag before
2219 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Sends the Netlink message with NLM_F_ACK set and waits for the kernel
 * acknowledgment on the context's socket, matching replies by sequence
 * number. The answer buffer is sized to hold an nlmsgerr wrapping the
 * original request payload.
 */
2222 flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
2224 alignas(struct nlmsghdr)
2225 uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
2226 nlh->nlmsg_len - sizeof(*nlh)];
/* Per-context monotonically increasing sequence ties request to reply. */
2227 uint32_t seq = ctx->seq++;
2228 struct mnl_socket *nl = ctx->nl;
2231 nlh->nlmsg_flags |= NLM_F_ACK;
2232 nlh->nlmsg_seq = seq;
2233 ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
2235 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
2238 (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
2246 * Apply flow to E-Switch by sending Netlink message.
2249 * Pointer to Ethernet device.
2250 * @param[in, out] flow
2251 * Pointer to the sub flow.
2253 * Pointer to the error structure.
2256 * 0 on success, a negative errno value otherwise and rte_ernno is set.
/*
 * Installs the single (non-expandable) E-Switch flow by issuing
 * RTM_NEWTFILTER with exclusive-create flags and waiting for the kernel ACK.
 */
2259 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2260 struct rte_flow_error *error)
2262 struct priv *priv = dev->data->dev_private;
2263 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2264 struct mlx5_flow *dev_flow;
2265 struct nlmsghdr *nlh;
2267 dev_flow = LIST_FIRST(&flow->dev_flows);
2268 /* E-Switch flow can't be expanded. */
2269 assert(!LIST_NEXT(dev_flow, next));
2270 nlh = dev_flow->tcf.nlh;
2271 nlh->nlmsg_type = RTM_NEWTFILTER;
2272 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
/* flow_tcf_nl_ack() returns 0 on success (success path elided). */
2273 if (!flow_tcf_nl_ack(ctx, nlh))
2275 return rte_flow_error_set(error, rte_errno,
2276 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2277 "netlink: failed to create TC flow rule");
2281 * Remove flow from E-Switch by sending Netlink message.
2284 * Pointer to Ethernet device.
2285 * @param[in, out] flow
2286 * Pointer to the sub flow.
/*
 * Removes the flow from the E-Switch: drops the counter reference (freeing
 * it on last release) and issues RTM_DELTFILTER, best-effort (the ACK
 * result is deliberately ignored here).
 */
2289 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2291 struct priv *priv = dev->data->dev_private;
2292 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2293 struct mlx5_flow *dev_flow;
2294 struct nlmsghdr *nlh;
2298 if (flow->counter) {
2299 if (--flow->counter->ref_cnt == 0) {
2300 rte_free(flow->counter);
2301 flow->counter = NULL;
2304 dev_flow = LIST_FIRST(&flow->dev_flows);
2307 /* E-Switch flow can't be expanded. */
2308 assert(!LIST_NEXT(dev_flow, next));
2309 nlh = dev_flow->tcf.nlh;
2310 nlh->nlmsg_type = RTM_DELTFILTER;
2311 nlh->nlmsg_flags = NLM_F_REQUEST;
2312 flow_tcf_nl_ack(ctx, nlh);
2316 * Remove flow from E-Switch and release resources of the device flow.
2319 * Pointer to Ethernet device.
2320 * @param[in, out] flow
2321 * Pointer to the sub flow.
/*
 * Full teardown: removes the kernel filter via flow_tcf_remove(), then
 * unlinks and releases the single device-flow object.
 */
2324 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2326 struct mlx5_flow *dev_flow;
2330 flow_tcf_remove(dev, flow);
2331 dev_flow = LIST_FIRST(&flow->dev_flows);
2334 /* E-Switch flow can't be expanded. */
2335 assert(!LIST_NEXT(dev_flow, next));
2336 LIST_REMOVE(dev_flow, next);
2341 * Helper routine for figuring the space size required for a parse buffer.
2344 * array of values to use.
2346 * Current location in array.
2348 * Value to compare with.
2351 * The maximum between the given value and the array value on index.
/* Returns value when idx is negative, else max(array[idx], value). */
2354 flow_tcf_arr_val_max(uint16_t array[], int idx, uint16_t value)
2356 	return idx < 0 ? (value) : RTE_MAX((array)[idx], value);
2360 * Parse rtnetlink message attributes filling the attribute table with the info
2364 * Attribute table to be filled.
2366 * Maxinum entry in the attribute table.
2368 * The attributes section in the message to be parsed.
2370 * The length of the attributes section in the message.
/*
 * Classic rtattr table fill: zeroes tb[0..max] and records the first
 * occurrence of each attribute type found in the [rta, rta+len) run.
 */
2373 flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
2374 struct rtattr *rta, int len)
2376 unsigned short type;
2377 memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
2378 while (RTA_OK(rta, len)) {
2379 type = rta->rta_type;
/* Only the first attribute of each type is kept (assignment elided). */
2380 if (type <= max && !tb[type])
2382 rta = RTA_NEXT(rta, len);
2387 * Extract flow counters from flower action.
2390 * flower action stats properties in the Netlink message received.
2392 * The backward sequence of rta_types, as written in the attribute table,
2393 * we need to traverse in order to get to the requested object.
2395 * Current location in rta_type table.
2397 * data holding the count statistics of the rte_flow retrieved from
2401 * 0 if data was found and retrieved, -1 otherwise.
/*
 * Leaf of the stats-retrieval recursion: parses the TCA_ACT_STATS nest and,
 * for TCA_STATS_BASIC, copies the gnet_stats_basic payload out (bounded by
 * the attribute's actual payload length).
 */
2404 flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
2405 uint16_t rta_type[], int idx,
2406 struct gnet_stats_basic *data)
/* VLA sized by the largest type we may index in this table. */
2408 int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
2410 struct rtattr *tbs[tca_stats_max + 1];
2412 if (rta == NULL || idx < 0)
2414 flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
2415 RTA_DATA(rta), RTA_PAYLOAD(rta));
2416 switch (rta_type[idx]) {
2417 case TCA_STATS_BASIC:
2418 if (tbs[TCA_STATS_BASIC]) {
2419 memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
2420 RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
2432 * Parse flower single action retrieving the requested action attribute,
2436 * flower action properties in the Netlink message received.
2438 * The backward sequence of rta_types, as written in the attribute table,
2439 * we need to traverse in order to get to the requested object.
2441 * Current location in rta_type table.
2443 * Count statistics retrieved from the message query.
2446 * 0 if data was found and retrieved, -1 otherwise.
/*
 * Parses one action entry; requires TCA_ACT_KIND to be present, then
 * descends into TCA_ACT_STATS when that is the requested rta_type at the
 * current traversal index.
 */
2449 flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
2450 uint16_t rta_type[], int idx, void *data)
2452 int tca_act_max = flow_tcf_arr_val_max(rta_type, idx, TCA_ACT_STATS);
2453 struct rtattr *tb[tca_act_max + 1];
2455 if (arg == NULL || idx < 0)
2457 flow_tcf_nl_parse_rtattr(tb, tca_act_max,
2458 RTA_DATA(arg), RTA_PAYLOAD(arg));
/* An action without a kind attribute is malformed — bail out. */
2459 if (tb[TCA_ACT_KIND] == NULL)
2461 switch (rta_type[idx]) {
2463 if (tb[TCA_ACT_STATS])
2464 return flow_tcf_nl_action_stats_parse_and_get
2467 (struct gnet_stats_basic *)data);
2476 * Parse flower action section in the message retrieving the requested
2477 * attribute from the first action that provides it.
2480 * flower section in the Netlink message received.
2482 * The backward sequence of rta_types, as written in the attribute table,
2483 * we need to traverse in order to get to the requested object.
2485 * Current location in rta_type table.
2487 * data retrieved from the message query.
2490 * 0 if data was found and retrieved, -1 otherwise.
/*
 * Walk the flower TCA_FLOWER_ACT section: actions are stored by
 * priority slot, so try each of the TCA_ACT_MAX_PRIO entries until one
 * yields the requested attribute. Returns 0 on success, -1 otherwise.
 * NOTE(review): intermediate lines are omitted from this view.
 */
2493 flow_tcf_nl_action_parse_and_get(struct rtattr *arg,
2494 uint16_t rta_type[], int idx, void *data)
2496 struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
2499 if (arg == NULL || idx < 0)
2501 flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
2502 RTA_DATA(arg), RTA_PAYLOAD(arg));
2503 switch (rta_type[idx]) {
2505 * flow counters are stored in the actions defined by the flow
2506 * and not in the flow itself, therefore we need to traverse the
2507 * flower chain of actions in search for them.
2509 * Note that the index is not decremented here.
/* First action that answers wins; remaining slots are ignored. */
2512 for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
2514 !flow_tcf_nl_parse_one_action_and_get(tb[i],
2527 * Parse flower classifier options in the message, retrieving the requested
2528 * attribute if found.
2531 * flower section in the Netlink message received.
2533 * The backward sequence of rta_types, as written in the attribute table,
2534 * we need to traverse in order to get to the requested object.
2536 * Current location in rta_type table.
2538 * data retrieved from the message query.
2541 * 0 if data was found and retrieved, -1 otherwise.
/*
 * Parse the flower classifier TCA_OPTIONS nest and, when it contains
 * the action list (TCA_FLOWER_ACT), descend into it with idx moved one
 * step down the backward type chain. Returns 0 on success, -1 otherwise.
 * NOTE(review): intermediate lines are omitted from this view.
 */
2544 flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
2545 uint16_t rta_type[], int idx, void *data)
2547 int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
2549 struct rtattr *tb[tca_flower_max + 1];
2551 if (!opt || idx < 0)
2553 flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
2554 RTA_DATA(opt), RTA_PAYLOAD(opt));
2555 switch (rta_type[idx]) {
2556 case TCA_FLOWER_ACT:
2557 if (tb[TCA_FLOWER_ACT])
/* --idx: consume this level of the backward type chain. */
2558 return flow_tcf_nl_action_parse_and_get
2559 (tb[TCA_FLOWER_ACT],
2560 rta_type, --idx, data);
2569 * Parse Netlink reply on filter query, retrieving the flow counters.
2572 * Message received from Netlink.
2574 * The backward sequence of rta_types, as written in the attribute table,
2575 * we need to traverse in order to get to the requested object.
2577 * Current location in rta_type table.
2579 * data retrieved from the message query.
2582 * 0 if data was found and retrieved, -1 otherwise.
/*
 * Top-level parser for a TC filter Netlink message: validate the
 * message type, confirm the classifier kind is "flower", then descend
 * into TCA_OPTIONS following the backward rta_type[] chain.
 * Returns 0 when the requested data was retrieved, -1 otherwise.
 * NOTE(review): intermediate lines are omitted from this view.
 */
2585 flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
2586 uint16_t rta_type[], int idx, void *data)
2588 struct nlmsghdr *nlh = cnlh;
2589 struct tcmsg *t = NLMSG_DATA(nlh);
2590 int len = nlh->nlmsg_len;
2591 int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
2592 struct rtattr *tb[tca_max + 1];
/* Only TC filter messages are meaningful here. */
2596 if (nlh->nlmsg_type != RTM_NEWTFILTER &&
2597 nlh->nlmsg_type != RTM_GETTFILTER &&
2598 nlh->nlmsg_type != RTM_DELTFILTER)
/* Skip the fixed tcmsg header to reach the attribute area. */
2600 len -= NLMSG_LENGTH(sizeof(*t));
2603 flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
2604 /* Not a TC flower flow - bail out */
2605 if (!tb[TCA_KIND] ||
2606 strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
2608 switch (rta_type[idx]) {
2610 if (tb[TCA_OPTIONS])
2611 return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
2622 * A callback to parse Netlink reply on TC flower query.
2625 * Message received from Netlink.
2627 * Pointer to data area to be filled by the parsing routine.
2628 * assumed to be a pointer to struct flow_tcf_stats_basic.
/*
 * libmnl callback for the flower stats query: drive the backward
 * attribute walk (TCA_OPTIONS -> TCA_FLOWER_ACT -> TCA_ACT_STATS ->
 * TCA_STATS_BASIC) and mark @data valid when counters were extracted.
 * NOTE(review): intermediate lines are omitted from this view.
 */
2634 flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
2637 * The backward sequence of rta_types to pass in order to get
/* Listed innermost-first; the walk starts at the last element. */
2640 uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
2641 TCA_FLOWER_ACT, TCA_OPTIONS };
2642 struct flow_tcf_stats_basic *sb_data = data;
/* Union used to drop const from the callback's nlh without a cast
 * through an incompatible pointer type. */
2644 const struct nlmsghdr *c;
2645 struct nlmsghdr *nc;
2646 } tnlh = { .c = nlh };
2648 if (!flow_tcf_nl_filter_parse_and_get(tnlh.nc, rta_type,
2649 RTE_DIM(rta_type) - 1,
2650 (void *)&sb_data->counters))
/* Parse succeeded — counters in sb_data are trustworthy. */
2651 sb_data->valid = true;
2656 * Query a TC flower rule for its statistics via netlink.
2659 * Pointer to Ethernet device.
2661 * Pointer to the sub flow.
2663 * data retrieved by the query.
2665 * Perform verbose error reporting if not NULL.
2668 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Query a TC flower rule's hit counters over Netlink: re-send the
 * flow's cached filter message as RTM_GETTFILTER with NLM_F_ECHO, then
 * parse the kernel's echo reply for TCA_STATS_BASIC and report the
 * delta since the last reset through @data (rte_flow_query_count).
 * Returns 0 on success, negative errno otherwise (rte_errno set).
 * NOTE(review): intermediate lines (returns, goto labels, braces) are
 * omitted from this view — do not treat the visible body as complete.
 */
2671 flow_tcf_query_count(struct rte_eth_dev *dev,
2672 struct rte_flow *flow,
2674 struct rte_flow_error *error)
2676 struct flow_tcf_stats_basic sb_data = { 0 };
2677 struct rte_flow_query_count *qc = data;
2678 struct priv *priv = dev->data->dev_private;
2679 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2680 struct mnl_socket *nl = ctx->nl;
2681 struct mlx5_flow *dev_flow;
2682 struct nlmsghdr *nlh;
/* Unique sequence number to match the reply to this request. */
2683 uint32_t seq = priv->tcf_context->seq++;
2687 dev_flow = LIST_FIRST(&flow->dev_flows);
2688 /* E-Switch flow can't be expanded. */
2689 assert(!LIST_NEXT(dev_flow, next));
/* Without a counter attached there is nothing to query. */
2690 if (!dev_flow->flow->counter)
/* Reuse the flow's stored message, switching it to a GET request. */
2692 nlh = dev_flow->tcf.nlh;
2693 nlh->nlmsg_type = RTM_GETTFILTER;
/* NLM_F_ECHO asks the kernel to echo the filter back with stats. */
2694 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
2695 nlh->nlmsg_seq = seq;
2696 if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
2699 ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
/* Run the reply through the stats-extraction callback. */
2702 ret = mnl_cb_run(ctx->buf, ret, seq,
2703 mnl_socket_get_portid(nl),
2704 flow_tcf_nl_message_get_stats_basic,
2707 /* Return the delta from last reset. */
2708 if (sb_data.valid) {
2709 /* Return the delta from last reset. */
2712 qc->hits = sb_data.counters.packets - flow->counter->hits;
2713 qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
/* Reset path: record the absolute values as the new baseline. */
2715 flow->counter->hits = sb_data.counters.packets;
2716 flow->counter->bytes = sb_data.counters.bytes;
2720 return rte_flow_error_set(error, EINVAL,
2721 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2723 "flow does not have counter");
2725 return rte_flow_error_set
2726 (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2727 NULL, "netlink: failed to read flow rule counters");
2729 return rte_flow_error_set
2730 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2731 NULL, "counters are not available.");
2737 * @see rte_flow_query()
/*
 * rte_flow query entry point for the TCF backend: iterate the action
 * list, dispatching COUNT to flow_tcf_query_count(); VOID is skipped,
 * anything else is rejected with ENOTSUP.
 * NOTE(review): intermediate lines are omitted from this view.
 */
2741 flow_tcf_query(struct rte_eth_dev *dev,
2742 struct rte_flow *flow,
2743 const struct rte_flow_action *actions,
2745 struct rte_flow_error *error)
2749 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2750 switch (actions->type) {
2751 case RTE_FLOW_ACTION_TYPE_VOID:
2753 case RTE_FLOW_ACTION_TYPE_COUNT:
2754 ret = flow_tcf_query_count(dev, flow, data, error);
2757 return rte_flow_error_set(error, ENOTSUP,
2758 RTE_FLOW_ERROR_TYPE_ACTION,
2760 "action not supported");
/* Driver-ops vtable registering the TC-flower backend with the generic
 * mlx5 flow layer; each member points at the corresponding flow_tcf_*
 * implementation. */
2766 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2767 .validate = flow_tcf_validate,
2768 .prepare = flow_tcf_prepare,
2769 .translate = flow_tcf_translate,
2770 .apply = flow_tcf_apply,
2771 .remove = flow_tcf_remove,
2772 .destroy = flow_tcf_destroy,
2773 .query = flow_tcf_query,
2777 * Create and configure a libmnl socket for Netlink flow rules.
2780 * A valid libmnl socket object pointer on success, NULL otherwise and
2783 static struct mnl_socket *
/*
 * Open and bind a NETLINK_ROUTE libmnl socket for TC flow rules;
 * returns the socket on success, NULL otherwise.
 * NOTE(review): intermediate lines (NULL check, returns) are omitted
 * from this view.
 */
2784 flow_tcf_mnl_socket_create(void)
2786 struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
/* Best-effort: ask the kernel for extended-ack-free (capped) ACKs. */
2789 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
/* mnl_socket_bind() returns 0 on success, hence the negation. */
2791 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
/* Error path: release the half-initialized socket. */
2796 mnl_socket_close(nl);
2801 * Destroy a libmnl socket.
2804 * Libmnl socket of the @p NETLINK_ROUTE kind.
/* Close a libmnl NETLINK_ROUTE socket created by
 * flow_tcf_mnl_socket_create(); the visible body suggests a NULL guard
 * on the omitted line — TODO confirm. */
2807 flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
2810 mnl_socket_close(nl);
2814 * Initialize ingress qdisc of a given network interface.
2817 * Pointer to tc-flower context to use.
2819 * Index of network interface to initialize.
2821 * Perform verbose error reporting if not NULL.
2824 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * (Re)initialize the ingress qdisc of @ifindex: first delete any
 * existing ingress qdisc (ignoring "already absent" errors), then
 * create a fresh one so flower filters can be attached.
 * Returns 0 on success, negative errno otherwise (rte_errno set).
 * NOTE(review): intermediate lines are omitted from this view.
 */
2827 mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
2828 unsigned int ifindex, struct rte_flow_error *error)
2830 struct nlmsghdr *nlh;
/* Stack buffer aligned for nlmsghdr; 128 bytes of attribute room. */
2832 alignas(struct nlmsghdr)
2833 uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2835 /* Destroy existing ingress qdisc and everything attached to it. */
2836 nlh = mnl_nlmsg_put_header(buf);
2837 nlh->nlmsg_type = RTM_DELQDISC;
2838 nlh->nlmsg_flags = NLM_F_REQUEST;
2839 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2840 tcm->tcm_family = AF_UNSPEC;
2841 tcm->tcm_ifindex = ifindex;
2842 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2843 tcm->tcm_parent = TC_H_INGRESS;
2844 /* Ignore errors when qdisc is already absent. */
2845 if (flow_tcf_nl_ack(ctx, nlh) &&
2846 rte_errno != EINVAL && rte_errno != ENOENT)
2847 return rte_flow_error_set(error, rte_errno,
2848 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2849 "netlink: failed to remove ingress"
2851 /* Create fresh ingress qdisc. */
2852 nlh = mnl_nlmsg_put_header(buf);
2853 nlh->nlmsg_type = RTM_NEWQDISC;
/* NLM_F_EXCL: fail rather than modify if one was racily re-created. */
2854 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2855 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2856 tcm->tcm_family = AF_UNSPEC;
2857 tcm->tcm_ifindex = ifindex;
2858 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2859 tcm->tcm_parent = TC_H_INGRESS;
2860 mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2861 if (flow_tcf_nl_ack(ctx, nlh))
2862 return rte_flow_error_set(error, rte_errno,
2863 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2864 "netlink: failed to create ingress"
2870 * Create libmnl context for Netlink flow rules.
2873 * A valid libmnl socket object pointer on success, NULL otherwise and
2876 struct mlx5_flow_tcf_context *
/*
 * Allocate and initialize a TCF context: zeroed context struct, bound
 * libmnl socket, receive buffer of MNL_SOCKET_BUFFER_SIZE and a random
 * starting Netlink sequence number. Returns the context or NULL
 * (cleanup via mlx5_flow_tcf_context_destroy on the error path).
 * NOTE(review): intermediate lines (NULL checks, returns) are omitted
 * from this view.
 */
2877 mlx5_flow_tcf_context_create(void)
2879 struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
2884 ctx->nl = flow_tcf_mnl_socket_create();
2887 ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
/* Reply buffer, uint32_t-aligned as Netlink requires. */
2888 ctx->buf = rte_zmalloc(__func__,
2889 ctx->buf_size, sizeof(uint32_t));
/* Random seed makes sequence numbers unlikely to collide across
 * restarts. */
2892 ctx->seq = random();
/* Error path: tear down whatever was allocated so far. */
2895 mlx5_flow_tcf_context_destroy(ctx);
2900 * Destroy a libmnl context.
2903 * Libmnl socket of the @p NETLINK_ROUTE kind.
2906 mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
2910 flow_tcf_mnl_socket_destroy(ctx->nl);