1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/gen_stats.h>
10 #include <linux/if_ether.h>
11 #include <linux/netlink.h>
12 #include <linux/pkt_cls.h>
13 #include <linux/pkt_sched.h>
14 #include <linux/rtnetlink.h>
15 #include <linux/tc_act/tc_gact.h>
16 #include <linux/tc_act/tc_mirred.h>
17 #include <netinet/in.h>
23 #include <sys/socket.h>
25 #include <rte_byteorder.h>
26 #include <rte_errno.h>
27 #include <rte_ether.h>
29 #include <rte_malloc.h>
30 #include <rte_common.h>
33 #include "mlx5_flow.h"
34 #include "mlx5_autoconf.h"
36 #ifdef HAVE_TC_ACT_VLAN
38 #include <linux/tc_act/tc_vlan.h>
40 #else /* HAVE_TC_ACT_VLAN */
42 #define TCA_VLAN_ACT_POP 1
43 #define TCA_VLAN_ACT_PUSH 2
44 #define TCA_VLAN_ACT_MODIFY 3
45 #define TCA_VLAN_PARMS 2
46 #define TCA_VLAN_PUSH_VLAN_ID 3
47 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
48 #define TCA_VLAN_PAD 5
49 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
56 #endif /* HAVE_TC_ACT_VLAN */
58 #ifdef HAVE_TC_ACT_PEDIT
60 #include <linux/tc_act/tc_pedit.h>
62 #else /* HAVE_TC_ACT_VLAN */
76 TCA_PEDIT_KEY_EX_HTYPE = 1,
77 TCA_PEDIT_KEY_EX_CMD = 2,
78 __TCA_PEDIT_KEY_EX_MAX
81 enum pedit_header_type {
82 TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
83 TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
84 TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
85 TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
86 TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
87 TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
92 TCA_PEDIT_KEY_EX_CMD_SET = 0,
93 TCA_PEDIT_KEY_EX_CMD_ADD = 1,
100 __u32 off; /*offset */
107 struct tc_pedit_sel {
111 struct tc_pedit_key keys[0];
114 #endif /* HAVE_TC_ACT_VLAN */
116 /* Normally found in linux/netlink.h. */
117 #ifndef NETLINK_CAP_ACK
118 #define NETLINK_CAP_ACK 10
121 /* Normally found in linux/pkt_sched.h. */
122 #ifndef TC_H_MIN_INGRESS
123 #define TC_H_MIN_INGRESS 0xfff2u
126 /* Normally found in linux/pkt_cls.h. */
127 #ifndef TCA_CLS_FLAGS_SKIP_SW
128 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
130 #ifndef HAVE_TCA_CHAIN
133 #ifndef HAVE_TCA_FLOWER_ACT
134 #define TCA_FLOWER_ACT 3
136 #ifndef HAVE_TCA_FLOWER_FLAGS
137 #define TCA_FLOWER_FLAGS 22
139 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
140 #define TCA_FLOWER_KEY_ETH_TYPE 8
142 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
143 #define TCA_FLOWER_KEY_ETH_DST 4
145 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
146 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
148 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
149 #define TCA_FLOWER_KEY_ETH_SRC 6
151 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
152 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
154 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
155 #define TCA_FLOWER_KEY_IP_PROTO 9
157 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
158 #define TCA_FLOWER_KEY_IPV4_SRC 10
160 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
161 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
163 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
164 #define TCA_FLOWER_KEY_IPV4_DST 12
166 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
167 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
169 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
170 #define TCA_FLOWER_KEY_IPV6_SRC 14
172 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
173 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
175 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
176 #define TCA_FLOWER_KEY_IPV6_DST 16
178 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
179 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
181 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
182 #define TCA_FLOWER_KEY_TCP_SRC 18
184 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
185 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
187 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
188 #define TCA_FLOWER_KEY_TCP_DST 19
190 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
191 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
193 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
194 #define TCA_FLOWER_KEY_UDP_SRC 20
196 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
197 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
199 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
200 #define TCA_FLOWER_KEY_UDP_DST 21
202 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
203 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
205 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
206 #define TCA_FLOWER_KEY_VLAN_ID 23
208 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
209 #define TCA_FLOWER_KEY_VLAN_PRIO 24
211 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
212 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
214 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
215 #define TCA_FLOWER_KEY_TCP_FLAGS 71
217 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
218 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
220 #ifndef HAVE_TC_ACT_GOTO_CHAIN
221 #define TC_ACT_GOTO_CHAIN 0x20000000
224 #ifndef IPV6_ADDR_LEN
225 #define IPV6_ADDR_LEN 16
228 #ifndef IPV4_ADDR_LEN
229 #define IPV4_ADDR_LEN 4
233 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
240 #ifndef TCA_ACT_MAX_PRIO
241 #define TCA_ACT_MAX_PRIO 32
245 * Structure for holding netlink context.
246 * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
247 * Using this (8KB) buffer size ensures that netlink messages will never be
250 struct mlx5_flow_tcf_context {
251 struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
252 uint32_t seq; /* Message sequence number. */
253 uint32_t buf_size; /* Message buffer size. */
254 uint8_t *buf; /* Message buffer. */
257 /** Structure used when extracting the values of a flow counters
258 * from a netlink message.
260 struct flow_tcf_stats_basic {
262 struct gnet_stats_basic counters;
265 /** Empty masks for known item types. */
267 struct rte_flow_item_port_id port_id;
268 struct rte_flow_item_eth eth;
269 struct rte_flow_item_vlan vlan;
270 struct rte_flow_item_ipv4 ipv4;
271 struct rte_flow_item_ipv6 ipv6;
272 struct rte_flow_item_tcp tcp;
273 struct rte_flow_item_udp udp;
274 } flow_tcf_mask_empty;
276 /** Supported masks for known item types. */
277 static const struct {
278 struct rte_flow_item_port_id port_id;
279 struct rte_flow_item_eth eth;
280 struct rte_flow_item_vlan vlan;
281 struct rte_flow_item_ipv4 ipv4;
282 struct rte_flow_item_ipv6 ipv6;
283 struct rte_flow_item_tcp tcp;
284 struct rte_flow_item_udp udp;
285 } flow_tcf_mask_supported = {
290 .type = RTE_BE16(0xffff),
291 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
292 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
295 /* PCP and VID only, no DEI. */
296 .tci = RTE_BE16(0xefff),
297 .inner_type = RTE_BE16(0xffff),
300 .next_proto_id = 0xff,
301 .src_addr = RTE_BE32(0xffffffff),
302 .dst_addr = RTE_BE32(0xffffffff),
307 "\xff\xff\xff\xff\xff\xff\xff\xff"
308 "\xff\xff\xff\xff\xff\xff\xff\xff",
310 "\xff\xff\xff\xff\xff\xff\xff\xff"
311 "\xff\xff\xff\xff\xff\xff\xff\xff",
314 .src_port = RTE_BE16(0xffff),
315 .dst_port = RTE_BE16(0xffff),
319 .src_port = RTE_BE16(0xffff),
320 .dst_port = RTE_BE16(0xffff),
324 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
325 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
326 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
327 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
328 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
330 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
332 /** DPDK port to network interface index (ifindex) conversion. */
333 struct flow_tcf_ptoi {
334 uint16_t port_id; /**< DPDK port ID. */
335 unsigned int ifindex; /**< Network interface index. */
338 /* Due to a limitation on driver/FW. */
339 #define MLX5_TCF_GROUP_ID_MAX 3
340 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
342 #define MLX5_TCF_FATE_ACTIONS \
343 (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
344 MLX5_FLOW_ACTION_JUMP)
346 #define MLX5_TCF_VLAN_ACTIONS \
347 (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
348 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
350 #define MLX5_TCF_PEDIT_ACTIONS \
351 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
352 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
353 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
354 MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
355 MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
357 #define MLX5_TCF_CONFIG_ACTIONS \
358 (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
359 MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
360 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
361 (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
363 #define MAX_PEDIT_KEYS 128
364 #define SZ_PEDIT_KEY_VAL 4
366 #define NUM_OF_PEDIT_KEYS(sz) \
367 (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
369 struct pedit_key_ex {
370 enum pedit_header_type htype;
374 struct pedit_parser {
375 struct tc_pedit_sel sel;
376 struct tc_pedit_key keys[MAX_PEDIT_KEYS];
377 struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
381 * Create space for using the implicitly created TC flow counter.
384 * Pointer to the Ethernet device structure.
387 * A pointer to the counter data structure, NULL otherwise and
390 static struct mlx5_flow_counter *
391 flow_tcf_counter_new(void)
393 struct mlx5_flow_counter *cnt;
396 * eswitch counter cannot be shared and its id is unknown.
397 * currently returning all with id 0.
398 * in the future maybe better to switch to unique numbers.
400 struct mlx5_flow_counter tmpl = {
/*
 * NOTE(review): this listing is truncated (original lines 401-407
 * elided), so the tmpl initializer fields are not visible here --
 * confirm against the full source what defaults they set.
 */
408 cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
/*
 * rte_calloc() may return NULL; presumably the elided lines that
 * follow check the result before copying tmpl into *cnt -- verify.
 */
414 /* Implicit counter, do not add to list. */
419 * Set pedit key of MAC address
422 * pointer to action specification
423 * @param[in,out] p_parser
424 * pointer to pedit_parser
427 flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
428 struct pedit_parser *p_parser)
430 int idx = p_parser->sel.nkeys;
431 uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
432 offsetof(struct ether_hdr, s_addr) :
433 offsetof(struct ether_hdr, d_addr);
434 const struct rte_flow_action_set_mac *conf =
435 (const struct rte_flow_action_set_mac *)actions->conf;
/* First pedit key: rewrite the first 4 bytes of the MAC address. */
437 p_parser->keys[idx].off = off;
/*
 * ~UINT32_MAX evaluates to 0. NOTE(review): in kernel act_pedit the
 * mask selects bits preserved from the packet word, so a zero mask
 * means all 32 bits are replaced by val -- confirm against act_pedit.
 */
438 p_parser->keys[idx].mask = ~UINT32_MAX;
439 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
440 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
441 memcpy(&p_parser->keys[idx].val,
442 conf->mac_addr, SZ_PEDIT_KEY_VAL);
/*
 * NOTE(review): a line is elided in this listing (original 443),
 * presumably "idx++;" so the second chunk lands in the next pedit
 * key -- confirm against the full source.
 */
444 p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
/*
 * Second key covers only the remaining 2 MAC bytes: mask 0xFFFF0000
 * presumably preserves the upper half of the 32-bit word -- verify
 * the byte-order assumption against act_pedit semantics.
 */
445 p_parser->keys[idx].mask = 0xFFFF0000;
446 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
447 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
448 memcpy(&p_parser->keys[idx].val,
449 conf->mac_addr + SZ_PEDIT_KEY_VAL,
450 ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
/* Publish the new key count (idx was pre-incremented). */
451 p_parser->sel.nkeys = (++idx);
455 * Set pedit key of decrease/set ttl
458 * pointer to action specification
459 * @param[in,out] p_parser
460 * pointer to pedit_parser
461 * @param[in] item_flags
462 * flags of all items presented
465 flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
466 struct pedit_parser *p_parser,
469 int idx = p_parser->sel.nkeys;
/* Only the low byte of the 32-bit word (TTL/hop limit) is rewritten. */
471 p_parser->keys[idx].mask = 0xFFFFFF00;
/* Pick header type and field offset from the matched L3 layer. */
472 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
473 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
474 p_parser->keys[idx].off =
475 offsetof(struct ipv4_hdr, time_to_live);
477 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
478 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
479 p_parser->keys[idx].off =
480 offsetof(struct ipv6_hdr, hop_limits);
482 if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
/*
 * DEC_TTL is implemented as pedit ADD of 0xFF, which presumably
 * wraps the TTL byte to TTL - 1 -- NOTE(review): confirm the
 * byte-wise add behavior against kernel act_pedit.
 */
483 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
484 p_parser->keys[idx].val = 0x000000FF;
/* SET_TTL: write the user-supplied TTL value directly. */
486 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
487 p_parser->keys[idx].val =
488 (__u32)((const struct rte_flow_action_set_ttl *)
489 actions->conf)->ttl_value;
491 p_parser->sel.nkeys = (++idx);
495 * Set pedit key of transport (TCP/UDP) port value
498 * pointer to action specification
499 * @param[in,out] p_parser
500 * pointer to pedit_parser
501 * @param[in] item_flags
502 * flags of all items presented
505 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
506 struct pedit_parser *p_parser,
509 int idx = p_parser->sel.nkeys;
/*
 * Select the pedit header type from the matched L4 layer. If both
 * flags were somehow set, TCP would win (last assignment) -- the
 * validation path presumably prevents that case.
 */
511 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
512 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
513 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
514 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
515 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
516 /* offset of src/dst port is same for TCP and UDP */
517 p_parser->keys[idx].off =
518 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
519 offsetof(struct tcp_hdr, src_port) :
520 offsetof(struct tcp_hdr, dst_port);
/* Only the 16-bit port within the 32-bit word is rewritten. */
521 p_parser->keys[idx].mask = 0xFFFF0000;
522 p_parser->keys[idx].val =
523 (__u32)((const struct rte_flow_action_set_tp *)
524 actions->conf)->port;
525 p_parser->sel.nkeys = (++idx);
529 * Set pedit key of ipv6 address
532 * pointer to action specification
533 * @param[in,out] p_parser
534 * pointer to pedit_parser
537 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
538 struct pedit_parser *p_parser)
540 int idx = p_parser->sel.nkeys;
/* A 16-byte IPv6 address needs four 4-byte pedit keys. */
541 int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
/*
 * NOTE(review): the declaration of the offset variable (presumably
 * "int off_base =", original line 542) is elided in this listing.
 */
543 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
544 offsetof(struct ipv6_hdr, src_addr) :
545 offsetof(struct ipv6_hdr, dst_addr);
546 const struct rte_flow_action_set_ipv6 *conf =
547 (const struct rte_flow_action_set_ipv6 *)actions->conf;
/* Emit one full-word SET key per 4-byte chunk of the address. */
549 for (int i = 0; i < keys; i++, idx++) {
550 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
551 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
552 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
/* ~UINT32_MAX == 0: whole 32-bit word is replaced by val. */
553 p_parser->keys[idx].mask = ~UINT32_MAX;
554 memcpy(&p_parser->keys[idx].val,
555 conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
558 p_parser->sel.nkeys += keys;
562 * Set pedit key of ipv4 address
565 * pointer to action specification
566 * @param[in,out] p_parser
567 * pointer to pedit_parser
570 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
571 struct pedit_parser *p_parser)
573 int idx = p_parser->sel.nkeys;
/* A single 4-byte SET key covers the whole IPv4 address. */
575 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
576 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
577 p_parser->keys[idx].off =
578 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
579 offsetof(struct ipv4_hdr, src_addr) :
580 offsetof(struct ipv4_hdr, dst_addr);
/* ~UINT32_MAX == 0: all 32 bits are replaced by val. */
581 p_parser->keys[idx].mask = ~UINT32_MAX;
582 p_parser->keys[idx].val =
583 ((const struct rte_flow_action_set_ipv4 *)
584 actions->conf)->ipv4_addr;
585 p_parser->sel.nkeys = (++idx);
589 * Create the pedit's na attribute in netlink message
590 * on pre-allocate message buffer
593 * pointer to pre-allocated netlink message buffer
594 * @param[in,out] actions
595 * pointer to pointer of actions specification.
596 * @param[in,out] action_flags
597 * pointer to actions flags
598 * @param[in] item_flags
599 * flags of all item presented
602 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
603 const struct rte_flow_action **actions,
/*
 * NOTE(review): this listing elides several lines of this function
 * (the "break;" after each case, the default label, the
 * "pedit_mnl_msg_done:" label and some argument lines) -- comments
 * below describe only what is visible.
 */
606 struct pedit_parser p_parser;
607 struct nlattr *na_act_options;
608 struct nlattr *na_pedit_keys;
610 memset(&p_parser, 0, sizeof(p_parser));
/* Open a "pedit" action and its TCA_ACT_OPTIONS nest. */
611 mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
612 na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
613 /* all modify header actions should be in one tc-pedit action */
614 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
615 switch ((*actions)->type) {
616 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
617 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
618 flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
620 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
621 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
622 flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
624 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
625 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
626 flow_tcf_pedit_key_set_tp_port(*actions,
627 &p_parser, item_flags);
629 case RTE_FLOW_ACTION_TYPE_SET_TTL:
630 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
631 flow_tcf_pedit_key_set_dec_ttl(*actions,
632 &p_parser, item_flags);
634 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
635 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
636 flow_tcf_pedit_key_set_mac(*actions, &p_parser);
/* Any non-pedit action terminates the accumulation loop. */
639 goto pedit_mnl_msg_done;
/* Keep processing subsequent actions after this pedit action. */
643 p_parser.sel.action = TC_ACT_PIPE;
/* Emit the selector plus all accumulated keys as one attribute. */
644 mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
645 sizeof(p_parser.sel) +
646 p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
/*
 * NOTE(review): the "na_pedit_keys =" assignment (original line 648)
 * is elided here; the nest below presumably stores into it.
 */
649 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
/* One nested TCA_PEDIT_KEY_EX (htype + cmd) per accumulated key. */
650 for (int i = 0; i < p_parser.sel.nkeys; i++) {
651 struct nlattr *na_pedit_key =
652 mnl_attr_nest_start(nl,
653 TCA_PEDIT_KEY_EX | NLA_F_NESTED);
654 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
655 p_parser.keys_ex[i].htype);
656 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
657 p_parser.keys_ex[i].cmd);
658 mnl_attr_nest_end(nl, na_pedit_key);
660 mnl_attr_nest_end(nl, na_pedit_keys);
661 mnl_attr_nest_end(nl, na_act_options);
666 * Calculate max memory size of one TC-pedit actions.
667 * One TC-pedit action can contain set of keys each defining
668 * a rewrite element (rte_flow action)
670 * @param[in,out] actions
671 * actions specification.
672 * @param[in,out] action_flags
674 * @param[in,out] size
677 * Max memory size of one TC-pedit action
680 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
681 uint64_t *action_flags)
/*
 * NOTE(review): the local declarations (presumably pedit_size, keys
 * and flags, original lines 682-686) and the per-case "break;" lines
 * are elided from this listing.
 */
687 pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
688 SZ_NLATTR_STRZ_OF("pedit") +
689 SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
/* Count pedit keys and record flags until a non-pedit action. */
690 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
691 switch ((*actions)->type) {
692 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
693 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
694 flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
696 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
697 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
698 flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
700 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
701 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
702 flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
704 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
705 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
706 flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
708 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
709 /* TCP is as same as UDP */
710 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
711 flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
713 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
714 /* TCP is as same as UDP */
715 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
716 flags |= MLX5_FLOW_ACTION_SET_TP_DST;
718 case RTE_FLOW_ACTION_TYPE_SET_TTL:
719 keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
720 flags |= MLX5_FLOW_ACTION_SET_TTL;
722 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
723 keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
724 flags |= MLX5_FLOW_ACTION_DEC_TTL;
726 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
727 keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
728 flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
730 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
731 keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
732 flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
/* First non-pedit action ends the sizing pass. */
735 goto get_pedit_action_size_done;
738 get_pedit_action_size_done:
739 /* TCA_PEDIT_PARAMS_EX */
741 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
742 keys * sizeof(struct tc_pedit_key))
743 pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
745 /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
746 (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
747 SZ_NLATTR_DATA_OF(2));
/* Report the accumulated action flags to the caller. */
748 (*action_flags) |= flags;
754 * Retrieve mask for pattern item.
756 * This function does basic sanity checks on a pattern item in order to
757 * return the most appropriate mask for it.
760 * Item specification.
761 * @param[in] mask_default
762 * Default mask for pattern item as specified by the flow API.
763 * @param[in] mask_supported
764 * Mask fields supported by the implementation.
765 * @param[in] mask_empty
766 * Empty mask to return when there is no specification.
768 * Perform verbose error reporting if not NULL.
771 * Either @p item->mask or one of the mask parameters on success, NULL
772 * otherwise and rte_errno is set.
775 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
776 const void *mask_supported, const void *mask_empty,
777 size_t mask_size, struct rte_flow_error *error)
/*
 * NOTE(review): local declarations (presumably "const uint8_t *mask"
 * and loop index "i") and the "return NULL"/"return mask" lines are
 * elided from this listing.
 */
782 /* item->last and item->mask cannot exist without item->spec. */
783 if (!item->spec && (item->mask || item->last)) {
784 rte_flow_error_set(error, EINVAL,
785 RTE_FLOW_ERROR_TYPE_ITEM, item,
786 "\"mask\" or \"last\" field provided without"
787 " a corresponding \"spec\"");
790 /* No spec, no mask, no problem. */
/* Fall back to the flow API default when no mask is given. */
793 mask = item->mask ? item->mask : mask_default;
796 * Single-pass check to make sure that:
797 * - Mask is supported, no bits are set outside mask_supported.
798 * - Both item->spec and item->last are included in mask.
800 for (i = 0; i != mask_size; ++i) {
/* Reject any mask bit not present in mask_supported. */
803 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
804 ((const uint8_t *)mask_supported)[i]) {
805 rte_flow_error_set(error, ENOTSUP,
806 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
807 "unsupported field found"
/* spec and last must agree on every masked byte (no ranges). */
812 (((const uint8_t *)item->spec)[i] & mask[i]) !=
813 (((const uint8_t *)item->last)[i] & mask[i])) {
814 rte_flow_error_set(error, EINVAL,
815 RTE_FLOW_ERROR_TYPE_ITEM_LAST,
817 "range between \"spec\" and \"last\""
818 " not comprised in \"mask\"");
826 * Build a conversion table between port ID and ifindex.
829 * Pointer to Ethernet device.
831 * Pointer to ptoi table.
833 * Size of ptoi table provided.
836 * Size of ptoi table filled.
839 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
842 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
/* VLA sized by the port count query above (n may be 0). */
843 uint16_t port_id[n + 1];
845 unsigned int own = 0;
847 /* At least one port is needed when no switch domain is present. */
850 port_id[0] = dev->data->port_id;
/* Re-query the actual port list, clamped to the first probe's size. */
852 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
856 for (i = 0; i != n; ++i) {
857 struct rte_eth_dev_info dev_info;
859 rte_eth_dev_info_get(port_id[i], &dev_info);
/* Remember the current device's slot (handled on elided lines). */
860 if (port_id[i] == dev->data->port_id)
862 ptoi[i].port_id = port_id[i];
863 ptoi[i].ifindex = dev_info.if_index;
865 /* Ensure first entry of ptoi[] is the current device. */
871 /* An entry with zero ifindex terminates ptoi[]. */
878 * Verify the @p attr will be correctly understood by the E-switch.
881 * Pointer to flow attributes
883 * Pointer to error structure.
886 * 0 on success, a negative errno value otherwise and rte_errno is set.
889 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
890 struct rte_flow_error *error)
893 * Supported attributes: groups, some priorities and ingress only.
894 * group is supported only if kernel supports chain. Don't care about
895 * transfer as it is the caller's problem.
/* Group IDs are capped by a driver/FW limit (MLX5_TCF_GROUP_ID_MAX). */
897 if (attr->group > MLX5_TCF_GROUP_ID_MAX)
898 return rte_flow_error_set(error, ENOTSUP,
899 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
900 "group ID larger than "
901 RTE_STR(MLX5_TCF_GROUP_ID_MAX)
/* Non-zero groups have a tighter priority limit than group 0. */
903 else if (attr->group > 0 &&
904 attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
905 return rte_flow_error_set(error, ENOTSUP,
906 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
908 "lowest priority level is "
909 RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
910 " when group is configured");
911 else if (attr->priority > 0xfffe)
912 return rte_flow_error_set(error, ENOTSUP,
913 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
915 "lowest priority level is 0xfffe");
/*
 * NOTE(review): the conditions guarding the two returns below
 * (presumably "if (!attr->ingress)" and "if (attr->egress)",
 * original lines 916 and 920) are elided from this listing.
 */
917 return rte_flow_error_set(error, EINVAL,
918 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
919 attr, "only ingress is supported");
921 return rte_flow_error_set(error, ENOTSUP,
922 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
923 attr, "egress is not supported");
928 * Validate flow for E-Switch.
931 * Pointer to the priv structure.
933 * Pointer to the flow attributes.
935 * Pointer to the list of items.
937 * Pointer to the list of actions.
939 * Pointer to the error structure.
942 * 0 on success, a negative errno value otherwise and rte_ernno is set.
945 flow_tcf_validate(struct rte_eth_dev *dev,
946 const struct rte_flow_attr *attr,
947 const struct rte_flow_item items[],
948 const struct rte_flow_action actions[],
949 struct rte_flow_error *error)
952 const struct rte_flow_item_port_id *port_id;
953 const struct rte_flow_item_eth *eth;
954 const struct rte_flow_item_vlan *vlan;
955 const struct rte_flow_item_ipv4 *ipv4;
956 const struct rte_flow_item_ipv6 *ipv6;
957 const struct rte_flow_item_tcp *tcp;
958 const struct rte_flow_item_udp *udp;
961 const struct rte_flow_action_port_id *port_id;
962 const struct rte_flow_action_jump *jump;
963 const struct rte_flow_action_of_push_vlan *of_push_vlan;
964 const struct rte_flow_action_of_set_vlan_vid *
966 const struct rte_flow_action_of_set_vlan_pcp *
968 const struct rte_flow_action_set_ipv4 *set_ipv4;
969 const struct rte_flow_action_set_ipv6 *set_ipv6;
971 uint32_t item_flags = 0;
972 uint32_t action_flags = 0;
973 uint8_t next_protocol = -1;
974 unsigned int tcm_ifindex = 0;
975 uint8_t pedit_validated = 0;
976 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
977 struct rte_eth_dev *port_id_dev = NULL;
981 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
982 PTOI_TABLE_SZ_MAX(dev)));
983 ret = flow_tcf_validate_attributes(attr, error);
986 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
989 switch (items->type) {
990 case RTE_FLOW_ITEM_TYPE_VOID:
992 case RTE_FLOW_ITEM_TYPE_PORT_ID:
993 mask.port_id = flow_tcf_item_mask
994 (items, &rte_flow_item_port_id_mask,
995 &flow_tcf_mask_supported.port_id,
996 &flow_tcf_mask_empty.port_id,
997 sizeof(flow_tcf_mask_supported.port_id),
1001 if (mask.port_id == &flow_tcf_mask_empty.port_id) {
1005 spec.port_id = items->spec;
1006 if (mask.port_id->id && mask.port_id->id != 0xffffffff)
1007 return rte_flow_error_set
1009 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1011 "no support for partial mask on"
1013 if (!mask.port_id->id)
1016 for (i = 0; ptoi[i].ifindex; ++i)
1017 if (ptoi[i].port_id == spec.port_id->id)
1019 if (!ptoi[i].ifindex)
1020 return rte_flow_error_set
1022 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1024 "missing data to convert port ID to"
1026 if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
1027 return rte_flow_error_set
1029 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1031 "cannot match traffic for"
1032 " several port IDs through"
1033 " a single flow rule");
1034 tcm_ifindex = ptoi[i].ifindex;
1037 case RTE_FLOW_ITEM_TYPE_ETH:
1038 ret = mlx5_flow_validate_item_eth(items, item_flags,
1042 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1044 * Redundant check due to different supported mask.
1045 * Same for the rest of items.
1047 mask.eth = flow_tcf_item_mask
1048 (items, &rte_flow_item_eth_mask,
1049 &flow_tcf_mask_supported.eth,
1050 &flow_tcf_mask_empty.eth,
1051 sizeof(flow_tcf_mask_supported.eth),
1055 if (mask.eth->type && mask.eth->type !=
1057 return rte_flow_error_set
1059 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1061 "no support for partial mask on"
1064 case RTE_FLOW_ITEM_TYPE_VLAN:
1065 ret = mlx5_flow_validate_item_vlan(items, item_flags,
1069 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1070 mask.vlan = flow_tcf_item_mask
1071 (items, &rte_flow_item_vlan_mask,
1072 &flow_tcf_mask_supported.vlan,
1073 &flow_tcf_mask_empty.vlan,
1074 sizeof(flow_tcf_mask_supported.vlan),
1078 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
1079 (mask.vlan->tci & RTE_BE16(0xe000)) !=
1080 RTE_BE16(0xe000)) ||
1081 (mask.vlan->tci & RTE_BE16(0x0fff) &&
1082 (mask.vlan->tci & RTE_BE16(0x0fff)) !=
1083 RTE_BE16(0x0fff)) ||
1084 (mask.vlan->inner_type &&
1085 mask.vlan->inner_type != RTE_BE16(0xffff)))
1086 return rte_flow_error_set
1088 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1090 "no support for partial masks on"
1091 " \"tci\" (PCP and VID parts) and"
1092 " \"inner_type\" fields");
1094 case RTE_FLOW_ITEM_TYPE_IPV4:
1095 ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1099 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1100 mask.ipv4 = flow_tcf_item_mask
1101 (items, &rte_flow_item_ipv4_mask,
1102 &flow_tcf_mask_supported.ipv4,
1103 &flow_tcf_mask_empty.ipv4,
1104 sizeof(flow_tcf_mask_supported.ipv4),
1108 if (mask.ipv4->hdr.next_proto_id &&
1109 mask.ipv4->hdr.next_proto_id != 0xff)
1110 return rte_flow_error_set
1112 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1114 "no support for partial mask on"
1115 " \"hdr.next_proto_id\" field");
1116 else if (mask.ipv4->hdr.next_proto_id)
1118 ((const struct rte_flow_item_ipv4 *)
1119 (items->spec))->hdr.next_proto_id;
1121 case RTE_FLOW_ITEM_TYPE_IPV6:
1122 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1126 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1127 mask.ipv6 = flow_tcf_item_mask
1128 (items, &rte_flow_item_ipv6_mask,
1129 &flow_tcf_mask_supported.ipv6,
1130 &flow_tcf_mask_empty.ipv6,
1131 sizeof(flow_tcf_mask_supported.ipv6),
1135 if (mask.ipv6->hdr.proto &&
1136 mask.ipv6->hdr.proto != 0xff)
1137 return rte_flow_error_set
1139 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1141 "no support for partial mask on"
1142 " \"hdr.proto\" field");
1143 else if (mask.ipv6->hdr.proto)
1145 ((const struct rte_flow_item_ipv6 *)
1146 (items->spec))->hdr.proto;
1148 case RTE_FLOW_ITEM_TYPE_UDP:
1149 ret = mlx5_flow_validate_item_udp(items, item_flags,
1150 next_protocol, error);
1153 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1154 mask.udp = flow_tcf_item_mask
1155 (items, &rte_flow_item_udp_mask,
1156 &flow_tcf_mask_supported.udp,
1157 &flow_tcf_mask_empty.udp,
1158 sizeof(flow_tcf_mask_supported.udp),
1163 case RTE_FLOW_ITEM_TYPE_TCP:
1164 ret = mlx5_flow_validate_item_tcp
1167 &flow_tcf_mask_supported.tcp,
1171 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1172 mask.tcp = flow_tcf_item_mask
1173 (items, &rte_flow_item_tcp_mask,
1174 &flow_tcf_mask_supported.tcp,
1175 &flow_tcf_mask_empty.tcp,
1176 sizeof(flow_tcf_mask_supported.tcp),
1182 return rte_flow_error_set(error, ENOTSUP,
1183 RTE_FLOW_ERROR_TYPE_ITEM,
1184 NULL, "item not supported");
1187 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1189 uint32_t current_action_flag = 0;
1191 switch (actions->type) {
1192 case RTE_FLOW_ACTION_TYPE_VOID:
1194 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1195 current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1198 conf.port_id = actions->conf;
1199 if (conf.port_id->original)
1202 for (i = 0; ptoi[i].ifindex; ++i)
1203 if (ptoi[i].port_id == conf.port_id->id)
1205 if (!ptoi[i].ifindex)
1206 return rte_flow_error_set
1208 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1210 "missing data to convert port ID to"
1212 port_id_dev = &rte_eth_devices[conf.port_id->id];
1214 case RTE_FLOW_ACTION_TYPE_JUMP:
1215 current_action_flag = MLX5_FLOW_ACTION_JUMP;
1218 conf.jump = actions->conf;
1219 if (attr->group >= conf.jump->group)
1220 return rte_flow_error_set
1222 RTE_FLOW_ERROR_TYPE_ACTION,
1224 "can jump only to a group forward");
1226 case RTE_FLOW_ACTION_TYPE_DROP:
1227 current_action_flag = MLX5_FLOW_ACTION_DROP;
1229 case RTE_FLOW_ACTION_TYPE_COUNT:
1231 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1232 current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1234 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1235 current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1237 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1238 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1239 return rte_flow_error_set
1241 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1242 "vlan modify is not supported,"
1243 " set action must follow push action");
1244 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1246 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1247 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1248 return rte_flow_error_set
1250 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1251 "vlan modify is not supported,"
1252 " set action must follow push action");
1253 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1255 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1256 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1258 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1259 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1261 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1262 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1264 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1265 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1267 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1268 current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1270 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1271 current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1273 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1274 current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1276 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1277 current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1279 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1280 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
1282 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1283 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
1286 return rte_flow_error_set(error, ENOTSUP,
1287 RTE_FLOW_ERROR_TYPE_ACTION,
1289 "action not supported");
1291 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1293 return rte_flow_error_set(error, EINVAL,
1294 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1296 "action configuration not set");
1298 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1300 return rte_flow_error_set(error, ENOTSUP,
1301 RTE_FLOW_ERROR_TYPE_ACTION,
1303 "set actions should be "
1304 "listed successively");
1305 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1306 (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1307 pedit_validated = 1;
1308 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1309 (action_flags & MLX5_TCF_FATE_ACTIONS))
1310 return rte_flow_error_set(error, EINVAL,
1311 RTE_FLOW_ERROR_TYPE_ACTION,
1313 "can't have multiple fate"
1315 action_flags |= current_action_flag;
1317 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1318 (action_flags & MLX5_FLOW_ACTION_DROP))
1319 return rte_flow_error_set(error, ENOTSUP,
1320 RTE_FLOW_ERROR_TYPE_ACTION,
1322 "set action is not compatible with "
1324 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1325 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1326 return rte_flow_error_set(error, ENOTSUP,
1327 RTE_FLOW_ERROR_TYPE_ACTION,
1329 "set action must be followed by "
1332 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1333 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1334 return rte_flow_error_set(error, EINVAL,
1335 RTE_FLOW_ERROR_TYPE_ACTION,
1337 "no ipv4 item found in"
1341 (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1342 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1343 return rte_flow_error_set(error, EINVAL,
1344 RTE_FLOW_ERROR_TYPE_ACTION,
1346 "no ipv6 item found in"
1350 (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1352 (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1353 MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1354 return rte_flow_error_set(error, EINVAL,
1355 RTE_FLOW_ERROR_TYPE_ACTION,
1357 "no TCP/UDP item found in"
1361 * FW syndrome (0xA9C090):
1362 * set_flow_table_entry: push vlan action fte in fdb can ONLY be
1363 * forward to the uplink.
1365 if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1366 (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1367 ((struct priv *)port_id_dev->data->dev_private)->representor)
1368 return rte_flow_error_set(error, ENOTSUP,
1369 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1370 "vlan push can only be applied"
1371 " when forwarding to uplink port");
1373 * FW syndrome (0x294609):
1374 * set_flow_table_entry: modify/pop/push actions in fdb flow table
1375 * are supported only while forwarding to vport.
1377 if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1378 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1379 return rte_flow_error_set(error, ENOTSUP,
1380 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1381 "vlan actions are supported"
1382 " only with port_id action");
1383 if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1384 return rte_flow_error_set(error, EINVAL,
1385 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1386 "no fate action is found");
1388 (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
1390 (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
1391 MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
1392 return rte_flow_error_set(error, EINVAL,
1393 RTE_FLOW_ERROR_TYPE_ACTION,
1395 "no IP found in pattern");
1398 (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
1399 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
1400 return rte_flow_error_set(error, ENOTSUP,
1401 RTE_FLOW_ERROR_TYPE_ACTION,
1403 "no ethernet found in"
1410 * Calculate maximum size of memory for flow items of Linux TC flower and
1411 * extract specified items.
 *
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
1414 * Pointer to the list of items.
1415 * @param[out] item_flags
1416 * Pointer to the detected items.
 *
 * @return
1419 * Maximum size of memory for items.
1422 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1423 const struct rte_flow_item items[],
1424 uint64_t *item_flags)
1429 size += SZ_NLATTR_STRZ_OF("flower") +
1430 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1431 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
1432 if (attr->group > 0)
1433 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
1434 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1435 switch (items->type) {
1436 case RTE_FLOW_ITEM_TYPE_VOID:
1438 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1440 case RTE_FLOW_ITEM_TYPE_ETH:
1441 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1442 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1443 /* dst/src MAC addr and mask. */
1444 flags |= MLX5_FLOW_LAYER_OUTER_L2;
1446 case RTE_FLOW_ITEM_TYPE_VLAN:
1447 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1448 SZ_NLATTR_TYPE_OF(uint16_t) +
1449 /* VLAN Ether type. */
1450 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1451 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1452 flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1454 case RTE_FLOW_ITEM_TYPE_IPV4:
1455 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1456 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1457 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1458 /* dst/src IP addr and mask. */
1459 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1461 case RTE_FLOW_ITEM_TYPE_IPV6:
1462 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1463 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1464 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1465 /* dst/src IP addr and mask. */
1466 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1468 case RTE_FLOW_ITEM_TYPE_UDP:
1469 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1470 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1471 /* dst/src port and mask. */
1472 flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1474 case RTE_FLOW_ITEM_TYPE_TCP:
1475 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1476 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1477 /* dst/src port and mask. */
1478 flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1482 "unsupported item %p type %d,"
1483 " items must be validated before flow creation",
1484 (const void *)items, items->type);
1488 *item_flags = flags;
1493 * Calculate maximum size of memory for flow actions of Linux TC flower and
1494 * extract specified actions.
 *
1496 * @param[in] actions
1497 * Pointer to the list of actions.
1498 * @param[out] action_flags
1499 * Pointer to the detected actions.
 *
 * @return
1502 * Maximum size of memory for actions.
1505 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1506 uint64_t *action_flags)
1511 size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1512 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1513 switch (actions->type) {
1514 case RTE_FLOW_ACTION_TYPE_VOID:
1516 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1517 size += SZ_NLATTR_NEST + /* na_act_index. */
1518 SZ_NLATTR_STRZ_OF("mirred") +
1519 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1520 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1521 flags |= MLX5_FLOW_ACTION_PORT_ID;
1523 case RTE_FLOW_ACTION_TYPE_JUMP:
1524 size += SZ_NLATTR_NEST + /* na_act_index. */
1525 SZ_NLATTR_STRZ_OF("gact") +
1526 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1527 SZ_NLATTR_TYPE_OF(struct tc_gact);
1528 flags |= MLX5_FLOW_ACTION_JUMP;
1530 case RTE_FLOW_ACTION_TYPE_DROP:
1531 size += SZ_NLATTR_NEST + /* na_act_index. */
1532 SZ_NLATTR_STRZ_OF("gact") +
1533 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1534 SZ_NLATTR_TYPE_OF(struct tc_gact);
1535 flags |= MLX5_FLOW_ACTION_DROP;
1537 case RTE_FLOW_ACTION_TYPE_COUNT:
1539 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1540 flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1541 goto action_of_vlan;
1542 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1543 flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1544 goto action_of_vlan;
1545 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1546 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1547 goto action_of_vlan;
1548 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1549 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1550 goto action_of_vlan;
1552 size += SZ_NLATTR_NEST + /* na_act_index. */
1553 SZ_NLATTR_STRZ_OF("vlan") +
1554 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1555 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1556 SZ_NLATTR_TYPE_OF(uint16_t) +
1557 /* VLAN protocol. */
1558 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1559 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
1561 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1562 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1563 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1564 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1565 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1566 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1567 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1568 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1569 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1570 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1571 size += flow_tcf_get_pedit_actions_size(&actions,
1576 "unsupported action %p type %d,"
1577 " items must be validated before flow creation",
1578 (const void *)actions, actions->type);
1582 *action_flags = flags;
1587 * Brand rtnetlink buffer with unique handle.
1589 * This handle should be unique for a given network interface to avoid
1593 * Pointer to Netlink message.
1595 * Unique 32-bit handle to use.
1598 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1600 struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1602 tcm->tcm_handle = handle;
1603 DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1604 (void *)nlh, handle);
1608 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1609 * memory required, allocates the memory, initializes Netlink message headers
1610 * and set unique TC message handle.
1613 * Pointer to the flow attributes.
1615 * Pointer to the list of items.
1616 * @param[in] actions
1617 * Pointer to the list of actions.
1618 * @param[out] item_flags
1619 * Pointer to bit mask of all items detected.
1620 * @param[out] action_flags
1621 * Pointer to bit mask of all actions detected.
1623 * Pointer to the error structure.
1626 * Pointer to mlx5_flow object on success,
1627 * otherwise NULL and rte_ernno is set.
1629 static struct mlx5_flow *
1630 flow_tcf_prepare(const struct rte_flow_attr *attr,
1631 const struct rte_flow_item items[],
1632 const struct rte_flow_action actions[],
1633 uint64_t *item_flags, uint64_t *action_flags,
1634 struct rte_flow_error *error)
1636 size_t size = sizeof(struct mlx5_flow) +
1637 MNL_ALIGN(sizeof(struct nlmsghdr)) +
1638 MNL_ALIGN(sizeof(struct tcmsg));
1639 struct mlx5_flow *dev_flow;
1640 struct nlmsghdr *nlh;
1643 size += flow_tcf_get_items_and_size(attr, items, item_flags);
1644 size += flow_tcf_get_actions_and_size(actions, action_flags);
1645 dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1647 rte_flow_error_set(error, ENOMEM,
1648 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1649 "not enough memory to create E-Switch flow");
1652 nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1653 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1654 *dev_flow = (struct mlx5_flow){
1655 .tcf = (struct mlx5_flow_tcf){
1661 * Generate a reasonably unique handle based on the address of the
1664 * This is straightforward on 32-bit systems where the flow pointer can
1665 * be used directly. Otherwise, its least significant part is taken
1666 * after shifting it by the previous power of two of the pointed buffer
1669 if (sizeof(dev_flow) <= 4)
1670 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1672 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1673 rte_log2_u32(rte_align32prevpow2(size)));
1678 * Make adjustments for supporting count actions.
1681 * Pointer to the Ethernet device structure.
1682 * @param[in] dev_flow
1683 * Pointer to mlx5_flow.
1685 * Pointer to error structure.
1688 * 0 On success else a negative errno value is returned and rte_errno is set.
1691 flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
1692 struct mlx5_flow *dev_flow,
1693 struct rte_flow_error *error)
1695 struct rte_flow *flow = dev_flow->flow;
1697 if (!flow->counter) {
1698 flow->counter = flow_tcf_counter_new();
1700 return rte_flow_error_set(error, rte_errno,
1701 RTE_FLOW_ERROR_TYPE_ACTION,
1703 "cannot get counter"
1710 * Translate flow for Linux TC flower and construct Netlink message.
1713 * Pointer to the priv structure.
1714 * @param[in, out] flow
1715 * Pointer to the sub flow.
1717 * Pointer to the flow attributes.
1719 * Pointer to the list of items.
1720 * @param[in] actions
1721 * Pointer to the list of actions.
1723 * Pointer to the error structure.
1726 * 0 on success, a negative errno value otherwise and rte_ernno is set.
1729 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1730 const struct rte_flow_attr *attr,
1731 const struct rte_flow_item items[],
1732 const struct rte_flow_action actions[],
1733 struct rte_flow_error *error)
1736 const struct rte_flow_item_port_id *port_id;
1737 const struct rte_flow_item_eth *eth;
1738 const struct rte_flow_item_vlan *vlan;
1739 const struct rte_flow_item_ipv4 *ipv4;
1740 const struct rte_flow_item_ipv6 *ipv6;
1741 const struct rte_flow_item_tcp *tcp;
1742 const struct rte_flow_item_udp *udp;
1745 const struct rte_flow_action_port_id *port_id;
1746 const struct rte_flow_action_jump *jump;
1747 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1748 const struct rte_flow_action_of_set_vlan_vid *
1750 const struct rte_flow_action_of_set_vlan_pcp *
1753 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1754 struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1755 struct tcmsg *tcm = dev_flow->tcf.tcm;
1756 uint32_t na_act_index_cur;
1757 bool eth_type_set = 0;
1758 bool vlan_present = 0;
1759 bool vlan_eth_type_set = 0;
1760 bool ip_proto_set = 0;
1761 struct nlattr *na_flower;
1762 struct nlattr *na_flower_act;
1763 struct nlattr *na_vlan_id = NULL;
1764 struct nlattr *na_vlan_priority = NULL;
1765 uint64_t item_flags = 0;
1768 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1769 PTOI_TABLE_SZ_MAX(dev)));
1770 nlh = dev_flow->tcf.nlh;
1771 tcm = dev_flow->tcf.tcm;
1772 /* Prepare API must have been called beforehand. */
1773 assert(nlh != NULL && tcm != NULL);
1774 tcm->tcm_family = AF_UNSPEC;
1775 tcm->tcm_ifindex = ptoi[0].ifindex;
1776 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1778 * Priority cannot be zero to prevent the kernel from picking one
1781 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1782 RTE_BE16(ETH_P_ALL));
1783 if (attr->group > 0)
1784 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1785 mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1786 na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1787 mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
1788 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1791 switch (items->type) {
1792 case RTE_FLOW_ITEM_TYPE_VOID:
1794 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1795 mask.port_id = flow_tcf_item_mask
1796 (items, &rte_flow_item_port_id_mask,
1797 &flow_tcf_mask_supported.port_id,
1798 &flow_tcf_mask_empty.port_id,
1799 sizeof(flow_tcf_mask_supported.port_id),
1801 assert(mask.port_id);
1802 if (mask.port_id == &flow_tcf_mask_empty.port_id)
1804 spec.port_id = items->spec;
1805 if (!mask.port_id->id)
1808 for (i = 0; ptoi[i].ifindex; ++i)
1809 if (ptoi[i].port_id == spec.port_id->id)
1811 assert(ptoi[i].ifindex);
1812 tcm->tcm_ifindex = ptoi[i].ifindex;
1814 case RTE_FLOW_ITEM_TYPE_ETH:
1815 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1816 mask.eth = flow_tcf_item_mask
1817 (items, &rte_flow_item_eth_mask,
1818 &flow_tcf_mask_supported.eth,
1819 &flow_tcf_mask_empty.eth,
1820 sizeof(flow_tcf_mask_supported.eth),
1823 if (mask.eth == &flow_tcf_mask_empty.eth)
1825 spec.eth = items->spec;
1826 if (mask.eth->type) {
1827 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1831 if (!is_zero_ether_addr(&mask.eth->dst)) {
1832 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1834 spec.eth->dst.addr_bytes);
1835 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1837 mask.eth->dst.addr_bytes);
1839 if (!is_zero_ether_addr(&mask.eth->src)) {
1840 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1842 spec.eth->src.addr_bytes);
1843 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1845 mask.eth->src.addr_bytes);
1848 case RTE_FLOW_ITEM_TYPE_VLAN:
1849 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1850 mask.vlan = flow_tcf_item_mask
1851 (items, &rte_flow_item_vlan_mask,
1852 &flow_tcf_mask_supported.vlan,
1853 &flow_tcf_mask_empty.vlan,
1854 sizeof(flow_tcf_mask_supported.vlan),
1858 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1859 RTE_BE16(ETH_P_8021Q));
1862 if (mask.vlan == &flow_tcf_mask_empty.vlan)
1864 spec.vlan = items->spec;
1865 if (mask.vlan->inner_type) {
1866 mnl_attr_put_u16(nlh,
1867 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1868 spec.vlan->inner_type);
1869 vlan_eth_type_set = 1;
1871 if (mask.vlan->tci & RTE_BE16(0xe000))
1872 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1874 (spec.vlan->tci) >> 13) & 0x7);
1875 if (mask.vlan->tci & RTE_BE16(0x0fff))
1876 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1881 case RTE_FLOW_ITEM_TYPE_IPV4:
1882 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1883 mask.ipv4 = flow_tcf_item_mask
1884 (items, &rte_flow_item_ipv4_mask,
1885 &flow_tcf_mask_supported.ipv4,
1886 &flow_tcf_mask_empty.ipv4,
1887 sizeof(flow_tcf_mask_supported.ipv4),
1890 if (!eth_type_set || !vlan_eth_type_set)
1891 mnl_attr_put_u16(nlh,
1893 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1894 TCA_FLOWER_KEY_ETH_TYPE,
1895 RTE_BE16(ETH_P_IP));
1897 vlan_eth_type_set = 1;
1898 if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1900 spec.ipv4 = items->spec;
1901 if (mask.ipv4->hdr.next_proto_id) {
1902 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1903 spec.ipv4->hdr.next_proto_id);
1906 if (mask.ipv4->hdr.src_addr) {
1907 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1908 spec.ipv4->hdr.src_addr);
1909 mnl_attr_put_u32(nlh,
1910 TCA_FLOWER_KEY_IPV4_SRC_MASK,
1911 mask.ipv4->hdr.src_addr);
1913 if (mask.ipv4->hdr.dst_addr) {
1914 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1915 spec.ipv4->hdr.dst_addr);
1916 mnl_attr_put_u32(nlh,
1917 TCA_FLOWER_KEY_IPV4_DST_MASK,
1918 mask.ipv4->hdr.dst_addr);
1921 case RTE_FLOW_ITEM_TYPE_IPV6:
1922 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1923 mask.ipv6 = flow_tcf_item_mask
1924 (items, &rte_flow_item_ipv6_mask,
1925 &flow_tcf_mask_supported.ipv6,
1926 &flow_tcf_mask_empty.ipv6,
1927 sizeof(flow_tcf_mask_supported.ipv6),
1930 if (!eth_type_set || !vlan_eth_type_set)
1931 mnl_attr_put_u16(nlh,
1933 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1934 TCA_FLOWER_KEY_ETH_TYPE,
1935 RTE_BE16(ETH_P_IPV6));
1937 vlan_eth_type_set = 1;
1938 if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1940 spec.ipv6 = items->spec;
1941 if (mask.ipv6->hdr.proto) {
1942 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1943 spec.ipv6->hdr.proto);
1946 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1947 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1948 sizeof(spec.ipv6->hdr.src_addr),
1949 spec.ipv6->hdr.src_addr);
1950 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1951 sizeof(mask.ipv6->hdr.src_addr),
1952 mask.ipv6->hdr.src_addr);
1954 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1955 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1956 sizeof(spec.ipv6->hdr.dst_addr),
1957 spec.ipv6->hdr.dst_addr);
1958 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1959 sizeof(mask.ipv6->hdr.dst_addr),
1960 mask.ipv6->hdr.dst_addr);
1963 case RTE_FLOW_ITEM_TYPE_UDP:
1964 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1965 mask.udp = flow_tcf_item_mask
1966 (items, &rte_flow_item_udp_mask,
1967 &flow_tcf_mask_supported.udp,
1968 &flow_tcf_mask_empty.udp,
1969 sizeof(flow_tcf_mask_supported.udp),
1973 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1975 if (mask.udp == &flow_tcf_mask_empty.udp)
1977 spec.udp = items->spec;
1978 if (mask.udp->hdr.src_port) {
1979 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1980 spec.udp->hdr.src_port);
1981 mnl_attr_put_u16(nlh,
1982 TCA_FLOWER_KEY_UDP_SRC_MASK,
1983 mask.udp->hdr.src_port);
1985 if (mask.udp->hdr.dst_port) {
1986 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1987 spec.udp->hdr.dst_port);
1988 mnl_attr_put_u16(nlh,
1989 TCA_FLOWER_KEY_UDP_DST_MASK,
1990 mask.udp->hdr.dst_port);
1993 case RTE_FLOW_ITEM_TYPE_TCP:
1994 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1995 mask.tcp = flow_tcf_item_mask
1996 (items, &rte_flow_item_tcp_mask,
1997 &flow_tcf_mask_supported.tcp,
1998 &flow_tcf_mask_empty.tcp,
1999 sizeof(flow_tcf_mask_supported.tcp),
2003 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2005 if (mask.tcp == &flow_tcf_mask_empty.tcp)
2007 spec.tcp = items->spec;
2008 if (mask.tcp->hdr.src_port) {
2009 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
2010 spec.tcp->hdr.src_port);
2011 mnl_attr_put_u16(nlh,
2012 TCA_FLOWER_KEY_TCP_SRC_MASK,
2013 mask.tcp->hdr.src_port);
2015 if (mask.tcp->hdr.dst_port) {
2016 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
2017 spec.tcp->hdr.dst_port);
2018 mnl_attr_put_u16(nlh,
2019 TCA_FLOWER_KEY_TCP_DST_MASK,
2020 mask.tcp->hdr.dst_port);
2022 if (mask.tcp->hdr.tcp_flags) {
2025 TCA_FLOWER_KEY_TCP_FLAGS,
2027 (spec.tcp->hdr.tcp_flags));
2030 TCA_FLOWER_KEY_TCP_FLAGS_MASK,
2032 (mask.tcp->hdr.tcp_flags));
2036 return rte_flow_error_set(error, ENOTSUP,
2037 RTE_FLOW_ERROR_TYPE_ITEM,
2038 NULL, "item not supported");
2041 na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
2042 na_act_index_cur = 1;
2043 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2044 struct nlattr *na_act_index;
2045 struct nlattr *na_act;
2046 unsigned int vlan_act;
2049 switch (actions->type) {
2050 case RTE_FLOW_ACTION_TYPE_VOID:
2052 case RTE_FLOW_ACTION_TYPE_PORT_ID:
2053 conf.port_id = actions->conf;
2054 if (conf.port_id->original)
2057 for (i = 0; ptoi[i].ifindex; ++i)
2058 if (ptoi[i].port_id == conf.port_id->id)
2060 assert(ptoi[i].ifindex);
2062 mnl_attr_nest_start(nlh, na_act_index_cur++);
2063 assert(na_act_index);
2064 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
2065 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2067 mnl_attr_put(nlh, TCA_MIRRED_PARMS,
2068 sizeof(struct tc_mirred),
2069 &(struct tc_mirred){
2070 .action = TC_ACT_STOLEN,
2071 .eaction = TCA_EGRESS_REDIR,
2072 .ifindex = ptoi[i].ifindex,
2074 mnl_attr_nest_end(nlh, na_act);
2075 mnl_attr_nest_end(nlh, na_act_index);
2077 case RTE_FLOW_ACTION_TYPE_JUMP:
2078 conf.jump = actions->conf;
2080 mnl_attr_nest_start(nlh, na_act_index_cur++);
2081 assert(na_act_index);
2082 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2083 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2085 mnl_attr_put(nlh, TCA_GACT_PARMS,
2086 sizeof(struct tc_gact),
2088 .action = TC_ACT_GOTO_CHAIN |
2091 mnl_attr_nest_end(nlh, na_act);
2092 mnl_attr_nest_end(nlh, na_act_index);
2094 case RTE_FLOW_ACTION_TYPE_DROP:
2096 mnl_attr_nest_start(nlh, na_act_index_cur++);
2097 assert(na_act_index);
2098 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2099 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2101 mnl_attr_put(nlh, TCA_GACT_PARMS,
2102 sizeof(struct tc_gact),
2104 .action = TC_ACT_SHOT,
2106 mnl_attr_nest_end(nlh, na_act);
2107 mnl_attr_nest_end(nlh, na_act_index);
2109 case RTE_FLOW_ACTION_TYPE_COUNT:
2111 * Driver adds the count action implicitly for
2112 * each rule it creates.
2114 ret = flow_tcf_translate_action_count(dev,
2119 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
2120 conf.of_push_vlan = NULL;
2121 vlan_act = TCA_VLAN_ACT_POP;
2122 goto action_of_vlan;
2123 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
2124 conf.of_push_vlan = actions->conf;
2125 vlan_act = TCA_VLAN_ACT_PUSH;
2126 goto action_of_vlan;
2127 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
2128 conf.of_set_vlan_vid = actions->conf;
2130 goto override_na_vlan_id;
2131 vlan_act = TCA_VLAN_ACT_MODIFY;
2132 goto action_of_vlan;
2133 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
2134 conf.of_set_vlan_pcp = actions->conf;
2135 if (na_vlan_priority)
2136 goto override_na_vlan_priority;
2137 vlan_act = TCA_VLAN_ACT_MODIFY;
2138 goto action_of_vlan;
2141 mnl_attr_nest_start(nlh, na_act_index_cur++);
2142 assert(na_act_index);
2143 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
2144 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2146 mnl_attr_put(nlh, TCA_VLAN_PARMS,
2147 sizeof(struct tc_vlan),
2149 .action = TC_ACT_PIPE,
2150 .v_action = vlan_act,
2152 if (vlan_act == TCA_VLAN_ACT_POP) {
2153 mnl_attr_nest_end(nlh, na_act);
2154 mnl_attr_nest_end(nlh, na_act_index);
2157 if (vlan_act == TCA_VLAN_ACT_PUSH)
2158 mnl_attr_put_u16(nlh,
2159 TCA_VLAN_PUSH_VLAN_PROTOCOL,
2160 conf.of_push_vlan->ethertype);
2161 na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
2162 mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
2163 na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
2164 mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
2165 mnl_attr_nest_end(nlh, na_act);
2166 mnl_attr_nest_end(nlh, na_act_index);
2167 if (actions->type ==
2168 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
2169 override_na_vlan_id:
2170 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
2171 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
2173 (conf.of_set_vlan_vid->vlan_vid);
2174 } else if (actions->type ==
2175 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
2176 override_na_vlan_priority:
2177 na_vlan_priority->nla_type =
2178 TCA_VLAN_PUSH_VLAN_PRIORITY;
2179 *(uint8_t *)mnl_attr_get_payload
2180 (na_vlan_priority) =
2181 conf.of_set_vlan_pcp->vlan_pcp;
2184 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
2185 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
2186 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
2187 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
2188 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
2189 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
2190 case RTE_FLOW_ACTION_TYPE_SET_TTL:
2191 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
2192 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
2193 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
2195 mnl_attr_nest_start(nlh, na_act_index_cur++);
2196 flow_tcf_create_pedit_mnl_msg(nlh,
2197 &actions, item_flags);
2198 mnl_attr_nest_end(nlh, na_act_index);
2201 return rte_flow_error_set(error, ENOTSUP,
2202 RTE_FLOW_ERROR_TYPE_ACTION,
2204 "action not supported");
2208 assert(na_flower_act);
2209 mnl_attr_nest_end(nlh, na_flower_act);
2210 mnl_attr_nest_end(nlh, na_flower);
2215 * Send Netlink message with acknowledgment.
2218 * Flow context to use.
2220 * Message to send. This function always raises the NLM_F_ACK flag before
2224 * 0 on success, a negative errno value otherwise and rte_errno is set.
2227 flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
2229 alignas(struct nlmsghdr)
2230 uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
2231 nlh->nlmsg_len - sizeof(*nlh)];
2232 uint32_t seq = ctx->seq++;
2233 struct mnl_socket *nl = ctx->nl;
2236 nlh->nlmsg_flags |= NLM_F_ACK;
2237 nlh->nlmsg_seq = seq;
2238 ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
2240 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
2243 (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
2251 * Apply flow to E-Switch by sending Netlink message.
2254 * Pointer to Ethernet device.
2255 * @param[in, out] flow
2256 * Pointer to the sub flow.
2258 * Pointer to the error structure.
2261 * 0 on success, a negative errno value otherwise and rte_ernno is set.
2264 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2265 struct rte_flow_error *error)
2267 struct priv *priv = dev->data->dev_private;
2268 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2269 struct mlx5_flow *dev_flow;
2270 struct nlmsghdr *nlh;
2272 dev_flow = LIST_FIRST(&flow->dev_flows);
2273 /* E-Switch flow can't be expanded. */
2274 assert(!LIST_NEXT(dev_flow, next));
2275 nlh = dev_flow->tcf.nlh;
2276 nlh->nlmsg_type = RTM_NEWTFILTER;
2277 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2278 if (!flow_tcf_nl_ack(ctx, nlh))
2280 return rte_flow_error_set(error, rte_errno,
2281 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2282 "netlink: failed to create TC flow rule");
2286 * Remove flow from E-Switch by sending Netlink message.
2289 * Pointer to Ethernet device.
2290 * @param[in, out] flow
2291 * Pointer to the sub flow.
2294 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2296 struct priv *priv = dev->data->dev_private;
2297 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2298 struct mlx5_flow *dev_flow;
2299 struct nlmsghdr *nlh;
2303 if (flow->counter) {
2304 if (--flow->counter->ref_cnt == 0) {
2305 rte_free(flow->counter);
2306 flow->counter = NULL;
2309 dev_flow = LIST_FIRST(&flow->dev_flows);
2312 /* E-Switch flow can't be expanded. */
2313 assert(!LIST_NEXT(dev_flow, next));
2314 nlh = dev_flow->tcf.nlh;
2315 nlh->nlmsg_type = RTM_DELTFILTER;
2316 nlh->nlmsg_flags = NLM_F_REQUEST;
2317 flow_tcf_nl_ack(ctx, nlh);
2321 * Remove flow from E-Switch and release resources of the device flow.
2324 * Pointer to Ethernet device.
2325 * @param[in, out] flow
2326 * Pointer to the sub flow.
2329 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2331 struct mlx5_flow *dev_flow;
2335 flow_tcf_remove(dev, flow);
2336 dev_flow = LIST_FIRST(&flow->dev_flows);
2339 /* E-Switch flow can't be expanded. */
2340 assert(!LIST_NEXT(dev_flow, next));
2341 LIST_REMOVE(dev_flow, next);
2346 * Helper routine for figuring the space size required for a parse buffer.
2349 * array of values to use.
2351 * Current location in array.
2353 * Value to compare with.
2356 * The maximum between the given value and the array value on index.
2359 flow_tcf_arr_val_max(uint16_t array[], int idx, uint16_t value)
2361 return idx < 0 ? (value) : RTE_MAX((array)[idx], value);
2365 * Parse rtnetlink message attributes filling the attribute table with the info
2369 * Attribute table to be filled.
2371 * Maxinum entry in the attribute table.
2373 * The attributes section in the message to be parsed.
2375 * The length of the attributes section in the message.
2378 flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
2379 struct rtattr *rta, int len)
2381 unsigned short type;
2382 memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
2383 while (RTA_OK(rta, len)) {
2384 type = rta->rta_type;
2385 if (type <= max && !tb[type])
2387 rta = RTA_NEXT(rta, len);
2392 * Extract flow counters from flower action.
2395 * flower action stats properties in the Netlink message received.
2397 * The backward sequence of rta_types, as written in the attribute table,
2398 * we need to traverse in order to get to the requested object.
2400 * Current location in rta_type table.
2402 * data holding the count statistics of the rte_flow retrieved from
2406 * 0 if data was found and retrieved, -1 otherwise.
2409 flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
2410 uint16_t rta_type[], int idx,
2411 struct gnet_stats_basic *data)
2413 int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
2415 struct rtattr *tbs[tca_stats_max + 1];
2417 if (rta == NULL || idx < 0)
2419 flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
2420 RTA_DATA(rta), RTA_PAYLOAD(rta));
2421 switch (rta_type[idx]) {
2422 case TCA_STATS_BASIC:
2423 if (tbs[TCA_STATS_BASIC]) {
2424 memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
2425 RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
2437 * Parse flower single action retrieving the requested action attribute,
2441 * flower action properties in the Netlink message received.
2443 * The backward sequence of rta_types, as written in the attribute table,
2444 * we need to traverse in order to get to the requested object.
2446 * Current location in rta_type table.
2448 * Count statistics retrieved from the message query.
2451 * 0 if data was found and retrieved, -1 otherwise.
/*
 * NOTE(review): parses one TC action entry.  An action without TCA_ACT_KIND
 * is rejected; when the requested attribute is the action statistics the
 * traversal recurses one level down into the TCA_ACT_STATS nest.  The
 * decremented index and the fall-through/default returns are outside the
 * visible lines of this excerpt.
 */
2454 flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
2455 uint16_t rta_type[], int idx, void *data)
2457 int tca_act_max = flow_tcf_arr_val_max(rta_type, idx, TCA_ACT_STATS);
2458 struct rtattr *tb[tca_act_max + 1];
2460 if (arg == NULL || idx < 0)
2462 flow_tcf_nl_parse_rtattr(tb, tca_act_max,
2463 RTA_DATA(arg), RTA_PAYLOAD(arg));
/* Every well-formed TC action carries a kind string; bail out if absent. */
2464 if (tb[TCA_ACT_KIND] == NULL)
2466 switch (rta_type[idx]) {
2468 if (tb[TCA_ACT_STATS])
2469 return flow_tcf_nl_action_stats_parse_and_get
2472 (struct gnet_stats_basic *)data);
2481 * Parse flower action section in the message retrieving the requested
2482 * attribute from the first action that provides it.
2485 * flower section in the Netlink message received.
2487 * The backward sequence of rta_types, as written in the attribute table,
2488 * we need to traverse in order to get to the requested object.
2490 * Current location in rta_type table.
2492 * data retrieved from the message query.
2495 * 0 if data was found and retrieved, -1 otherwise.
/*
 * NOTE(review): iterates over the per-priority action slots
 * (1..TCA_ACT_MAX_PRIO) and returns as soon as one action yields the
 * requested attribute.  The index is intentionally NOT decremented before
 * recursing (see the in-body comment) because this level only fans out over
 * action slots, it does not consume an rta_type entry.
 */
2498 flow_tcf_nl_action_parse_and_get(struct rtattr *arg,
2499 uint16_t rta_type[], int idx, void *data)
2501 struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
2504 if (arg == NULL || idx < 0)
2506 flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
2507 RTA_DATA(arg), RTA_PAYLOAD(arg));
2508 switch (rta_type[idx]) {
2510 * flow counters are stored in the actions defined by the flow
2511 * and not in the flow itself, therefore we need to traverse the
2512 * flower chain of actions in search for them.
2514 * Note that the index is not decremented here.
2517 for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
2519 !flow_tcf_nl_parse_one_action_and_get(tb[i],
2532 * Parse flower classifier options in the message, retrieving the requested
2533 * attribute if found.
2536 * flower section in the Netlink message received.
2538 * The backward sequence of rta_types, as written in the attribute table,
2539 * we need to traverse in order to get to the requested object.
2541 * Current location in rta_type table.
2543 * data retrieved from the message query.
2546 * 0 if data was found and retrieved, -1 otherwise.
/*
 * NOTE(review): handles the TCA_OPTIONS level of the flower message.  When
 * the traversal asks for TCA_FLOWER_ACT it descends into the action list,
 * consuming one rta_type entry (--idx).  Non-matching / missing-attribute
 * return paths are outside the visible excerpt.
 */
2549 flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
2550 uint16_t rta_type[], int idx, void *data)
2552 int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
2554 struct rtattr *tb[tca_flower_max + 1];
2556 if (!opt || idx < 0)
2558 flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
2559 RTA_DATA(opt), RTA_PAYLOAD(opt));
2560 switch (rta_type[idx]) {
2561 case TCA_FLOWER_ACT:
2562 if (tb[TCA_FLOWER_ACT])
2563 return flow_tcf_nl_action_parse_and_get
2564 (tb[TCA_FLOWER_ACT],
/* Descend one level: this rta_type entry is now consumed. */
2565 rta_type, --idx, data);
2574 * Parse Netlink reply on filter query, retrieving the flow counters.
2577 * Message received from Netlink.
2579 * The backward sequence of rta_types, as written in the attribute table,
2580 * we need to traverse in order to get to the requested object.
2582 * Current location in rta_type table.
2584 * data retrieved from the message query.
2587 * 0 if data was found and retrieved, -1 otherwise.
/*
 * NOTE(review): top level of the reply traversal.  Accepts only the three
 * TC filter message types, strips the tcmsg header from the length, and
 * requires the classifier kind to be exactly "flower" before descending
 * into TCA_OPTIONS.  The reject-path returns between the checks are not
 * visible in this excerpt.
 */
2590 flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
2591 uint16_t rta_type[], int idx, void *data)
2593 struct nlmsghdr *nlh = cnlh;
2594 struct tcmsg *t = NLMSG_DATA(nlh);
2595 int len = nlh->nlmsg_len;
2596 int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
2597 struct rtattr *tb[tca_max + 1];
/* Only TC filter replies are meaningful here. */
2601 if (nlh->nlmsg_type != RTM_NEWTFILTER &&
2602 nlh->nlmsg_type != RTM_GETTFILTER &&
2603 nlh->nlmsg_type != RTM_DELTFILTER)
/* Attribute area starts after the fixed tcmsg header. */
2605 len -= NLMSG_LENGTH(sizeof(*t));
2608 flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
2609 /* Not a TC flower flow - bail out */
2610 if (!tb[TCA_KIND] ||
2611 strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
2613 switch (rta_type[idx]) {
2615 if (tb[TCA_OPTIONS])
2616 return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
2627 * A callback to parse Netlink reply on TC flower query.
2630 * Message received from Netlink.
2632 * Pointer to data area to be filled by the parsing routine.
2633 * assumed to be a pointer to struct flow_tcf_stats_basic.
/*
 * NOTE(review): mnl_cb_run() callback.  Drives the backward rta_type
 * traversal (innermost attribute first in the array, walked from the last
 * element down) and marks the result valid only when the counters were
 * actually found in the reply.
 */
2639 flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
2642 * The backward sequence of rta_types to pass in order to get
2645 uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
2646 TCA_FLOWER_ACT, TCA_OPTIONS };
2647 struct flow_tcf_stats_basic *sb_data = data;
/* Union to shed the const qualifier without a cast that aliases UB. */
2649 const struct nlmsghdr *c;
2650 struct nlmsghdr *nc;
2651 } tnlh = { .c = nlh };
2653 if (!flow_tcf_nl_filter_parse_and_get(tnlh.nc, rta_type,
2654 RTE_DIM(rta_type) - 1,
2655 (void *)&sb_data->counters))
2656 sb_data->valid = true;
2661 * Query a TC flower rule for its statistics via netlink.
2664 * Pointer to Ethernet device.
2666 * Pointer to the sub flow.
2668 * data retrieved by the query.
2670 * Perform verbose error reporting if not NULL.
2673 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * NOTE(review): reuses the flow's own translated netlink message buffer
 * (dev_flow->tcf.nlh) as the query request, retagging it as RTM_GETTFILTER
 * with NLM_F_ECHO so the kernel echoes the filter back with statistics.
 * Returned hits/bytes are deltas against the values cached in
 * flow->counter, which is then advanced to the absolute kernel counters.
 * Several error-path jumps between the send/recv/cb_run calls are not
 * visible in this excerpt.
 */
2676 flow_tcf_query_count(struct rte_eth_dev *dev,
2677 struct rte_flow *flow,
2679 struct rte_flow_error *error)
2681 struct flow_tcf_stats_basic sb_data = { 0 };
2682 struct rte_flow_query_count *qc = data;
2683 struct priv *priv = dev->data->dev_private;
2684 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2685 struct mnl_socket *nl = ctx->nl;
2686 struct mlx5_flow *dev_flow;
2687 struct nlmsghdr *nlh;
/* Per-context sequence number correlates request and reply. */
2688 uint32_t seq = priv->tcf_context->seq++;
2692 dev_flow = LIST_FIRST(&flow->dev_flows);
2693 /* E-Switch flow can't be expanded. */
2694 assert(!LIST_NEXT(dev_flow, next));
2695 if (!dev_flow->flow->counter)
2697 nlh = dev_flow->tcf.nlh;
2698 nlh->nlmsg_type = RTM_GETTFILTER;
/* NLM_F_ECHO: ask the kernel to send the filter (with stats) back. */
2699 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
2700 nlh->nlmsg_seq = seq;
2701 if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
2704 ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
2707 ret = mnl_cb_run(ctx->buf, ret, seq,
2708 mnl_socket_get_portid(nl),
2709 flow_tcf_nl_message_get_stats_basic,
2713 if (sb_data.valid) {
2714 /* Return the delta from last reset. */
2717 qc->hits = sb_data.counters.packets - flow->counter->hits;
2718 qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
/* Advance the cached baseline to the absolute kernel counters. */
2720 flow->counter->hits = sb_data.counters.packets;
2721 flow->counter->bytes = sb_data.counters.bytes;
2725 return rte_flow_error_set(error, EINVAL,
2726 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2728 "flow does not have counter");
2730 return rte_flow_error_set
2731 (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2732 NULL, "netlink: failed to read flow rule counters");
2734 return rte_flow_error_set
2735 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2736 NULL, "counters are not available.");
2742 * @see rte_flow_query()
/*
 * NOTE(review): rte_flow query entry point for the TCF driver.  Walks the
 * action list; VOID actions are skipped, COUNT is delegated to
 * flow_tcf_query_count(), anything else is rejected with ENOTSUP.  The
 * loop epilogue / final return is outside the visible excerpt.
 */
2746 flow_tcf_query(struct rte_eth_dev *dev,
2747 struct rte_flow *flow,
2748 const struct rte_flow_action *actions,
2750 struct rte_flow_error *error)
2754 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2755 switch (actions->type) {
2756 case RTE_FLOW_ACTION_TYPE_VOID:
2758 case RTE_FLOW_ACTION_TYPE_COUNT:
2759 ret = flow_tcf_query_count(dev, flow, data, error);
2762 return rte_flow_error_set(error, ENOTSUP,
2763 RTE_FLOW_ERROR_TYPE_ACTION,
2765 "action not supported");
/* Driver-ops vtable wiring the TC-flower backend into the mlx5 flow engine. */
2771 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2772 .validate = flow_tcf_validate,
2773 .prepare = flow_tcf_prepare,
2774 .translate = flow_tcf_translate,
2775 .apply = flow_tcf_apply,
2776 .remove = flow_tcf_remove,
2777 .destroy = flow_tcf_destroy,
2778 .query = flow_tcf_query,
2782 * Create and configure a libmnl socket for Netlink flow rules.
2785 * A valid libmnl socket object pointer on success, NULL otherwise and
/*
 * NOTE(review): opens a NETLINK_ROUTE socket, enables NETLINK_CAP_ACK
 * (short error acks without the offending payload) on a best-effort basis,
 * and binds with an auto-assigned port id.  The failure cleanup falls
 * through to mnl_socket_close(); intermediate lines are not visible here.
 */
2788 static struct mnl_socket *
2789 flow_tcf_mnl_socket_create(void)
2791 struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
/* Best effort only: older kernels may not support NETLINK_CAP_ACK. */
2794 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2796 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2801 mnl_socket_close(nl);
2806 * Destroy a libmnl socket.
2809 * Libmnl socket of the @p NETLINK_ROUTE kind.
/* NOTE(review): the NULL guard before close is outside the visible lines. */
2812 flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
2815 mnl_socket_close(nl);
2819 * Initialize ingress qdisc of a given network interface.
2822 * Pointer to tc-flower context to use.
2824 * Index of network interface to initialize.
2826 * Perform verbose error reporting if not NULL.
2829 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * NOTE(review): two-step sequence — delete any pre-existing ingress qdisc
 * (tolerating EINVAL/ENOENT, i.e. "was not there"), then create a fresh one
 * with NLM_F_EXCL so a concurrent creator is detected.  Both messages are
 * built in the same stack buffer; flow_tcf_nl_ack() sends and waits for
 * the kernel acknowledgement.
 */
2832 mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
2833 unsigned int ifindex, struct rte_flow_error *error)
2835 struct nlmsghdr *nlh;
/* Stack buffer large enough for tcmsg plus a small attribute area. */
2837 alignas(struct nlmsghdr)
2838 uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2840 /* Destroy existing ingress qdisc and everything attached to it. */
2841 nlh = mnl_nlmsg_put_header(buf);
2842 nlh->nlmsg_type = RTM_DELQDISC;
2843 nlh->nlmsg_flags = NLM_F_REQUEST;
2844 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2845 tcm->tcm_family = AF_UNSPEC;
2846 tcm->tcm_ifindex = ifindex;
2847 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2848 tcm->tcm_parent = TC_H_INGRESS;
2849 /* Ignore errors when qdisc is already absent. */
2850 if (flow_tcf_nl_ack(ctx, nlh) &&
2851 rte_errno != EINVAL && rte_errno != ENOENT)
2852 return rte_flow_error_set(error, rte_errno,
2853 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2854 "netlink: failed to remove ingress"
2856 /* Create fresh ingress qdisc. */
2857 nlh = mnl_nlmsg_put_header(buf);
2858 nlh->nlmsg_type = RTM_NEWQDISC;
/* NLM_F_EXCL: fail instead of silently reusing an existing qdisc. */
2859 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2860 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2861 tcm->tcm_family = AF_UNSPEC;
2862 tcm->tcm_ifindex = ifindex;
2863 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2864 tcm->tcm_parent = TC_H_INGRESS;
2865 mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2866 if (flow_tcf_nl_ack(ctx, nlh))
2867 return rte_flow_error_set(error, rte_errno,
2868 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2869 "netlink: failed to create ingress"
2875 * Create libmnl context for Netlink flow rules.
2878 * A valid libmnl socket object pointer on success, NULL otherwise and
/*
 * NOTE(review): allocates the context, opens the libmnl socket, sizes and
 * allocates the receive buffer (MNL_SOCKET_BUFFER_SIZE), and seeds the
 * netlink sequence counter from random().  On any failure it unwinds via
 * mlx5_flow_tcf_context_destroy(); the intermediate NULL checks and the
 * error label are outside the visible lines.
 */
2881 struct mlx5_flow_tcf_context *
2882 mlx5_flow_tcf_context_create(void)
2884 struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
2889 ctx->nl = flow_tcf_mnl_socket_create();
2892 ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
2893 ctx->buf = rte_zmalloc(__func__,
2894 ctx->buf_size, sizeof(uint32_t));
/* Random start avoids sequence collisions across process restarts. */
2897 ctx->seq = random();
2900 mlx5_flow_tcf_context_destroy(ctx);
2905 * Destroy a libmnl context.
2908 * Libmnl socket of the @p NETLINK_ROUTE kind.
2911 mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
2915 flow_tcf_mnl_socket_destroy(ctx->nl);