1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/gen_stats.h>
10 #include <linux/if_ether.h>
11 #include <linux/netlink.h>
12 #include <linux/pkt_cls.h>
13 #include <linux/pkt_sched.h>
14 #include <linux/rtnetlink.h>
15 #include <linux/tc_act/tc_gact.h>
16 #include <linux/tc_act/tc_mirred.h>
17 #include <netinet/in.h>
23 #include <sys/socket.h>
25 #include <rte_byteorder.h>
26 #include <rte_errno.h>
27 #include <rte_ether.h>
29 #include <rte_malloc.h>
30 #include <rte_common.h>
33 #include "mlx5_flow.h"
34 #include "mlx5_autoconf.h"
36 #ifdef HAVE_TC_ACT_VLAN
38 #include <linux/tc_act/tc_vlan.h>
40 #else /* HAVE_TC_ACT_VLAN */
42 #define TCA_VLAN_ACT_POP 1
43 #define TCA_VLAN_ACT_PUSH 2
44 #define TCA_VLAN_ACT_MODIFY 3
45 #define TCA_VLAN_PARMS 2
46 #define TCA_VLAN_PUSH_VLAN_ID 3
47 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
48 #define TCA_VLAN_PAD 5
49 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
56 #endif /* HAVE_TC_ACT_VLAN */
58 #ifdef HAVE_TC_ACT_PEDIT
60 #include <linux/tc_act/tc_pedit.h>
62 #else /* HAVE_TC_ACT_PEDIT */
76 TCA_PEDIT_KEY_EX_HTYPE = 1,
77 TCA_PEDIT_KEY_EX_CMD = 2,
78 __TCA_PEDIT_KEY_EX_MAX
81 enum pedit_header_type {
82 TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
83 TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
84 TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
85 TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
86 TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
87 TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
92 TCA_PEDIT_KEY_EX_CMD_SET = 0,
93 TCA_PEDIT_KEY_EX_CMD_ADD = 1,
100 __u32 off; /* offset */
107 struct tc_pedit_sel {
111 struct tc_pedit_key keys[0];
114 #endif /* HAVE_TC_ACT_PEDIT */
116 #ifdef HAVE_TC_ACT_TUNNEL_KEY
118 #include <linux/tc_act/tc_tunnel_key.h>
120 #ifndef HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT
121 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
124 #ifndef HAVE_TCA_TUNNEL_KEY_NO_CSUM
125 #define TCA_TUNNEL_KEY_NO_CSUM 10
128 #else /* HAVE_TC_ACT_TUNNEL_KEY */
130 #define TCA_ACT_TUNNEL_KEY 17
131 #define TCA_TUNNEL_KEY_ACT_SET 1
132 #define TCA_TUNNEL_KEY_ACT_RELEASE 2
133 #define TCA_TUNNEL_KEY_PARMS 2
134 #define TCA_TUNNEL_KEY_ENC_IPV4_SRC 3
135 #define TCA_TUNNEL_KEY_ENC_IPV4_DST 4
136 #define TCA_TUNNEL_KEY_ENC_IPV6_SRC 5
137 #define TCA_TUNNEL_KEY_ENC_IPV6_DST 6
138 #define TCA_TUNNEL_KEY_ENC_KEY_ID 7
139 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
140 #define TCA_TUNNEL_KEY_NO_CSUM 10
142 struct tc_tunnel_key {
147 #endif /* HAVE_TC_ACT_TUNNEL_KEY */
149 /* Normally found in linux/netlink.h. */
150 #ifndef NETLINK_CAP_ACK
151 #define NETLINK_CAP_ACK 10
154 /* Normally found in linux/pkt_sched.h. */
155 #ifndef TC_H_MIN_INGRESS
156 #define TC_H_MIN_INGRESS 0xfff2u
159 /* Normally found in linux/pkt_cls.h. */
160 #ifndef TCA_CLS_FLAGS_SKIP_SW
161 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
163 #ifndef HAVE_TCA_CHAIN
166 #ifndef HAVE_TCA_FLOWER_ACT
167 #define TCA_FLOWER_ACT 3
169 #ifndef HAVE_TCA_FLOWER_FLAGS
170 #define TCA_FLOWER_FLAGS 22
172 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
173 #define TCA_FLOWER_KEY_ETH_TYPE 8
175 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
176 #define TCA_FLOWER_KEY_ETH_DST 4
178 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
179 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
181 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
182 #define TCA_FLOWER_KEY_ETH_SRC 6
184 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
185 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
187 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
188 #define TCA_FLOWER_KEY_IP_PROTO 9
190 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
191 #define TCA_FLOWER_KEY_IPV4_SRC 10
193 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
194 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
196 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
197 #define TCA_FLOWER_KEY_IPV4_DST 12
199 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
200 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
202 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
203 #define TCA_FLOWER_KEY_IPV6_SRC 14
205 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
206 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
208 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
209 #define TCA_FLOWER_KEY_IPV6_DST 16
211 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
212 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
214 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
215 #define TCA_FLOWER_KEY_TCP_SRC 18
217 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
218 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
220 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
221 #define TCA_FLOWER_KEY_TCP_DST 19
223 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
224 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
226 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
227 #define TCA_FLOWER_KEY_UDP_SRC 20
229 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
230 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
232 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
233 #define TCA_FLOWER_KEY_UDP_DST 21
235 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
236 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
238 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
239 #define TCA_FLOWER_KEY_VLAN_ID 23
241 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
242 #define TCA_FLOWER_KEY_VLAN_PRIO 24
244 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
245 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
247 #ifndef HAVE_TCA_FLOWER_KEY_ENC_KEY_ID
248 #define TCA_FLOWER_KEY_ENC_KEY_ID 26
250 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC
251 #define TCA_FLOWER_KEY_ENC_IPV4_SRC 27
253 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK
254 #define TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK 28
256 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST
257 #define TCA_FLOWER_KEY_ENC_IPV4_DST 29
259 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK
260 #define TCA_FLOWER_KEY_ENC_IPV4_DST_MASK 30
262 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC
263 #define TCA_FLOWER_KEY_ENC_IPV6_SRC 31
265 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK
266 #define TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK 32
268 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST
269 #define TCA_FLOWER_KEY_ENC_IPV6_DST 33
271 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK
272 #define TCA_FLOWER_KEY_ENC_IPV6_DST_MASK 34
274 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT
275 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT 43
277 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK
278 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK 44
280 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT
281 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT 45
283 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK
284 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK 46
286 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
287 #define TCA_FLOWER_KEY_TCP_FLAGS 71
289 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
290 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
292 #ifndef HAVE_TC_ACT_GOTO_CHAIN
293 #define TC_ACT_GOTO_CHAIN 0x20000000
296 #ifndef IPV6_ADDR_LEN
297 #define IPV6_ADDR_LEN 16
300 #ifndef IPV4_ADDR_LEN
301 #define IPV4_ADDR_LEN 4
305 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
312 #ifndef TCA_ACT_MAX_PRIO
313 #define TCA_ACT_MAX_PRIO 32
316 /** UDP port range of VXLAN devices created by driver. */
317 #define MLX5_VXLAN_PORT_MIN 30000
318 #define MLX5_VXLAN_PORT_MAX 60000
319 #define MLX5_VXLAN_DEVICE_PFX "vmlx_"
321 /** Tunnel action type, used for @p type in header structure. */
322 enum flow_tcf_tunact_type {
323 FLOW_TCF_TUNACT_VXLAN_DECAP,
324 FLOW_TCF_TUNACT_VXLAN_ENCAP,
327 /** Flags used for @p mask in tunnel action encap descriptors. */
328 #define FLOW_TCF_ENCAP_ETH_SRC (1u << 0)
329 #define FLOW_TCF_ENCAP_ETH_DST (1u << 1)
330 #define FLOW_TCF_ENCAP_IPV4_SRC (1u << 2)
331 #define FLOW_TCF_ENCAP_IPV4_DST (1u << 3)
332 #define FLOW_TCF_ENCAP_IPV6_SRC (1u << 4)
333 #define FLOW_TCF_ENCAP_IPV6_DST (1u << 5)
334 #define FLOW_TCF_ENCAP_UDP_SRC (1u << 6)
335 #define FLOW_TCF_ENCAP_UDP_DST (1u << 7)
336 #define FLOW_TCF_ENCAP_VXLAN_VNI (1u << 8)
339 * Structure for holding netlink context.
340 * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
341 * Using this (8KB) buffer size ensures that netlink messages will never be
342 * truncated.
344 struct mlx5_flow_tcf_context {
345 struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
346 uint32_t seq; /* Message sequence number. */
347 uint32_t buf_size; /* Message buffer size. */
348 uint8_t *buf; /* Message buffer. */
352 * Neigh rule structure. The neigh rule is applied via Netlink to
353 * outer tunnel iface in order to provide destination MAC address
354 * for the VXLAN encapsulation. The neigh rule is implicitly related
355 * to the Flow itself and can be shared by multiple Flows.
357 struct tcf_neigh_rule {
358 LIST_ENTRY(tcf_neigh_rule) next;
360 struct ether_addr eth;
367 uint8_t dst[IPV6_ADDR_LEN];
373 * Local rule structure. The local rule is applied via Netlink to
374 * outer tunnel iface in order to provide local and peer IP addresses
375 * of the VXLAN tunnel for encapsulation. The local rule is implicitly
376 * related to the Flow itself and can be shared by multiple Flows.
378 struct tcf_local_rule {
379 LIST_ENTRY(tcf_local_rule) next;
388 uint8_t dst[IPV6_ADDR_LEN];
389 uint8_t src[IPV6_ADDR_LEN];
394 /** VXLAN virtual netdev. */
396 LIST_ENTRY(tcf_vtep) next;
397 LIST_HEAD(, tcf_neigh_rule) neigh;
398 LIST_HEAD(, tcf_local_rule) local;
400 unsigned int ifindex; /**< Own interface index. */
401 unsigned int ifouter; /**< Index of device attached to. */
406 /** Tunnel descriptor header, common for all tunnel types. */
407 struct flow_tcf_tunnel_hdr {
408 uint32_t type; /**< Tunnel action type. */
409 struct tcf_vtep *vtep; /**< Virtual tunnel endpoint device. */
410 unsigned int ifindex_org; /**< Original dst/src interface */
411 unsigned int *ifindex_ptr; /**< Interface ptr in message. */
414 struct flow_tcf_vxlan_decap {
415 struct flow_tcf_tunnel_hdr hdr;
419 struct flow_tcf_vxlan_encap {
420 struct flow_tcf_tunnel_hdr hdr;
423 struct ether_addr dst;
424 struct ether_addr src;
432 uint8_t dst[IPV6_ADDR_LEN];
433 uint8_t src[IPV6_ADDR_LEN];
445 /** Structure used when extracting the values of a flow counters
446 * from a netlink message.
448 struct flow_tcf_stats_basic {
450 struct gnet_stats_basic counters;
453 /** Empty masks for known item types. */
455 struct rte_flow_item_port_id port_id;
456 struct rte_flow_item_eth eth;
457 struct rte_flow_item_vlan vlan;
458 struct rte_flow_item_ipv4 ipv4;
459 struct rte_flow_item_ipv6 ipv6;
460 struct rte_flow_item_tcp tcp;
461 struct rte_flow_item_udp udp;
462 struct rte_flow_item_vxlan vxlan;
463 } flow_tcf_mask_empty;
465 /** Supported masks for known item types. */
466 static const struct {
467 struct rte_flow_item_port_id port_id;
468 struct rte_flow_item_eth eth;
469 struct rte_flow_item_vlan vlan;
470 struct rte_flow_item_ipv4 ipv4;
471 struct rte_flow_item_ipv6 ipv6;
472 struct rte_flow_item_tcp tcp;
473 struct rte_flow_item_udp udp;
474 struct rte_flow_item_vxlan vxlan;
475 } flow_tcf_mask_supported = {
480 .type = RTE_BE16(0xffff),
481 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
482 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
485 /* PCP and VID only, no DEI. */
486 .tci = RTE_BE16(0xefff),
487 .inner_type = RTE_BE16(0xffff),
490 .next_proto_id = 0xff,
491 .src_addr = RTE_BE32(0xffffffff),
492 .dst_addr = RTE_BE32(0xffffffff),
497 "\xff\xff\xff\xff\xff\xff\xff\xff"
498 "\xff\xff\xff\xff\xff\xff\xff\xff",
500 "\xff\xff\xff\xff\xff\xff\xff\xff"
501 "\xff\xff\xff\xff\xff\xff\xff\xff",
504 .src_port = RTE_BE16(0xffff),
505 .dst_port = RTE_BE16(0xffff),
509 .src_port = RTE_BE16(0xffff),
510 .dst_port = RTE_BE16(0xffff),
513 .vni = "\xff\xff\xff",
517 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
518 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
519 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
520 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
521 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
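/*
 * Illustrative sketch (not part of the driver logic): the macros above
 * follow the libmnl attribute layout, a 4-byte aligned header followed
 * by 4-byte aligned payload. For example, the size reserved for a
 * 16-bit attribute such as TCA_FLOWER_KEY_ETH_TYPE would be:
 *
 *   size_t eth_type_sz = SZ_NLATTR_TYPE_OF(uint16_t);
 *   // MNL_ALIGN(MNL_ALIGN(sizeof(struct nlattr)) + sizeof(uint16_t))
 *   // == MNL_ALIGN(4 + 2) == 8 bytes on Linux.
 *
 *   size_t kind_sz = SZ_NLATTR_STRZ_OF("flower");
 *   // header (4) + strlen("flower") + 1 == 11, aligned up to 12.
 */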
523 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
525 /** DPDK port to network interface index (ifindex) conversion. */
526 struct flow_tcf_ptoi {
527 uint16_t port_id; /**< DPDK port ID. */
528 unsigned int ifindex; /**< Network interface index. */
531 /* Due to a limitation on driver/FW. */
532 #define MLX5_TCF_GROUP_ID_MAX 3
533 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
535 #define MLX5_TCF_FATE_ACTIONS \
536 (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
537 MLX5_FLOW_ACTION_JUMP)
539 #define MLX5_TCF_VLAN_ACTIONS \
540 (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
541 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
543 #define MLX5_TCF_VXLAN_ACTIONS \
544 (MLX5_FLOW_ACTION_VXLAN_ENCAP | MLX5_FLOW_ACTION_VXLAN_DECAP)
546 #define MLX5_TCF_PEDIT_ACTIONS \
547 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
548 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
549 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
550 MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
551 MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
553 #define MLX5_TCF_CONFIG_ACTIONS \
554 (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
555 MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
556 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
557 (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
559 #define MAX_PEDIT_KEYS 128
560 #define SZ_PEDIT_KEY_VAL 4
562 #define NUM_OF_PEDIT_KEYS(sz) \
563 (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
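/*
 * Illustrative examples: NUM_OF_PEDIT_KEYS() rounds a field size up to
 * the number of 32-bit pedit keys needed to cover it, e.g.:
 *
 *   NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN)  == 1  // 4 bytes -> one key
 *   NUM_OF_PEDIT_KEYS(TP_PORT_LEN)    == 1  // 2 bytes -> one key
 *   NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN) == 2  // 6 bytes -> two keys
 *   NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN)  == 4  // 16 bytes -> four keys
 */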
565 struct pedit_key_ex {
566 enum pedit_header_type htype;
570 struct pedit_parser {
571 struct tc_pedit_sel sel;
572 struct tc_pedit_key keys[MAX_PEDIT_KEYS];
573 struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
577 * Create space for using the implicitly created TC flow counter.
580 * Pointer to the Ethernet device structure.
583 * A pointer to the counter data structure, NULL otherwise and
584 * rte_errno is set.
586 static struct mlx5_flow_counter *
587 flow_tcf_counter_new(void)
589 struct mlx5_flow_counter *cnt;
592 * E-Switch counters cannot be shared and their IDs are unknown.
593 * Currently all are returned with ID 0.
594 * In the future it may be better to switch to unique numbers.
596 struct mlx5_flow_counter tmpl = {
599 cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
605 /* Implicit counter, do not add to list. */
610 * Set pedit key of MAC address
613 * pointer to action specification
614 * @param[in,out] p_parser
615 * pointer to pedit_parser
618 flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
619 struct pedit_parser *p_parser)
621 int idx = p_parser->sel.nkeys;
622 uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
623 offsetof(struct ether_hdr, s_addr) :
624 offsetof(struct ether_hdr, d_addr);
625 const struct rte_flow_action_set_mac *conf =
626 (const struct rte_flow_action_set_mac *)actions->conf;
628 p_parser->keys[idx].off = off;
629 p_parser->keys[idx].mask = ~UINT32_MAX;
630 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
631 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
632 memcpy(&p_parser->keys[idx].val,
633 conf->mac_addr, SZ_PEDIT_KEY_VAL);
635 p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
636 p_parser->keys[idx].mask = 0xFFFF0000;
637 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
638 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
639 memcpy(&p_parser->keys[idx].val,
640 conf->mac_addr + SZ_PEDIT_KEY_VAL,
641 ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
642 p_parser->sel.nkeys = (++idx);
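/*
 * Layout sketch (illustrative, assuming the kernel pedit semantics of
 * *ptr = (*ptr & mask) ^ val per 32-bit word): a 6-byte MAC address
 * does not fit in a single 32-bit pedit key, hence the split into two
 * keys above, e.g. for SET_MAC_DST with address 11:22:33:44:55:66:
 *
 *   keys[0]: off = 0, mask = 0x00000000, val = 11 22 33 44
 *   keys[1]: off = 4, mask = 0xFFFF0000, val = 55 66 00 00
 *
 * Bytes with a zero mask are rewritten from val; bytes with a 0xff
 * mask keep the original packet content since val is zero there and
 * x ^ 0 == x (byte layout shown for a little-endian host).
 */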
646 * Set pedit key of decrease/set TTL
649 * pointer to action specification
650 * @param[in,out] p_parser
651 * pointer to pedit_parser
652 * @param[in] item_flags
653 * flags of all items presented
656 flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
657 struct pedit_parser *p_parser,
660 int idx = p_parser->sel.nkeys;
662 p_parser->keys[idx].mask = 0xFFFFFF00;
663 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
664 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
665 p_parser->keys[idx].off =
666 offsetof(struct ipv4_hdr, time_to_live);
668 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
669 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
670 p_parser->keys[idx].off =
671 offsetof(struct ipv6_hdr, hop_limits);
673 if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
674 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
675 p_parser->keys[idx].val = 0x000000FF;
677 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
678 p_parser->keys[idx].val =
679 (__u32)((const struct rte_flow_action_set_ttl *)
680 actions->conf)->ttl_value;
682 p_parser->sel.nkeys = (++idx);
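/*
 * Worked example (illustrative): DEC_TTL is encoded as a pedit ADD of
 * 0xFF to the TTL/hop-limit byte. Per-byte arithmetic is modulo 256,
 * so adding 0xFF is the same as subtracting one:
 *
 *   (64 + 0xFF) & 0xFF == 63
 *
 * The 0xFFFFFF00 mask confines the update to the TTL byte of the
 * covered 32-bit word (byte layout for a little-endian host).
 */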
686 * Set pedit key of transport (TCP/UDP) port value
689 * pointer to action specification
690 * @param[in,out] p_parser
691 * pointer to pedit_parser
692 * @param[in] item_flags
693 * flags of all items presented
696 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
697 struct pedit_parser *p_parser,
700 int idx = p_parser->sel.nkeys;
702 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
703 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
704 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
705 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
706 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
707 /* The offset of src/dst port is the same for TCP and UDP. */
708 p_parser->keys[idx].off =
709 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
710 offsetof(struct tcp_hdr, src_port) :
711 offsetof(struct tcp_hdr, dst_port);
712 p_parser->keys[idx].mask = 0xFFFF0000;
713 p_parser->keys[idx].val =
714 (__u32)((const struct rte_flow_action_set_tp *)
715 actions->conf)->port;
716 p_parser->sel.nkeys = (++idx);
720 * Set pedit key of IPv6 address
723 * pointer to action specification
724 * @param[in,out] p_parser
725 * pointer to pedit_parser
728 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
729 struct pedit_parser *p_parser)
731 int idx = p_parser->sel.nkeys;
732 int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
734 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
735 offsetof(struct ipv6_hdr, src_addr) :
736 offsetof(struct ipv6_hdr, dst_addr);
737 const struct rte_flow_action_set_ipv6 *conf =
738 (const struct rte_flow_action_set_ipv6 *)actions->conf;
740 for (int i = 0; i < keys; i++, idx++) {
741 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
742 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
743 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
744 p_parser->keys[idx].mask = ~UINT32_MAX;
745 memcpy(&p_parser->keys[idx].val,
746 conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
749 p_parser->sel.nkeys += keys;
753 * Set pedit key of IPv4 address
756 * pointer to action specification
757 * @param[in,out] p_parser
758 * pointer to pedit_parser
761 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
762 struct pedit_parser *p_parser)
764 int idx = p_parser->sel.nkeys;
766 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
767 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
768 p_parser->keys[idx].off =
769 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
770 offsetof(struct ipv4_hdr, src_addr) :
771 offsetof(struct ipv4_hdr, dst_addr);
772 p_parser->keys[idx].mask = ~UINT32_MAX;
773 p_parser->keys[idx].val =
774 ((const struct rte_flow_action_set_ipv4 *)
775 actions->conf)->ipv4_addr;
776 p_parser->sel.nkeys = (++idx);
780 * Create the pedit's netlink attributes in a netlink message
781 * using the pre-allocated message buffer
784 * pointer to pre-allocated netlink message buffer
785 * @param[in,out] actions
786 * pointer to pointer of actions specification.
787 * @param[in,out] action_flags
788 * pointer to actions flags
789 * @param[in] item_flags
790 * flags of all items presented
793 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
794 const struct rte_flow_action **actions,
797 struct pedit_parser p_parser;
798 struct nlattr *na_act_options;
799 struct nlattr *na_pedit_keys;
801 memset(&p_parser, 0, sizeof(p_parser));
802 mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
803 na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
804 /* All modify-header actions should be gathered in one tc-pedit action. */
805 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
806 switch ((*actions)->type) {
807 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
808 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
809 flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
811 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
812 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
813 flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
815 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
816 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
817 flow_tcf_pedit_key_set_tp_port(*actions,
818 &p_parser, item_flags);
820 case RTE_FLOW_ACTION_TYPE_SET_TTL:
821 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
822 flow_tcf_pedit_key_set_dec_ttl(*actions,
823 &p_parser, item_flags);
825 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
826 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
827 flow_tcf_pedit_key_set_mac(*actions, &p_parser);
830 goto pedit_mnl_msg_done;
834 p_parser.sel.action = TC_ACT_PIPE;
835 mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
836 sizeof(p_parser.sel) +
837 p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
840 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
841 for (int i = 0; i < p_parser.sel.nkeys; i++) {
842 struct nlattr *na_pedit_key =
843 mnl_attr_nest_start(nl,
844 TCA_PEDIT_KEY_EX | NLA_F_NESTED);
845 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
846 p_parser.keys_ex[i].htype);
847 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
848 p_parser.keys_ex[i].cmd);
849 mnl_attr_nest_end(nl, na_pedit_key);
851 mnl_attr_nest_end(nl, na_pedit_keys);
852 mnl_attr_nest_end(nl, na_act_options);
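/*
 * Resulting netlink attribute layout (sketch, assuming a single
 * SET_TTL action was translated by the function above):
 *
 *   TCA_ACT_KIND = "pedit"
 *   TCA_ACT_OPTIONS (nest)
 *     TCA_PEDIT_PARMS_EX = struct tc_pedit_sel + keys[]
 *     TCA_PEDIT_KEYS_EX (nest)
 *       TCA_PEDIT_KEY_EX (nest)
 *         TCA_PEDIT_KEY_EX_HTYPE = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4
 *         TCA_PEDIT_KEY_EX_CMD = TCA_PEDIT_KEY_EX_CMD_SET
 */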
857 * Calculate the maximum memory size of one TC-pedit action.
858 * One TC-pedit action can contain a set of keys, each defining
859 * a rewrite element (rte_flow action)
861 * @param[in,out] actions
862 * actions specification.
863 * @param[in,out] action_flags
865 * @param[in,out] size
868 * Max memory size of one TC-pedit action
871 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
872 uint64_t *action_flags)
878 pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
879 SZ_NLATTR_STRZ_OF("pedit") +
880 SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
881 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
882 switch ((*actions)->type) {
883 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
884 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
885 flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
887 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
888 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
889 flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
891 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
892 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
893 flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
895 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
896 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
897 flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
899 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
900 /* Port size is the same for TCP and UDP. */
901 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
902 flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
904 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
905 /* Port size is the same for TCP and UDP. */
906 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
907 flags |= MLX5_FLOW_ACTION_SET_TP_DST;
909 case RTE_FLOW_ACTION_TYPE_SET_TTL:
910 keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
911 flags |= MLX5_FLOW_ACTION_SET_TTL;
913 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
914 keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
915 flags |= MLX5_FLOW_ACTION_DEC_TTL;
917 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
918 keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
919 flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
921 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
922 keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
923 flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
926 goto get_pedit_action_size_done;
929 get_pedit_action_size_done:
930 /* TCA_PEDIT_PARMS_EX */
932 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
933 keys * sizeof(struct tc_pedit_key));
934 pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
936 /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
937 (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
938 SZ_NLATTR_DATA_OF(2));
939 (*action_flags) |= flags;
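/*
 * Worked size example (illustrative, 64-bit Linux host): a single
 * SET_IPV4_SRC action gives keys == 1, so the size reserved is
 *
 *   pedit_size = SZ_NLATTR_NEST                    // na_act_index
 *              + SZ_NLATTR_STRZ_OF("pedit")        // 4 + 6 -> 12
 *              + SZ_NLATTR_NEST                    // TCA_ACT_OPTIONS
 *              + SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel)
 *                                  + sizeof(struct tc_pedit_key))
 *              + SZ_NLATTR_NEST                    // TCA_PEDIT_KEYS_EX
 *              + SZ_NLATTR_NEST + 2 * SZ_NLATTR_DATA_OF(2);
 *
 * This is an upper bound on what the translation emits; unused buffer
 * space is simply left unfilled.
 */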
945 * Retrieve mask for pattern item.
947 * This function does basic sanity checks on a pattern item in order to
948 * return the most appropriate mask for it.
951 * Item specification.
952 * @param[in] mask_default
953 * Default mask for pattern item as specified by the flow API.
954 * @param[in] mask_supported
955 * Mask fields supported by the implementation.
956 * @param[in] mask_empty
957 * Empty mask to return when there is no specification.
959 * Perform verbose error reporting if not NULL.
962 * Either @p item->mask or one of the mask parameters on success, NULL
963 * otherwise and rte_errno is set.
966 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
967 const void *mask_supported, const void *mask_empty,
968 size_t mask_size, struct rte_flow_error *error)
973 /* item->last and item->mask cannot exist without item->spec. */
974 if (!item->spec && (item->mask || item->last)) {
975 rte_flow_error_set(error, EINVAL,
976 RTE_FLOW_ERROR_TYPE_ITEM, item,
977 "\"mask\" or \"last\" field provided without"
978 " a corresponding \"spec\"");
981 /* No spec, no mask, no problem. */
984 mask = item->mask ? item->mask : mask_default;
987 * Single-pass check to make sure that:
988 * - Mask is supported, no bits are set outside mask_supported.
989 * - Both item->spec and item->last are included in mask.
991 for (i = 0; i != mask_size; ++i) {
994 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
995 ((const uint8_t *)mask_supported)[i]) {
996 rte_flow_error_set(error, ENOTSUP,
997 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
998 "unsupported field found"
1003 (((const uint8_t *)item->spec)[i] & mask[i]) !=
1004 (((const uint8_t *)item->last)[i] & mask[i])) {
1005 rte_flow_error_set(error, EINVAL,
1006 RTE_FLOW_ERROR_TYPE_ITEM_LAST,
1008 "range between \"spec\" and \"last\""
1009 " not comprised in \"mask\"");
1017 * Build a conversion table between port ID and ifindex.
1020 * Pointer to Ethernet device.
1022 * Pointer to ptoi table.
1024 * Size of ptoi table provided.
1027 * Size of ptoi table filled.
1030 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
1033 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
1034 uint16_t port_id[n + 1];
1036 unsigned int own = 0;
1038 /* At least one port is needed when no switch domain is present. */
1041 port_id[0] = dev->data->port_id;
1043 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
1047 for (i = 0; i != n; ++i) {
1048 struct rte_eth_dev_info dev_info;
1050 rte_eth_dev_info_get(port_id[i], &dev_info);
1051 if (port_id[i] == dev->data->port_id)
1053 ptoi[i].port_id = port_id[i];
1054 ptoi[i].ifindex = dev_info.if_index;
1056 /* Ensure first entry of ptoi[] is the current device. */
1059 ptoi[0] = ptoi[own];
1060 ptoi[own] = ptoi[n];
1062 /* An entry with zero ifindex terminates ptoi[]. */
1063 ptoi[n].port_id = 0;
1064 ptoi[n].ifindex = 0;
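/*
 * Usage sketch (matches the lookups in the validation code below):
 * the table is scanned up to the zero-ifindex terminator to resolve
 * a DPDK port ID into a network interface index:
 *
 *   struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
 *   unsigned int i;
 *
 *   claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
 *                                           PTOI_TABLE_SZ_MAX(dev)));
 *   for (i = 0; ptoi[i].ifindex; ++i)
 *           if (ptoi[i].port_id == port_id)
 *                   break;
 *   // !ptoi[i].ifindex means port_id could not be resolved.
 */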
1069 * Verify the @p attr will be correctly understood by the E-switch.
1072 * Pointer to flow attributes
1074 * Pointer to error structure.
1077 * 0 on success, a negative errno value otherwise and rte_errno is set.
1080 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
1081 struct rte_flow_error *error)
1084 * Supported attributes: groups, some priorities and ingress only.
1085 * Group is supported only if the kernel supports chains. Don't care
1086 * about transfer as it is the caller's problem.
1088 if (attr->group > MLX5_TCF_GROUP_ID_MAX)
1089 return rte_flow_error_set(error, ENOTSUP,
1090 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
1091 "group ID larger than "
1092 RTE_STR(MLX5_TCF_GROUP_ID_MAX)
1093 " isn't supported");
1094 else if (attr->group > 0 &&
1095 attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
1096 return rte_flow_error_set(error, ENOTSUP,
1097 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1099 "lowest priority level is "
1100 RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
1101 " when group is configured");
1102 else if (attr->priority > 0xfffe)
1103 return rte_flow_error_set(error, ENOTSUP,
1104 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1106 "lowest priority level is 0xfffe");
1108 return rte_flow_error_set(error, EINVAL,
1109 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1110 attr, "only ingress is supported");
1112 return rte_flow_error_set(error, ENOTSUP,
1113 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1114 attr, "egress is not supported");
1119 * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_ETH item for E-Switch.
1120 * The routine checks the L2 fields to be used in encapsulation header.
1123 * Pointer to the item structure.
1125 * Pointer to the error structure.
1128 * 0 on success, a negative errno value otherwise and rte_errno is set.
1131 flow_tcf_validate_vxlan_encap_eth(const struct rte_flow_item *item,
1132 struct rte_flow_error *error)
1134 const struct rte_flow_item_eth *spec = item->spec;
1135 const struct rte_flow_item_eth *mask = item->mask;
1139 * Specification for L2 addresses can be empty
1140 * because these fields are optional and not
1141 * required directly by the tc rule. The kernel
1142 * tries to resolve them on its own
1147 /* If mask is not specified use the default one. */
1148 mask = &rte_flow_item_eth_mask;
1150 if (memcmp(&mask->dst,
1151 &flow_tcf_mask_empty.eth.dst,
1152 sizeof(flow_tcf_mask_empty.eth.dst))) {
1153 if (memcmp(&mask->dst,
1154 &rte_flow_item_eth_mask.dst,
1155 sizeof(rte_flow_item_eth_mask.dst)))
1156 return rte_flow_error_set
1158 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1159 "no support for partial mask on"
1160 " \"eth.dst\" field");
1162 if (memcmp(&mask->src,
1163 &flow_tcf_mask_empty.eth.src,
1164 sizeof(flow_tcf_mask_empty.eth.src))) {
1165 if (memcmp(&mask->src,
1166 &rte_flow_item_eth_mask.src,
1167 sizeof(rte_flow_item_eth_mask.src)))
1168 return rte_flow_error_set
1170 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1171 "no support for partial mask on"
1172 " \"eth.src\" field");
1174 if (mask->type != RTE_BE16(0x0000)) {
1175 if (mask->type != RTE_BE16(0xffff))
1176 return rte_flow_error_set
1178 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1179 "no support for partial mask on"
1180 " \"eth.type\" field");
1182 "outer ethernet type field"
1183 " cannot be forced for vxlan"
1184 " encapsulation, parameter ignored");
1190 * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV4 item for E-Switch.
1191 * The routine checks the IPv4 fields to be used in encapsulation header.
1194 * Pointer to the item structure.
1196 * Pointer to the error structure.
1199 * 0 on success, a negative errno value otherwise and rte_errno is set.
1202 flow_tcf_validate_vxlan_encap_ipv4(const struct rte_flow_item *item,
1203 struct rte_flow_error *error)
1205 const struct rte_flow_item_ipv4 *spec = item->spec;
1206 const struct rte_flow_item_ipv4 *mask = item->mask;
1210 * Specification for IP addresses cannot be empty
1211 * because it is required by tunnel_key parameter.
1213 return rte_flow_error_set(error, EINVAL,
1214 RTE_FLOW_ERROR_TYPE_ITEM, item,
1215 "NULL outer ipv4 address"
1216 " specification for vxlan"
1220 mask = &rte_flow_item_ipv4_mask;
1221 if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
1222 if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
1223 return rte_flow_error_set
1225 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1226 "no support for partial mask on"
1227 " \"ipv4.hdr.dst_addr\" field"
1228 " for vxlan encapsulation");
1229 /* More IPv4 address validations can be put here. */
1232 * Kernel uses the destination IP address to determine
1233 * the routing path and obtain the MAC destination
1234 * address, so IP destination address must be
1235 * specified in the tc rule.
1237 return rte_flow_error_set(error, EINVAL,
1238 RTE_FLOW_ERROR_TYPE_ITEM, item,
1239 "outer ipv4 destination address"
1240 " must be specified for"
1241 " vxlan encapsulation");
1243 if (mask->hdr.src_addr != RTE_BE32(0x00000000)) {
1244 if (mask->hdr.src_addr != RTE_BE32(0xffffffff))
1245 return rte_flow_error_set
1247 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1248 "no support for partial mask on"
1249 " \"ipv4.hdr.src_addr\" field"
1250 " for vxlan encapsulation");
1251 /* More IPv4 address validations can be put here. */
1254 * Kernel uses the source IP address to select the
1255 * interface for egress encapsulated traffic, so
1256 * it must be specified in the tc rule.
1258 return rte_flow_error_set(error, EINVAL,
1259 RTE_FLOW_ERROR_TYPE_ITEM, item,
1260 "outer ipv4 source address"
1261 " must be specified for"
1262 " vxlan encapsulation");
1268 * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV6 item for E-Switch.
1269 * The routine checks the IPv6 fields to be used in encapsulation header.
1272 * Pointer to the item structure.
1274 * Pointer to the error structure.
1277 * 0 on success, a negative errno value otherwise and rte_errno is set.
1280 flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item,
1281 struct rte_flow_error *error)
1283 const struct rte_flow_item_ipv6 *spec = item->spec;
1284 const struct rte_flow_item_ipv6 *mask = item->mask;
1288 * Specification for IP addresses cannot be empty
1289 * because it is required by tunnel_key parameter.
1291 return rte_flow_error_set(error, EINVAL,
1292 RTE_FLOW_ERROR_TYPE_ITEM, item,
1293 "NULL outer ipv6 address"
1294 " specification for"
1295 " vxlan encapsulation");
1298 mask = &rte_flow_item_ipv6_mask;
1299 if (memcmp(&mask->hdr.dst_addr,
1300 &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
1302 if (memcmp(&mask->hdr.dst_addr,
1303 &rte_flow_item_ipv6_mask.hdr.dst_addr,
1305 return rte_flow_error_set
1307 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1308 "no support for partial mask on"
1309 " \"ipv6.hdr.dst_addr\" field"
1310 " for vxlan encapsulation");
1311 /* More IPv6 address validations can be put here. */
1314 * Kernel uses the destination IP address to determine
1315 * the routing path and obtain the MAC destination
1316 * address (neighbor or gateway), so IP destination address
1317 * must be specified within the tc rule.
1319 return rte_flow_error_set(error, EINVAL,
1320 RTE_FLOW_ERROR_TYPE_ITEM, item,
1321 "outer ipv6 destination address"
1322 " must be specified for"
1323 " vxlan encapsulation");
1325 if (memcmp(&mask->hdr.src_addr,
1326 &flow_tcf_mask_empty.ipv6.hdr.src_addr,
1328 if (memcmp(&mask->hdr.src_addr,
1329 &rte_flow_item_ipv6_mask.hdr.src_addr,
1331 return rte_flow_error_set
1333 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1334 "no support for partial mask on"
1335 " \"ipv6.hdr.src_addr\" field"
1336 " for vxlan encapsulation");
1337 /* More L3 address validation can be put here. */
1340 * Kernel uses the source IP address to select the
1341 * interface for egress encapsulated traffic, so
1342 * it must be specified in the tc rule.
1344 return rte_flow_error_set(error, EINVAL,
1345 RTE_FLOW_ERROR_TYPE_ITEM, item,
1346 "outer L3 source address"
1347 " must be specified for"
1348 " vxlan encapsulation");
1354 * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_UDP item for E-Switch.
1355 * The routine checks the UDP fields to be used in encapsulation header.
1358 * Pointer to the item structure.
1360 * Pointer to the error structure.
1363 * 0 on success, a negative errno value otherwise and rte_errno is set.
1366 flow_tcf_validate_vxlan_encap_udp(const struct rte_flow_item *item,
1367 struct rte_flow_error *error)
1369 const struct rte_flow_item_udp *spec = item->spec;
1370 const struct rte_flow_item_udp *mask = item->mask;
1374 * Specification for UDP ports cannot be empty
1375 * because it is required by tunnel_key parameter.
1377 return rte_flow_error_set(error, EINVAL,
1378 RTE_FLOW_ERROR_TYPE_ITEM, item,
1379 "NULL UDP port specification "
1380 " for vxlan encapsulation");
1383 mask = &rte_flow_item_udp_mask;
1384 if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
1385 if (mask->hdr.dst_port != RTE_BE16(0xffff))
1386 return rte_flow_error_set
1388 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1389 "no support for partial mask on"
1390 " \"udp.hdr.dst_port\" field"
1391 " for vxlan encapsulation");
1392 if (!spec->hdr.dst_port)
1393 return rte_flow_error_set
1395 RTE_FLOW_ERROR_TYPE_ITEM, item,
1396 "outer UDP remote port cannot be"
1397 " 0 for vxlan encapsulation");
1399 return rte_flow_error_set(error, EINVAL,
1400 RTE_FLOW_ERROR_TYPE_ITEM, item,
1401 "outer UDP remote port"
1402 " must be specified for"
1403 " vxlan encapsulation");
1405 if (mask->hdr.src_port != RTE_BE16(0x0000)) {
1406 if (mask->hdr.src_port != RTE_BE16(0xffff))
1407 return rte_flow_error_set
1409 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1410 "no support for partial mask on"
1411 " \"udp.hdr.src_port\" field"
1412 " for vxlan encapsulation");
1414 "outer UDP source port cannot be"
1415 " forced for vxlan encapsulation,"
1416 " parameter ignored");
1422 * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_VXLAN item for E-Switch.
1423 * The routine checks the VNI field to be used in the encapsulation header.
1426 * Pointer to the item structure.
1428 * Pointer to the error structure.
1431 * 0 on success, a negative errno value otherwise and rte_errno is set.
1434 flow_tcf_validate_vxlan_encap_vni(const struct rte_flow_item *item,
1435 struct rte_flow_error *error)
1437 const struct rte_flow_item_vxlan *spec = item->spec;
1438 const struct rte_flow_item_vxlan *mask = item->mask;
1441 /* Outer VNI is required by tunnel_key parameter. */
1442 return rte_flow_error_set(error, EINVAL,
1443 RTE_FLOW_ERROR_TYPE_ITEM, item,
1444 "NULL VNI specification"
1445 " for vxlan encapsulation");
1448 mask = &rte_flow_item_vxlan_mask;
1449 if (!mask->vni[0] && !mask->vni[1] && !mask->vni[2])
1450 return rte_flow_error_set(error, EINVAL,
1451 RTE_FLOW_ERROR_TYPE_ITEM, item,
1452 "outer VNI must be specified "
1453 "for vxlan encapsulation");
1454 if (mask->vni[0] != 0xff ||
1455 mask->vni[1] != 0xff ||
1456 mask->vni[2] != 0xff)
1457 return rte_flow_error_set(error, ENOTSUP,
1458 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1459 "no support for partial mask on"
1460 " \"vxlan.vni\" field");
1462 if (!spec->vni[0] && !spec->vni[1] && !spec->vni[2])
1463 return rte_flow_error_set(error, EINVAL,
1464 RTE_FLOW_ERROR_TYPE_ITEM, item,
1465 "vxlan vni cannot be 0");
1470 * Validate VXLAN_ENCAP action item list for E-Switch.
1471 * The routine checks items to be used in encapsulation header.
1474 * Pointer to the VXLAN_ENCAP action structure.
1476 * Pointer to the error structure.
1479 * 0 on success, a negative errno value otherwise and rte_errno is set.
1482 flow_tcf_validate_vxlan_encap(const struct rte_flow_action *action,
1483 struct rte_flow_error *error)
1485 const struct rte_flow_item *items;
1487 uint32_t item_flags = 0;
1490 return rte_flow_error_set(error, EINVAL,
1491 RTE_FLOW_ERROR_TYPE_ACTION, action,
1492 "Missing vxlan tunnel"
1493 " action configuration");
1494 items = ((const struct rte_flow_action_vxlan_encap *)
1495 action->conf)->definition;
1497 return rte_flow_error_set(error, EINVAL,
1498 RTE_FLOW_ERROR_TYPE_ACTION, action,
1499 "Missing vxlan tunnel"
1500 " encapsulation parameters");
1501 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1502 switch (items->type) {
1503 case RTE_FLOW_ITEM_TYPE_VOID:
1505 case RTE_FLOW_ITEM_TYPE_ETH:
1506 ret = mlx5_flow_validate_item_eth(items, item_flags,
1510 ret = flow_tcf_validate_vxlan_encap_eth(items, error);
1513 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1516 case RTE_FLOW_ITEM_TYPE_IPV4:
1517 ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1521 ret = flow_tcf_validate_vxlan_encap_ipv4(items, error);
1524 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1526 case RTE_FLOW_ITEM_TYPE_IPV6:
1527 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1531 ret = flow_tcf_validate_vxlan_encap_ipv6(items, error);
1534 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1536 case RTE_FLOW_ITEM_TYPE_UDP:
1537 ret = mlx5_flow_validate_item_udp(items, item_flags,
1541 ret = flow_tcf_validate_vxlan_encap_udp(items, error);
1544 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1546 case RTE_FLOW_ITEM_TYPE_VXLAN:
1547 ret = mlx5_flow_validate_item_vxlan(items,
1551 ret = flow_tcf_validate_vxlan_encap_vni(items, error);
1554 item_flags |= MLX5_FLOW_LAYER_VXLAN;
1557 return rte_flow_error_set
1559 RTE_FLOW_ERROR_TYPE_ITEM, items,
1560 "vxlan encap item not supported");
1563 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
1564 return rte_flow_error_set(error, EINVAL,
1565 RTE_FLOW_ERROR_TYPE_ACTION, action,
1566 "no outer IP layer found"
1567 " for vxlan encapsulation");
1568 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1569 return rte_flow_error_set(error, EINVAL,
1570 RTE_FLOW_ERROR_TYPE_ACTION, action,
1571 "no outer UDP layer found"
1572 " for vxlan encapsulation");
1573 if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
1574 return rte_flow_error_set(error, EINVAL,
1575 RTE_FLOW_ERROR_TYPE_ACTION, action,
1576 "no VXLAN VNI found"
1577 " for vxlan encapsulation");
1582 * Validate RTE_FLOW_ITEM_TYPE_IPV4 item if VXLAN_DECAP action
1583 * is present in actions list.
1586 * Outer IPv4 address item (if any, NULL otherwise).
1588 * Pointer to the error structure.
1591 * 0 on success, a negative errno value otherwise and rte_errno is set.
1594 flow_tcf_validate_vxlan_decap_ipv4(const struct rte_flow_item *ipv4,
1595 struct rte_flow_error *error)
1597 const struct rte_flow_item_ipv4 *spec = ipv4->spec;
1598 const struct rte_flow_item_ipv4 *mask = ipv4->mask;
1602 * Specification for IP addresses cannot be empty
1603 * because it is required as decap parameter.
1605 return rte_flow_error_set(error, EINVAL,
1606 RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
1607 "NULL outer ipv4 address"
1608 " specification for vxlan"
1609 " for vxlan decapsulation");
1612 mask = &rte_flow_item_ipv4_mask;
1613 if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
1614 if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
1615 return rte_flow_error_set
1617 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1618 "no support for partial mask on"
1619 " \"ipv4.hdr.dst_addr\" field");
1620 /* More IP address validations can be put here. */
1623 * Kernel uses the destination IP address
1624 * to determine the ingress network interface
1625 * for traffic being decapsulated.
1627 return rte_flow_error_set(error, EINVAL,
1628 RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
1629 "outer ipv4 destination address"
1630 " must be specified for"
1631 " vxlan decapsulation");
1633 /* Source IP address is optional for decap. */
1634 if (mask->hdr.src_addr != RTE_BE32(0x00000000) &&
1635 mask->hdr.src_addr != RTE_BE32(0xffffffff))
1636 return rte_flow_error_set(error, ENOTSUP,
1637 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1638 "no support for partial mask on"
1639 " \"ipv4.hdr.src_addr\" field");
1644 * Validate RTE_FLOW_ITEM_TYPE_IPV6 item if VXLAN_DECAP action
1645 * is present in actions list.
1648 * Outer IPv6 address item (if any, NULL otherwise).
1650 * Pointer to the error structure.
1653 * 0 on success, a negative errno value otherwise and rte_errno is set.
1656 flow_tcf_validate_vxlan_decap_ipv6(const struct rte_flow_item *ipv6,
1657 struct rte_flow_error *error)
1659 const struct rte_flow_item_ipv6 *spec = ipv6->spec;
1660 const struct rte_flow_item_ipv6 *mask = ipv6->mask;
1664 * Specification for IP addresses cannot be empty
1665 * because it is required as decap parameter.
1667 return rte_flow_error_set(error, EINVAL,
1668 RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
1669 "NULL outer ipv6 address"
1670 " specification for vxlan"
1674 mask = &rte_flow_item_ipv6_mask;
1675 if (memcmp(&mask->hdr.dst_addr,
1676 &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
1678 if (memcmp(&mask->hdr.dst_addr,
1679 &rte_flow_item_ipv6_mask.hdr.dst_addr,
1681 return rte_flow_error_set
1683 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1684 "no support for partial mask on"
1685 " \"ipv6.hdr.dst_addr\" field");
1686 /* More IP address validations can be put here. */
1689 * Kernel uses the destination IP address
1690 * to determine the ingress network interface
1691 * for traffic being decapsulated.
1693 return rte_flow_error_set(error, EINVAL,
1694 RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
1695 "outer ipv6 destination address must be "
1696 "specified for vxlan decapsulation");
1698 /* Source IP address is optional for decap. */
1699 if (memcmp(&mask->hdr.src_addr,
1700 &flow_tcf_mask_empty.ipv6.hdr.src_addr,
1702 if (memcmp(&mask->hdr.src_addr,
1703 &rte_flow_item_ipv6_mask.hdr.src_addr,
1705 return rte_flow_error_set
1707 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1708 "no support for partial mask on"
1709 " \"ipv6.hdr.src_addr\" field");
1715 * Validate RTE_FLOW_ITEM_TYPE_UDP item if VXLAN_DECAP action
1716 * is present in actions list.
1719 * Outer UDP layer item (if any, NULL otherwise).
1721 * Pointer to the error structure.
1724 * 0 on success, a negative errno value otherwise and rte_errno is set.
1727 flow_tcf_validate_vxlan_decap_udp(const struct rte_flow_item *udp,
1728 struct rte_flow_error *error)
1730 const struct rte_flow_item_udp *spec = udp->spec;
1731 const struct rte_flow_item_udp *mask = udp->mask;
1735 * Specification for UDP ports cannot be empty
1736 * because it is required as decap parameter.
1738 return rte_flow_error_set(error, EINVAL,
1739 RTE_FLOW_ERROR_TYPE_ITEM, udp,
1740 "NULL UDP port specification"
1741 " for VXLAN decapsulation");
1743 mask = &rte_flow_item_udp_mask;
1744 if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
1745 if (mask->hdr.dst_port != RTE_BE16(0xffff))
1746 return rte_flow_error_set
1748 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1749 "no support for partial mask on"
1750 " \"udp.hdr.dst_port\" field");
1751 if (!spec->hdr.dst_port)
1752 return rte_flow_error_set
1754 RTE_FLOW_ERROR_TYPE_ITEM, udp,
1755 "zero decap local UDP port");
1757 return rte_flow_error_set(error, EINVAL,
1758 RTE_FLOW_ERROR_TYPE_ITEM, udp,
1759 "outer UDP destination port must be "
1760 "specified for vxlan decapsulation");
1762 if (mask->hdr.src_port != RTE_BE16(0x0000)) {
1763 if (mask->hdr.src_port != RTE_BE16(0xffff))
1764 return rte_flow_error_set
1766 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1767 "no support for partial mask on"
1768 " \"udp.hdr.src_port\" field");
1770 "outer UDP local port cannot be "
1771 "forced for VXLAN encapsulation, "
1772 "parameter ignored");
1778 * Validate flow for E-Switch.
1781 * Pointer to the priv structure.
1783 * Pointer to the flow attributes.
1785 * Pointer to the list of items.
1786 * @param[in] actions
1787 * Pointer to the list of actions.
1789 * Pointer to the error structure.
1792 * 0 on success, a negative errno value otherwise and rte_errno is set.
1795 flow_tcf_validate(struct rte_eth_dev *dev,
1796 const struct rte_flow_attr *attr,
1797 const struct rte_flow_item items[],
1798 const struct rte_flow_action actions[],
1799 struct rte_flow_error *error)
1802 const struct rte_flow_item_port_id *port_id;
1803 const struct rte_flow_item_eth *eth;
1804 const struct rte_flow_item_vlan *vlan;
1805 const struct rte_flow_item_ipv4 *ipv4;
1806 const struct rte_flow_item_ipv6 *ipv6;
1807 const struct rte_flow_item_tcp *tcp;
1808 const struct rte_flow_item_udp *udp;
1809 const struct rte_flow_item_vxlan *vxlan;
1812 const struct rte_flow_action_port_id *port_id;
1813 const struct rte_flow_action_jump *jump;
1814 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1815 const struct rte_flow_action_of_set_vlan_vid *
1817 const struct rte_flow_action_of_set_vlan_pcp *
1819 const struct rte_flow_action_vxlan_encap *vxlan_encap;
1820 const struct rte_flow_action_set_ipv4 *set_ipv4;
1821 const struct rte_flow_action_set_ipv6 *set_ipv6;
1823 uint64_t item_flags = 0;
1824 uint64_t action_flags = 0;
1825 uint8_t next_protocol = -1;
1826 unsigned int tcm_ifindex = 0;
1827 uint8_t pedit_validated = 0;
1828 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1829 struct rte_eth_dev *port_id_dev = NULL;
1830 bool in_port_id_set;
1833 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1834 PTOI_TABLE_SZ_MAX(dev)));
1835 ret = flow_tcf_validate_attributes(attr, error);
1838 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1840 uint64_t current_action_flag = 0;
1842 switch (actions->type) {
1843 case RTE_FLOW_ACTION_TYPE_VOID:
1845 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1846 current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1849 conf.port_id = actions->conf;
1850 if (conf.port_id->original)
1853 for (i = 0; ptoi[i].ifindex; ++i)
1854 if (ptoi[i].port_id == conf.port_id->id)
1856 if (!ptoi[i].ifindex)
1857 return rte_flow_error_set
1859 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1861 "missing data to convert port ID to"
1863 port_id_dev = &rte_eth_devices[conf.port_id->id];
1865 case RTE_FLOW_ACTION_TYPE_JUMP:
1866 current_action_flag = MLX5_FLOW_ACTION_JUMP;
1869 conf.jump = actions->conf;
1870 if (attr->group >= conf.jump->group)
1871 return rte_flow_error_set
1873 RTE_FLOW_ERROR_TYPE_ACTION,
1875 "can jump only to a group forward");
1877 case RTE_FLOW_ACTION_TYPE_DROP:
1878 current_action_flag = MLX5_FLOW_ACTION_DROP;
1880 case RTE_FLOW_ACTION_TYPE_COUNT:
1882 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1883 current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1885 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1886 current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1888 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1889 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1890 return rte_flow_error_set
1892 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1893 "vlan modify is not supported,"
1894 " set action must follow push action");
1895 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1897 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1898 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1899 return rte_flow_error_set
1901 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1902 "vlan modify is not supported,"
1903 " set action must follow push action");
1904 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1906 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
1907 current_action_flag = MLX5_FLOW_ACTION_VXLAN_DECAP;
1909 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
1910 ret = flow_tcf_validate_vxlan_encap(actions, error);
1913 current_action_flag = MLX5_FLOW_ACTION_VXLAN_ENCAP;
1915 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1916 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1918 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1919 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1921 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1922 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1924 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1925 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1927 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1928 current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1930 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1931 current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1933 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1934 current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1936 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1937 current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1939 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1940 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
1942 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1943 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
1946 return rte_flow_error_set(error, ENOTSUP,
1947 RTE_FLOW_ERROR_TYPE_ACTION,
1949 "action not supported");
1951 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1953 return rte_flow_error_set
1955 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1957 "action configuration not set");
1959 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1961 return rte_flow_error_set(error, ENOTSUP,
1962 RTE_FLOW_ERROR_TYPE_ACTION,
1964 "set actions should be "
1965 "listed successively");
1966 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1967 (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1968 pedit_validated = 1;
1969 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1970 (action_flags & MLX5_TCF_FATE_ACTIONS))
1971 return rte_flow_error_set(error, EINVAL,
1972 RTE_FLOW_ERROR_TYPE_ACTION,
1974 "can't have multiple fate"
1976 if ((current_action_flag & MLX5_TCF_VXLAN_ACTIONS) &&
1977 (action_flags & MLX5_TCF_VXLAN_ACTIONS))
1978 return rte_flow_error_set(error, EINVAL,
1979 RTE_FLOW_ERROR_TYPE_ACTION,
1981 "can't have multiple vxlan"
1983 if ((current_action_flag & MLX5_TCF_VXLAN_ACTIONS) &&
1984 (action_flags & MLX5_TCF_VLAN_ACTIONS))
1985 return rte_flow_error_set(error, ENOTSUP,
1986 RTE_FLOW_ERROR_TYPE_ACTION,
1988 "can't have vxlan and vlan"
1989 " actions in the same rule");
1990 action_flags |= current_action_flag;
1992 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1995 if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
1996 items->type != RTE_FLOW_ITEM_TYPE_ETH)
1997 return rte_flow_error_set(error, ENOTSUP,
1998 RTE_FLOW_ERROR_TYPE_ITEM,
2000 "only L2 inner item"
2002 switch (items->type) {
2003 case RTE_FLOW_ITEM_TYPE_VOID:
2005 case RTE_FLOW_ITEM_TYPE_PORT_ID:
2006 mask.port_id = flow_tcf_item_mask
2007 (items, &rte_flow_item_port_id_mask,
2008 &flow_tcf_mask_supported.port_id,
2009 &flow_tcf_mask_empty.port_id,
2010 sizeof(flow_tcf_mask_supported.port_id),
2014 if (mask.port_id == &flow_tcf_mask_empty.port_id) {
2018 spec.port_id = items->spec;
2019 if (mask.port_id->id && mask.port_id->id != 0xffffffff)
2020 return rte_flow_error_set
2022 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2024 "no support for partial mask on"
2026 if (!mask.port_id->id)
2029 for (i = 0; ptoi[i].ifindex; ++i)
2030 if (ptoi[i].port_id == spec.port_id->id)
2032 if (!ptoi[i].ifindex)
2033 return rte_flow_error_set
2035 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2037 "missing data to convert port ID to"
2039 if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
2040 return rte_flow_error_set
2042 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2044 "cannot match traffic for"
2045 " several port IDs through"
2046 " a single flow rule");
2047 tcm_ifindex = ptoi[i].ifindex;
2050 case RTE_FLOW_ITEM_TYPE_ETH:
2051 ret = mlx5_flow_validate_item_eth(items, item_flags,
2055 item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
2056 MLX5_FLOW_LAYER_INNER_L2 :
2057 MLX5_FLOW_LAYER_OUTER_L2;
2059 * Redundant check due to different supported mask.
2060 * Same for the rest of the items.
2062 mask.eth = flow_tcf_item_mask
2063 (items, &rte_flow_item_eth_mask,
2064 &flow_tcf_mask_supported.eth,
2065 &flow_tcf_mask_empty.eth,
2066 sizeof(flow_tcf_mask_supported.eth),
2070 if (mask.eth->type && mask.eth->type !=
2072 return rte_flow_error_set
2074 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2076 "no support for partial mask on"
2079 case RTE_FLOW_ITEM_TYPE_VLAN:
2080 ret = mlx5_flow_validate_item_vlan(items, item_flags,
2084 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
2085 mask.vlan = flow_tcf_item_mask
2086 (items, &rte_flow_item_vlan_mask,
2087 &flow_tcf_mask_supported.vlan,
2088 &flow_tcf_mask_empty.vlan,
2089 sizeof(flow_tcf_mask_supported.vlan),
2093 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
2094 (mask.vlan->tci & RTE_BE16(0xe000)) !=
2095 RTE_BE16(0xe000)) ||
2096 (mask.vlan->tci & RTE_BE16(0x0fff) &&
2097 (mask.vlan->tci & RTE_BE16(0x0fff)) !=
2098 RTE_BE16(0x0fff)) ||
2099 (mask.vlan->inner_type &&
2100 mask.vlan->inner_type != RTE_BE16(0xffff)))
2101 return rte_flow_error_set
2103 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2105 "no support for partial masks on"
2106 " \"tci\" (PCP and VID parts) and"
2107 " \"inner_type\" fields");
2109 case RTE_FLOW_ITEM_TYPE_IPV4:
2110 ret = mlx5_flow_validate_item_ipv4(items, item_flags,
2114 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2115 mask.ipv4 = flow_tcf_item_mask
2116 (items, &rte_flow_item_ipv4_mask,
2117 &flow_tcf_mask_supported.ipv4,
2118 &flow_tcf_mask_empty.ipv4,
2119 sizeof(flow_tcf_mask_supported.ipv4),
2123 if (mask.ipv4->hdr.next_proto_id &&
2124 mask.ipv4->hdr.next_proto_id != 0xff)
2125 return rte_flow_error_set
2127 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2129 "no support for partial mask on"
2130 " \"hdr.next_proto_id\" field");
2131 else if (mask.ipv4->hdr.next_proto_id)
2133 ((const struct rte_flow_item_ipv4 *)
2134 (items->spec))->hdr.next_proto_id;
2135 if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
2136 ret = flow_tcf_validate_vxlan_decap_ipv4
2142 case RTE_FLOW_ITEM_TYPE_IPV6:
2143 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
2147 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2148 mask.ipv6 = flow_tcf_item_mask
2149 (items, &rte_flow_item_ipv6_mask,
2150 &flow_tcf_mask_supported.ipv6,
2151 &flow_tcf_mask_empty.ipv6,
2152 sizeof(flow_tcf_mask_supported.ipv6),
2156 if (mask.ipv6->hdr.proto &&
2157 mask.ipv6->hdr.proto != 0xff)
2158 return rte_flow_error_set
2160 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2162 "no support for partial mask on"
2163 " \"hdr.proto\" field");
2164 else if (mask.ipv6->hdr.proto)
2166 ((const struct rte_flow_item_ipv6 *)
2167 (items->spec))->hdr.proto;
2168 if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
2169 ret = flow_tcf_validate_vxlan_decap_ipv6
2175 case RTE_FLOW_ITEM_TYPE_UDP:
2176 ret = mlx5_flow_validate_item_udp(items, item_flags,
2177 next_protocol, error);
2180 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
2181 mask.udp = flow_tcf_item_mask
2182 (items, &rte_flow_item_udp_mask,
2183 &flow_tcf_mask_supported.udp,
2184 &flow_tcf_mask_empty.udp,
2185 sizeof(flow_tcf_mask_supported.udp),
2189 if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
2190 ret = flow_tcf_validate_vxlan_decap_udp
		case RTE_FLOW_ITEM_TYPE_TCP:
			ret = mlx5_flow_validate_item_tcp
					     (items, item_flags,
					      next_protocol,
					      &flow_tcf_mask_supported.tcp,
					      error);
			if (ret < 0)
				return ret;
			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
			mask.tcp = flow_tcf_item_mask
				(items, &rte_flow_item_tcp_mask,
				 &flow_tcf_mask_supported.tcp,
				 &flow_tcf_mask_empty.tcp,
				 sizeof(flow_tcf_mask_supported.tcp),
				 error);
			if (!mask.tcp)
				return -rte_errno;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			if (!(action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP))
				return rte_flow_error_set
					(error, ENOTSUP,
					 RTE_FLOW_ERROR_TYPE_ITEM,
					 items,
					 "vni pattern should be followed by"
					 " vxlan decapsulation action");
			ret = mlx5_flow_validate_item_vxlan(items,
							    item_flags, error);
			if (ret < 0)
				return ret;
			item_flags |= MLX5_FLOW_LAYER_VXLAN;
			mask.vxlan = flow_tcf_item_mask
				(items, &rte_flow_item_vxlan_mask,
				 &flow_tcf_mask_supported.vxlan,
				 &flow_tcf_mask_empty.vxlan,
				 sizeof(flow_tcf_mask_supported.vxlan), error);
			if (!mask.vxlan)
				return -rte_errno;
			if (mask.vxlan->vni[0] != 0xff ||
			    mask.vxlan->vni[1] != 0xff ||
			    mask.vxlan->vni[2] != 0xff)
				return rte_flow_error_set
					(error, ENOTSUP,
					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
					 items,
					 "no support for partial or "
					 "empty mask on \"vxlan.vni\" field");
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  items, "item not supported");
		}
	}
	if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
	    (action_flags & MLX5_FLOW_ACTION_DROP))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  actions,
					  "set action is not compatible with "
					  "drop action");
	if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
	    !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  actions,
					  "set action must be followed by "
					  "port_id action");
	if (action_flags &
	   (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "no ipv4 item found in"
						  " pattern");
	}
	if (action_flags &
	   (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "no ipv6 item found in"
						  " pattern");
	}
	if (action_flags &
	   (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
		if (!(item_flags &
		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "no TCP/UDP item found in"
						  " pattern");
	}
	/*
	 * FW syndrome (0xA9C090):
	 *     set_flow_table_entry: push vlan action fte in fdb can ONLY be
	 *     forwarded to the uplink.
	 */
	if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
	    (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
	    ((struct priv *)port_id_dev->data->dev_private)->representor)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
					  "vlan push can only be applied"
					  " when forwarding to uplink port");
	/*
	 * FW syndrome (0x294609):
	 *     set_flow_table_entry: modify/pop/push actions in fdb flow table
	 *     are supported only while forwarding to vport.
	 */
	if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
	    !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
					  "vlan actions are supported"
					  " only with port_id action");
	if ((action_flags & MLX5_TCF_VXLAN_ACTIONS) &&
	    !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "vxlan actions are supported"
					  " only with port_id action");
	if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
					  "no fate action is found");
	if (action_flags &
	   (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
		if (!(item_flags &
		     (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
		      MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "no IP found in pattern");
	}
	if (action_flags &
	   (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "no ethernet found in"
						  " pattern");
	}
	if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
		if (!(item_flags &
		     (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
		      MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  NULL,
						  "no outer IP pattern found"
						  " for vxlan decap action");
		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  NULL,
						  "no outer UDP pattern found"
						  " for vxlan decap action");
		if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  NULL,
						  "no VNI pattern found"
						  " for vxlan decap action");
	}
	return 0;
}
/**
 * Calculate maximum size of memory for flow items of Linux TC flower and
 * extract specified items.
 *
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[out] item_flags
 *   Pointer to the detected items.
 *
 * @return
 *   Maximum size of memory for items.
 */
static int
flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
			    const struct rte_flow_item items[],
			    uint64_t *item_flags)
{
	int size = 0;
	uint64_t flags = 0;

	size += SZ_NLATTR_STRZ_OF("flower") +
		SZ_NLATTR_NEST + /* TCA_OPTIONS. */
		SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
	if (attr->group > 0)
		size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_PORT_ID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
				/* dst/src MAC addr and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L2;
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_TYPE_OF(uint16_t) +
				/* VLAN Ether type. */
				SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
				SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
			flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_TYPE_OF(uint32_t) * 4;
				/* dst/src IP addr and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 4;
				/* dst/src IP addr and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_TYPE_OF(uint16_t) * 4;
				/* dst/src port and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_TYPE_OF(uint16_t) * 4;
				/* dst/src port and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			size += SZ_NLATTR_TYPE_OF(uint32_t);
			flags |= MLX5_FLOW_LAYER_VXLAN;
			break;
		default:
			DRV_LOG(WARNING,
				"unsupported item %p type %d,"
				" items must be validated before flow creation",
				(const void *)items, items->type);
			break;
		}
	}
	*item_flags = flags;
	return size;
}
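/*
 * Sizing example (illustrative): for an ETH / IPV4 / UDP pattern the
 * loop above reserves room for the Ether type, IP proto, dst/src MAC,
 * IPv4 address and UDP port attributes together with their masks, on
 * top of the fixed "flower" + TCA_OPTIONS + TCA_CLS_FLAGS_SKIP_SW
 * overhead, so the Netlink buffer allocated by flow_tcf_prepare()
 * cannot be overrun during translation.
 */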
/**
 * Calculate size of memory to store the VXLAN encapsulation
 * related items in the Netlink message buffer. Items list
 * is specified by RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action.
 * The item list should be validated.
 *
 * @param[in] action
 *   RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action object.
 *   List of pattern items to scan data from.
 *
 * @return
 *   The size of the part of the Netlink message buffer needed to store
 *   the VXLAN encapsulation item attributes.
 */
static int
flow_tcf_vxlan_encap_size(const struct rte_flow_action *action)
{
	const struct rte_flow_item *items;
	int size = 0;

	assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP);
	assert(action->conf);
	items = ((const struct rte_flow_action_vxlan_encap *)
					action->conf)->definition;
	assert(items);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			/* This item does not require message buffer. */
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			size += SZ_NLATTR_DATA_OF(IPV4_ADDR_LEN) * 2;
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			size += SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 2;
			break;
		case RTE_FLOW_ITEM_TYPE_UDP: {
			const struct rte_flow_item_udp *udp = items->mask;

			size += SZ_NLATTR_TYPE_OF(uint16_t);
			if (!udp || udp->hdr.src_port != RTE_BE16(0x0000))
				size += SZ_NLATTR_TYPE_OF(uint16_t);
			break;
		}
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			size += SZ_NLATTR_TYPE_OF(uint32_t);
			break;
		default:
			assert(false);
			DRV_LOG(WARNING,
				"unsupported item %p type %d,"
				" items must be validated"
				" before flow creation",
				(const void *)items, items->type);
			return 0;
		}
	}
	return size;
}
/**
 * Calculate maximum size of memory for flow actions of Linux TC flower and
 * extract specified actions.
 *
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] action_flags
 *   Pointer to the detected actions.
 *
 * @return
 *   Maximum size of memory for actions.
 */
static int
flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
			      uint64_t *action_flags)
{
	int size = 0;
	uint64_t flags = 0;

	size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_PORT_ID:
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("mirred") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(struct tc_mirred);
			flags |= MLX5_FLOW_ACTION_PORT_ID;
			break;
		case RTE_FLOW_ACTION_TYPE_JUMP:
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("gact") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(struct tc_gact);
			flags |= MLX5_FLOW_ACTION_JUMP;
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("gact") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(struct tc_gact);
			flags |= MLX5_FLOW_ACTION_DROP;
			break;
		case RTE_FLOW_ACTION_TYPE_COUNT:
			break;
		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
			flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
			flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
			flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
			flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
			goto action_of_vlan;
action_of_vlan:
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("vlan") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(struct tc_vlan) +
				SZ_NLATTR_TYPE_OF(uint16_t) +
				/* VLAN protocol. */
				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
			break;
		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("tunnel_key") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(uint8_t);
			size += SZ_NLATTR_TYPE_OF(struct tc_tunnel_key);
			size += flow_tcf_vxlan_encap_size(actions) +
				RTE_ALIGN_CEIL /* preceding encap params. */
				(sizeof(struct flow_tcf_vxlan_encap),
				MNL_ALIGNTO);
			flags |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
			break;
		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("tunnel_key") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(uint8_t);
			size += SZ_NLATTR_TYPE_OF(struct tc_tunnel_key);
			size += RTE_ALIGN_CEIL /* preceding decap params. */
				(sizeof(struct flow_tcf_vxlan_decap),
				MNL_ALIGNTO);
			flags |= MLX5_FLOW_ACTION_VXLAN_DECAP;
			break;
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
		case RTE_FLOW_ACTION_TYPE_SET_TTL:
		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
			size += flow_tcf_get_pedit_actions_size(&actions,
								&flags);
			break;
		default:
			DRV_LOG(WARNING,
				"unsupported action %p type %d,"
				" items must be validated before flow creation",
				(const void *)actions, actions->type);
			break;
		}
	}
	*action_flags = flags;
	return size;
}
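/*
 * Accounting example (illustrative): a PORT_ID action reserves one
 * nested action index, the "mirred" kind string, a nested
 * TCA_ACT_OPTIONS and a struct tc_mirred payload; a VXLAN_ENCAP action
 * additionally reserves the encapsulation attributes computed by
 * flow_tcf_vxlan_encap_size() plus an aligned
 * struct flow_tcf_vxlan_encap header placed in front of the Netlink
 * message.
 */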
/**
 * Brand rtnetlink buffer with unique handle.
 *
 * This handle should be unique for a given network interface to avoid
 * collisions.
 *
 * @param nlh
 *   Pointer to Netlink message.
 * @param handle
 *   Unique 32-bit handle to use.
 */
static void
flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
{
	struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);

	tcm->tcm_handle = handle;
	DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
		(void *)nlh, handle);
}
/**
 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
 * memory required, allocates the memory, initializes Netlink message headers
 * and sets a unique TC message handle.
 *
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] item_flags
 *   Pointer to bit mask of all items detected.
 * @param[out] action_flags
 *   Pointer to bit mask of all actions detected.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   Pointer to mlx5_flow object on success,
 *   otherwise NULL and rte_errno is set.
 */
static struct mlx5_flow *
flow_tcf_prepare(const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 uint64_t *item_flags, uint64_t *action_flags,
		 struct rte_flow_error *error)
{
	size_t size = RTE_ALIGN_CEIL
			(sizeof(struct mlx5_flow),
			 alignof(struct flow_tcf_tunnel_hdr)) +
		      MNL_ALIGN(sizeof(struct nlmsghdr)) +
		      MNL_ALIGN(sizeof(struct tcmsg));
	struct mlx5_flow *dev_flow;
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	uint8_t *sp, *tun = NULL;

	size += flow_tcf_get_items_and_size(attr, items, item_flags);
	size += flow_tcf_get_actions_and_size(actions, action_flags);
	dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
	if (!dev_flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
				   "not enough memory to create E-Switch flow");
		return NULL;
	}
	sp = (uint8_t *)(dev_flow + 1);
	if (*action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) {
		sp = RTE_PTR_ALIGN
			(sp, alignof(struct flow_tcf_tunnel_hdr));
		tun = sp;
		sp += RTE_ALIGN_CEIL
			(sizeof(struct flow_tcf_vxlan_encap),
			MNL_ALIGNTO);
		size -= RTE_ALIGN_CEIL
			(sizeof(struct flow_tcf_vxlan_encap),
			MNL_ALIGNTO);
	} else if (*action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
		sp = RTE_PTR_ALIGN
			(sp, alignof(struct flow_tcf_tunnel_hdr));
		tun = sp;
		sp += RTE_ALIGN_CEIL
			(sizeof(struct flow_tcf_vxlan_decap),
			MNL_ALIGNTO);
		size -= RTE_ALIGN_CEIL
			(sizeof(struct flow_tcf_vxlan_decap),
			MNL_ALIGNTO);
	} else {
		sp = RTE_PTR_ALIGN(sp, MNL_ALIGNTO);
	}
	nlh = mnl_nlmsg_put_header(sp);
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	*dev_flow = (struct mlx5_flow){
		.tcf = (struct mlx5_flow_tcf){
			.nlsize = size - RTE_ALIGN_CEIL
				(sizeof(struct mlx5_flow),
				 alignof(struct flow_tcf_tunnel_hdr)),
			.tunnel = (struct flow_tcf_tunnel_hdr *)tun,
			.nlh = nlh,
			.tcm = tcm,
		},
	};
	if (*action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP)
		dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_DECAP;
	else if (*action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
		dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_ENCAP;
	/*
	 * Generate a reasonably unique handle based on the address of the
	 * target buffer.
	 *
	 * This is straightforward on 32-bit systems where the flow pointer can
	 * be used directly. Otherwise, its least significant part is taken
	 * after shifting it by the previous power of two of the pointed buffer
	 * size.
	 */
	if (sizeof(dev_flow) <= 4)
		flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
	else
		flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
				       rte_log2_u32(rte_align32prevpow2(size)));
	return dev_flow;
}
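/*
 * Branding example (illustrative): on a 64-bit system with a total flow
 * object size of, say, 600 bytes, rte_align32prevpow2(600) is 512, so
 * the handle becomes the flow pointer shifted right by log2(512) = 9
 * bits. This keeps the varying upper pointer bits inside the 32-bit TC
 * handle and makes collisions between flows on the same interface
 * unlikely.
 */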
/**
 * Make adjustments for supporting count actions.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] dev_flow
 *   Pointer to mlx5_flow.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success else a negative errno value is returned and rte_errno is set.
 */
static int
flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
				struct mlx5_flow *dev_flow,
				struct rte_flow_error *error)
{
	struct rte_flow *flow = dev_flow->flow;

	if (!flow->counter) {
		flow->counter = flow_tcf_counter_new();
		if (!flow->counter)
			return rte_flow_error_set(error, rte_errno,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  NULL,
						  "cannot get counter"
						  " context.");
	}
	return 0;
}

/**
 * Convert VXLAN VNI to 32-bit integer.
 *
 * @param[in] vni
 *   VXLAN VNI in 24-bit wire format.
 *
 * @return
 *   VXLAN VNI as a 32-bit integer value in network endian.
 */
static inline rte_be32_t
vxlan_vni_as_be32(const uint8_t vni[3])
{
	union {
		uint8_t vni[4];
		rte_be32_t dword;
	} ret = {
		.vni = { 0, vni[0], vni[1], vni[2] },
	};
	return ret.dword;
}
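/*
 * Conversion example (illustrative): VNI wire bytes
 * { 0x12, 0x34, 0x56 } produce the dword { 0x00, 0x12, 0x34, 0x56 } in
 * network byte order, i.e. the value expected by
 * TCA_FLOWER_KEY_ENC_KEY_ID and TCA_TUNNEL_KEY_ENC_KEY_ID.
 */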
/**
 * Helper function to process RTE_FLOW_ITEM_TYPE_ETH entry in configuration
 * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the MAC address fields
 * in the encapsulation parameters structure. The item must be prevalidated,
 * no validation checks are performed by this function.
 *
 * @param[in] spec
 *   RTE_FLOW_ITEM_TYPE_ETH entry specification.
 * @param[in] mask
 *   RTE_FLOW_ITEM_TYPE_ETH entry mask.
 * @param[out] encap
 *   Structure to fill the gathered MAC address data.
 */
static void
flow_tcf_parse_vxlan_encap_eth(const struct rte_flow_item_eth *spec,
			       const struct rte_flow_item_eth *mask,
			       struct flow_tcf_vxlan_encap *encap)
{
	/* Item must be validated before. No redundant checks. */
	assert(spec);
	if (!mask || !memcmp(&mask->dst,
			     &rte_flow_item_eth_mask.dst,
			     sizeof(rte_flow_item_eth_mask.dst))) {
		/*
		 * Ethernet addresses are not supported by
		 * tc as tunnel_key parameters. Destination
		 * address is needed to form encap packet
		 * header and retrieved by kernel from
		 * implicit sources (ARP table, etc),
		 * address masks are not supported at all.
		 */
		encap->eth.dst = spec->dst;
		encap->mask |= FLOW_TCF_ENCAP_ETH_DST;
	}
	if (!mask || !memcmp(&mask->src,
			     &rte_flow_item_eth_mask.src,
			     sizeof(rte_flow_item_eth_mask.src))) {
		/*
		 * Ethernet addresses are not supported by
		 * tc as tunnel_key parameters. Source ethernet
		 * address is ignored anyway.
		 */
		encap->eth.src = spec->src;
		encap->mask |= FLOW_TCF_ENCAP_ETH_SRC;
	}
}
/**
 * Helper function to process RTE_FLOW_ITEM_TYPE_IPV4 entry in configuration
 * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the IPV4 address fields
 * in the encapsulation parameters structure. The item must be prevalidated,
 * no validation checks are performed by this function.
 *
 * @param[in] spec
 *   RTE_FLOW_ITEM_TYPE_IPV4 entry specification.
 * @param[out] encap
 *   Structure to fill the gathered IPV4 address data.
 */
static void
flow_tcf_parse_vxlan_encap_ipv4(const struct rte_flow_item_ipv4 *spec,
				struct flow_tcf_vxlan_encap *encap)
{
	/* Item must be validated before. No redundant checks. */
	assert(spec);
	encap->ipv4.dst = spec->hdr.dst_addr;
	encap->ipv4.src = spec->hdr.src_addr;
	encap->mask |= FLOW_TCF_ENCAP_IPV4_SRC |
		       FLOW_TCF_ENCAP_IPV4_DST;
}

/**
 * Helper function to process RTE_FLOW_ITEM_TYPE_IPV6 entry in configuration
 * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the IPV6 address fields
 * in the encapsulation parameters structure. The item must be prevalidated,
 * no validation checks are performed by this function.
 *
 * @param[in] spec
 *   RTE_FLOW_ITEM_TYPE_IPV6 entry specification.
 * @param[out] encap
 *   Structure to fill the gathered IPV6 address data.
 */
static void
flow_tcf_parse_vxlan_encap_ipv6(const struct rte_flow_item_ipv6 *spec,
				struct flow_tcf_vxlan_encap *encap)
{
	/* Item must be validated before. No redundant checks. */
	assert(spec);
	memcpy(encap->ipv6.dst, spec->hdr.dst_addr, IPV6_ADDR_LEN);
	memcpy(encap->ipv6.src, spec->hdr.src_addr, IPV6_ADDR_LEN);
	encap->mask |= FLOW_TCF_ENCAP_IPV6_SRC |
		       FLOW_TCF_ENCAP_IPV6_DST;
}

/**
 * Helper function to process RTE_FLOW_ITEM_TYPE_UDP entry in configuration
 * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the UDP port fields
 * in the encapsulation parameters structure. The item must be prevalidated,
 * no validation checks are performed by this function.
 *
 * @param[in] spec
 *   RTE_FLOW_ITEM_TYPE_UDP entry specification.
 * @param[in] mask
 *   RTE_FLOW_ITEM_TYPE_UDP entry mask.
 * @param[out] encap
 *   Structure to fill the gathered UDP port data.
 */
static void
flow_tcf_parse_vxlan_encap_udp(const struct rte_flow_item_udp *spec,
			       const struct rte_flow_item_udp *mask,
			       struct flow_tcf_vxlan_encap *encap)
{
	assert(spec);
	encap->udp.dst = spec->hdr.dst_port;
	encap->mask |= FLOW_TCF_ENCAP_UDP_DST;
	if (!mask || mask->hdr.src_port != RTE_BE16(0x0000)) {
		encap->udp.src = spec->hdr.src_port;
		encap->mask |= FLOW_TCF_ENCAP_UDP_SRC;
	}
}

/**
 * Helper function to process RTE_FLOW_ITEM_TYPE_VXLAN entry in configuration
 * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the VNI fields
 * in the encapsulation parameters structure. The item must be prevalidated,
 * no validation checks are performed by this function.
 *
 * @param[in] spec
 *   RTE_FLOW_ITEM_TYPE_VXLAN entry specification.
 * @param[out] encap
 *   Structure to fill the gathered VNI address data.
 */
static void
flow_tcf_parse_vxlan_encap_vni(const struct rte_flow_item_vxlan *spec,
			       struct flow_tcf_vxlan_encap *encap)
{
	/* Item must be validated before. No redundant checks. */
	assert(spec);
	memcpy(encap->vxlan.vni, spec->vni, sizeof(encap->vxlan.vni));
	encap->mask |= FLOW_TCF_ENCAP_VXLAN_VNI;
}
/**
 * Populate consolidated encapsulation object from list of pattern items.
 *
 * Helper function to process configuration of action such as
 * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. The item list should be
 * validated, there is no way to return a meaningful error.
 *
 * @param[in] action
 *   RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action object.
 *   List of pattern items to gather data from.
 * @param[out] encap
 *   Structure to fill gathered data.
 */
static void
flow_tcf_vxlan_encap_parse(const struct rte_flow_action *action,
			   struct flow_tcf_vxlan_encap *encap)
{
	union {
		const struct rte_flow_item_eth *eth;
		const struct rte_flow_item_ipv4 *ipv4;
		const struct rte_flow_item_ipv6 *ipv6;
		const struct rte_flow_item_udp *udp;
		const struct rte_flow_item_vxlan *vxlan;
	} spec, mask;
	const struct rte_flow_item *items;

	assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP);
	assert(action->conf);
	items = ((const struct rte_flow_action_vxlan_encap *)
					action->conf)->definition;
	assert(items);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			mask.eth = items->mask;
			spec.eth = items->spec;
			flow_tcf_parse_vxlan_encap_eth(spec.eth, mask.eth,
						       encap);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			spec.ipv4 = items->spec;
			flow_tcf_parse_vxlan_encap_ipv4(spec.ipv4, encap);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			spec.ipv6 = items->spec;
			flow_tcf_parse_vxlan_encap_ipv6(spec.ipv6, encap);
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			mask.udp = items->mask;
			spec.udp = items->spec;
			flow_tcf_parse_vxlan_encap_udp(spec.udp, mask.udp,
						       encap);
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			spec.vxlan = items->spec;
			flow_tcf_parse_vxlan_encap_vni(spec.vxlan, encap);
			break;
		default:
			assert(false);
			DRV_LOG(WARNING,
				"unsupported item %p type %d,"
				" items must be validated"
				" before flow creation",
				(const void *)items, items->type);
			encap->mask = 0;
			return;
		}
	}
}
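/*
 * Parsing example (illustrative): an encap definition list of
 * ETH / IPV4 / UDP / VXLAN / END items fills encap->eth, encap->ipv4,
 * encap->udp and encap->vxlan.vni and raises the matching
 * FLOW_TCF_ENCAP_* bits in encap->mask, which the translation routine
 * below converts into TCA_TUNNEL_KEY_ENC_* attributes.
 */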
/**
 * Translate flow for Linux TC flower and construct Netlink message.
 *
 * @param[in] priv
 *   Pointer to the priv structure.
 * @param[in, out] flow
 *   Pointer to the sub flow.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	union {
		const struct rte_flow_item_port_id *port_id;
		const struct rte_flow_item_eth *eth;
		const struct rte_flow_item_vlan *vlan;
		const struct rte_flow_item_ipv4 *ipv4;
		const struct rte_flow_item_ipv6 *ipv6;
		const struct rte_flow_item_tcp *tcp;
		const struct rte_flow_item_udp *udp;
		const struct rte_flow_item_vxlan *vxlan;
	} spec, mask;
	union {
		const struct rte_flow_action_port_id *port_id;
		const struct rte_flow_action_jump *jump;
		const struct rte_flow_action_of_push_vlan *of_push_vlan;
		const struct rte_flow_action_of_set_vlan_vid *
			of_set_vlan_vid;
		const struct rte_flow_action_of_set_vlan_pcp *
			of_set_vlan_pcp;
	} conf;
	union {
		struct flow_tcf_tunnel_hdr *hdr;
		struct flow_tcf_vxlan_decap *vxlan;
	} decap = {
		.hdr = NULL,
	};
	union {
		struct flow_tcf_tunnel_hdr *hdr;
		struct flow_tcf_vxlan_encap *vxlan;
	} encap = {
		.hdr = NULL,
	};
	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
	struct nlmsghdr *nlh = dev_flow->tcf.nlh;
	struct tcmsg *tcm = dev_flow->tcf.tcm;
	uint32_t na_act_index_cur;
	bool eth_type_set = 0;
	bool vlan_present = 0;
	bool vlan_eth_type_set = 0;
	bool ip_proto_set = 0;
	struct nlattr *na_flower;
	struct nlattr *na_flower_act;
	struct nlattr *na_vlan_id = NULL;
	struct nlattr *na_vlan_priority = NULL;
	uint64_t item_flags = 0;
	int ret;

	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
						PTOI_TABLE_SZ_MAX(dev)));
	if (dev_flow->tcf.tunnel) {
		switch (dev_flow->tcf.tunnel->type) {
		case FLOW_TCF_TUNACT_VXLAN_DECAP:
			decap.vxlan = dev_flow->tcf.vxlan_decap;
			break;
		case FLOW_TCF_TUNACT_VXLAN_ENCAP:
			encap.vxlan = dev_flow->tcf.vxlan_encap;
			break;
		/* New tunnel actions can be added here. */
		default:
			assert(false);
			break;
		}
	}
	nlh = dev_flow->tcf.nlh;
	tcm = dev_flow->tcf.tcm;
	/* Prepare API must have been called beforehand. */
	assert(nlh != NULL && tcm != NULL);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ptoi[0].ifindex;
	tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
	/*
	 * Priority cannot be zero to prevent the kernel from picking one
	 * automatically.
	 */
	tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
				  RTE_BE16(ETH_P_ALL));
	if (attr->group > 0)
		mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
	mnl_attr_put_strz(nlh, TCA_KIND, "flower");
	na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		unsigned int i;

		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_PORT_ID:
			mask.port_id = flow_tcf_item_mask
				(items, &rte_flow_item_port_id_mask,
				 &flow_tcf_mask_supported.port_id,
				 &flow_tcf_mask_empty.port_id,
				 sizeof(flow_tcf_mask_supported.port_id),
				 error);
			assert(mask.port_id);
			if (mask.port_id == &flow_tcf_mask_empty.port_id)
				break;
			spec.port_id = items->spec;
			if (!mask.port_id->id)
				i = 0;
			else
				for (i = 0; ptoi[i].ifindex; ++i)
					if (ptoi[i].port_id == spec.port_id->id)
						break;
			assert(ptoi[i].ifindex);
			tcm->tcm_ifindex = ptoi[i].ifindex;
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			item_flags |= (item_flags & MLX5_FLOW_LAYER_VXLAN) ?
				      MLX5_FLOW_LAYER_INNER_L2 :
				      MLX5_FLOW_LAYER_OUTER_L2;
			mask.eth = flow_tcf_item_mask
				(items, &rte_flow_item_eth_mask,
				 &flow_tcf_mask_supported.eth,
				 &flow_tcf_mask_empty.eth,
				 sizeof(flow_tcf_mask_supported.eth),
				 error);
			assert(mask.eth);
			if (mask.eth == &flow_tcf_mask_empty.eth)
				break;
			spec.eth = items->spec;
			if (decap.vxlan &&
			    !(item_flags & MLX5_FLOW_LAYER_VXLAN)) {
				DRV_LOG(WARNING,
					"outer L2 addresses cannot be forced"
					" for vxlan decapsulation, parameter"
					" ignored");
				break;
			}
			if (mask.eth->type) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
						 spec.eth->type);
				eth_type_set = 1;
			}
			if (!is_zero_ether_addr(&mask.eth->dst)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
					     ETHER_ADDR_LEN,
					     spec.eth->dst.addr_bytes);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
					     ETHER_ADDR_LEN,
					     mask.eth->dst.addr_bytes);
			}
			if (!is_zero_ether_addr(&mask.eth->src)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
					     ETHER_ADDR_LEN,
					     spec.eth->src.addr_bytes);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
					     ETHER_ADDR_LEN,
					     mask.eth->src.addr_bytes);
			}
			assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			assert(!encap.hdr);
			assert(!decap.hdr);
			item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
			mask.vlan = flow_tcf_item_mask
				(items, &rte_flow_item_vlan_mask,
				 &flow_tcf_mask_supported.vlan,
				 &flow_tcf_mask_empty.vlan,
				 sizeof(flow_tcf_mask_supported.vlan),
				 error);
			assert(mask.vlan);
			if (!eth_type_set)
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
						 RTE_BE16(ETH_P_8021Q));
			eth_type_set = 1;
			vlan_present = 1;
			if (mask.vlan == &flow_tcf_mask_empty.vlan)
				break;
			spec.vlan = items->spec;
			if (mask.vlan->inner_type) {
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
						 spec.vlan->inner_type);
				vlan_eth_type_set = 1;
			}
			if (mask.vlan->tci & RTE_BE16(0xe000))
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
						(rte_be_to_cpu_16
						 (spec.vlan->tci) >> 13) & 0x7);
			if (mask.vlan->tci & RTE_BE16(0x0fff))
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
						 rte_be_to_cpu_16
						 (spec.vlan->tci &
						  RTE_BE16(0x0fff)));
			assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			mask.ipv4 = flow_tcf_item_mask
				(items, &rte_flow_item_ipv4_mask,
				 &flow_tcf_mask_supported.ipv4,
				 &flow_tcf_mask_empty.ipv4,
				 sizeof(flow_tcf_mask_supported.ipv4),
				 error);
			assert(mask.ipv4);
			spec.ipv4 = items->spec;
			if (!decap.vxlan) {
				if (!eth_type_set && !vlan_eth_type_set)
					mnl_attr_put_u16
						(nlh,
						 vlan_present ?
						 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
						 TCA_FLOWER_KEY_ETH_TYPE,
						 RTE_BE16(ETH_P_IP));
				eth_type_set = 1;
				vlan_eth_type_set = 1;
				if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
					break;
				if (mask.ipv4->hdr.next_proto_id) {
					mnl_attr_put_u8
						(nlh, TCA_FLOWER_KEY_IP_PROTO,
						 spec.ipv4->hdr.next_proto_id);
					ip_proto_set = 1;
				}
			} else {
				assert(mask.ipv4 != &flow_tcf_mask_empty.ipv4);
			}
			if (mask.ipv4->hdr.src_addr) {
				mnl_attr_put_u32
					(nlh, decap.vxlan ?
					 TCA_FLOWER_KEY_ENC_IPV4_SRC :
					 TCA_FLOWER_KEY_IPV4_SRC,
					 spec.ipv4->hdr.src_addr);
				mnl_attr_put_u32
					(nlh, decap.vxlan ?
					 TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK :
					 TCA_FLOWER_KEY_IPV4_SRC_MASK,
					 mask.ipv4->hdr.src_addr);
			}
			if (mask.ipv4->hdr.dst_addr) {
				mnl_attr_put_u32
					(nlh, decap.vxlan ?
					 TCA_FLOWER_KEY_ENC_IPV4_DST :
					 TCA_FLOWER_KEY_IPV4_DST,
					 spec.ipv4->hdr.dst_addr);
				mnl_attr_put_u32
					(nlh, decap.vxlan ?
					 TCA_FLOWER_KEY_ENC_IPV4_DST_MASK :
					 TCA_FLOWER_KEY_IPV4_DST_MASK,
					 mask.ipv4->hdr.dst_addr);
			}
			assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			mask.ipv6 = flow_tcf_item_mask
				(items, &rte_flow_item_ipv6_mask,
				 &flow_tcf_mask_supported.ipv6,
				 &flow_tcf_mask_empty.ipv6,
				 sizeof(flow_tcf_mask_supported.ipv6),
				 error);
			assert(mask.ipv6);
			spec.ipv6 = items->spec;
			if (!decap.vxlan) {
				if (!eth_type_set || !vlan_eth_type_set) {
					mnl_attr_put_u16
						(nlh,
						 vlan_present ?
						 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
						 TCA_FLOWER_KEY_ETH_TYPE,
						 RTE_BE16(ETH_P_IPV6));
				}
				eth_type_set = 1;
				vlan_eth_type_set = 1;
				if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
					break;
				if (mask.ipv6->hdr.proto) {
					mnl_attr_put_u8
						(nlh, TCA_FLOWER_KEY_IP_PROTO,
						 spec.ipv6->hdr.proto);
					ip_proto_set = 1;
				}
			} else {
				assert(mask.ipv6 != &flow_tcf_mask_empty.ipv6);
			}
			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
				mnl_attr_put(nlh, decap.vxlan ?
					     TCA_FLOWER_KEY_ENC_IPV6_SRC :
					     TCA_FLOWER_KEY_IPV6_SRC,
					     IPV6_ADDR_LEN,
					     spec.ipv6->hdr.src_addr);
				mnl_attr_put(nlh, decap.vxlan ?
					     TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK :
					     TCA_FLOWER_KEY_IPV6_SRC_MASK,
					     IPV6_ADDR_LEN,
					     mask.ipv6->hdr.src_addr);
			}
			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
				mnl_attr_put(nlh, decap.vxlan ?
					     TCA_FLOWER_KEY_ENC_IPV6_DST :
					     TCA_FLOWER_KEY_IPV6_DST,
					     IPV6_ADDR_LEN,
					     spec.ipv6->hdr.dst_addr);
				mnl_attr_put(nlh, decap.vxlan ?
					     TCA_FLOWER_KEY_ENC_IPV6_DST_MASK :
					     TCA_FLOWER_KEY_IPV6_DST_MASK,
					     IPV6_ADDR_LEN,
					     mask.ipv6->hdr.dst_addr);
			}
			assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
			mask.udp = flow_tcf_item_mask
				(items, &rte_flow_item_udp_mask,
				 &flow_tcf_mask_supported.udp,
				 &flow_tcf_mask_empty.udp,
				 sizeof(flow_tcf_mask_supported.udp),
				 error);
			assert(mask.udp);
			spec.udp = items->spec;
			if (!decap.vxlan) {
				if (!ip_proto_set)
					mnl_attr_put_u8
						(nlh, TCA_FLOWER_KEY_IP_PROTO,
						 IPPROTO_UDP);
				if (mask.udp == &flow_tcf_mask_empty.udp)
					break;
			} else {
				assert(mask.udp != &flow_tcf_mask_empty.udp);
				decap.vxlan->udp_port =
					rte_be_to_cpu_16
						(spec.udp->hdr.dst_port);
			}
			if (mask.udp->hdr.src_port) {
				mnl_attr_put_u16
					(nlh, decap.vxlan ?
					 TCA_FLOWER_KEY_ENC_UDP_SRC_PORT :
					 TCA_FLOWER_KEY_UDP_SRC,
					 spec.udp->hdr.src_port);
				mnl_attr_put_u16
					(nlh, decap.vxlan ?
					 TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK :
					 TCA_FLOWER_KEY_UDP_SRC_MASK,
					 mask.udp->hdr.src_port);
			}
			if (mask.udp->hdr.dst_port) {
				mnl_attr_put_u16
					(nlh, decap.vxlan ?
					 TCA_FLOWER_KEY_ENC_UDP_DST_PORT :
					 TCA_FLOWER_KEY_UDP_DST,
					 spec.udp->hdr.dst_port);
				mnl_attr_put_u16
					(nlh, decap.vxlan ?
					 TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK :
					 TCA_FLOWER_KEY_UDP_DST_MASK,
					 mask.udp->hdr.dst_port);
			}
			assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
			mask.tcp = flow_tcf_item_mask
				(items, &rte_flow_item_tcp_mask,
				 &flow_tcf_mask_supported.tcp,
				 &flow_tcf_mask_empty.tcp,
				 sizeof(flow_tcf_mask_supported.tcp),
				 error);
			assert(mask.tcp);
			if (!ip_proto_set)
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
						IPPROTO_TCP);
			if (mask.tcp == &flow_tcf_mask_empty.tcp)
				break;
			spec.tcp = items->spec;
			if (mask.tcp->hdr.src_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
						 spec.tcp->hdr.src_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_TCP_SRC_MASK,
						 mask.tcp->hdr.src_port);
			}
			if (mask.tcp->hdr.dst_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
						 spec.tcp->hdr.dst_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_TCP_DST_MASK,
						 mask.tcp->hdr.dst_port);
			}
			if (mask.tcp->hdr.tcp_flags) {
				mnl_attr_put_u16
					(nlh,
					 TCA_FLOWER_KEY_TCP_FLAGS,
					 rte_cpu_to_be_16
						(spec.tcp->hdr.tcp_flags));
				mnl_attr_put_u16
					(nlh,
					 TCA_FLOWER_KEY_TCP_FLAGS_MASK,
					 rte_cpu_to_be_16
						(mask.tcp->hdr.tcp_flags));
			}
			assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			assert(decap.vxlan);
			item_flags |= MLX5_FLOW_LAYER_VXLAN;
			spec.vxlan = items->spec;
			mnl_attr_put_u32(nlh,
					 TCA_FLOWER_KEY_ENC_KEY_ID,
					 vxlan_vni_as_be32(spec.vxlan->vni));
			assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "item not supported");
		}
	}
	na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
	na_act_index_cur = 1;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		struct nlattr *na_act_index;
		struct nlattr *na_act;
		unsigned int vlan_act;
		unsigned int i;

		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_PORT_ID:
			conf.port_id = actions->conf;
			if (conf.port_id->original)
				i = 0;
			else
				for (i = 0; ptoi[i].ifindex; ++i)
					if (ptoi[i].port_id == conf.port_id->id)
						break;
			assert(ptoi[i].ifindex);
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			if (encap.hdr) {
				assert(dev_flow->tcf.tunnel);
				dev_flow->tcf.tunnel->ifindex_ptr =
					&((struct tc_mirred *)
					mnl_attr_get_payload
					(mnl_nlmsg_get_payload_tail
						(nlh)))->ifindex;
			}
			mnl_attr_put(nlh, TCA_MIRRED_PARMS,
				     sizeof(struct tc_mirred),
				     &(struct tc_mirred){
					.action = TC_ACT_STOLEN,
					.eaction = TCA_EGRESS_REDIR,
					.ifindex = ptoi[i].ifindex,
				     });
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			break;
		case RTE_FLOW_ACTION_TYPE_JUMP:
			conf.jump = actions->conf;
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_GACT_PARMS,
				     sizeof(struct tc_gact),
				     &(struct tc_gact){
					.action = TC_ACT_GOTO_CHAIN |
						  conf.jump->group,
				     });
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_GACT_PARMS,
				     sizeof(struct tc_gact),
				     &(struct tc_gact){
					.action = TC_ACT_SHOT,
				     });
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			break;
		case RTE_FLOW_ACTION_TYPE_COUNT:
			/*
			 * Driver adds the count action implicitly for
			 * each rule it creates.
			 */
			ret = flow_tcf_translate_action_count(dev,
							      dev_flow, error);
			if (ret < 0)
				return ret;
			break;
		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
			conf.of_push_vlan = NULL;
			vlan_act = TCA_VLAN_ACT_POP;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
			conf.of_push_vlan = actions->conf;
			vlan_act = TCA_VLAN_ACT_PUSH;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
			conf.of_set_vlan_vid = actions->conf;
			if (na_vlan_id)
				goto override_na_vlan_id;
			vlan_act = TCA_VLAN_ACT_MODIFY;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
			conf.of_set_vlan_pcp = actions->conf;
			if (na_vlan_priority)
				goto override_na_vlan_priority;
			vlan_act = TCA_VLAN_ACT_MODIFY;
			goto action_of_vlan;
action_of_vlan:
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_VLAN_PARMS,
				     sizeof(struct tc_vlan),
				     &(struct tc_vlan){
					.action = TC_ACT_PIPE,
					.v_action = vlan_act,
				     });
			if (vlan_act == TCA_VLAN_ACT_POP) {
				mnl_attr_nest_end(nlh, na_act);
				mnl_attr_nest_end(nlh, na_act_index);
				break;
			}
			if (vlan_act == TCA_VLAN_ACT_PUSH)
				mnl_attr_put_u16(nlh,
						 TCA_VLAN_PUSH_VLAN_PROTOCOL,
						 conf.of_push_vlan->ethertype);
			na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
			mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
			na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
			mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			if (actions->type ==
			    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
override_na_vlan_id:
				na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
				*(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
					rte_be_to_cpu_16
					(conf.of_set_vlan_vid->vlan_vid);
			} else if (actions->type ==
				   RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
override_na_vlan_priority:
				na_vlan_priority->nla_type =
					TCA_VLAN_PUSH_VLAN_PRIORITY;
				*(uint8_t *)mnl_attr_get_payload
					(na_vlan_priority) =
					conf.of_set_vlan_pcp->vlan_pcp;
			}
			break;
		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
			assert(decap.vxlan);
			assert(dev_flow->tcf.tunnel);
			dev_flow->tcf.tunnel->ifindex_ptr =
				(unsigned int *)&tcm->tcm_ifindex;
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "tunnel_key");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_TUNNEL_KEY_PARMS,
				     sizeof(struct tc_tunnel_key),
				     &(struct tc_tunnel_key){
					.action = TC_ACT_PIPE,
					.t_action = TCA_TUNNEL_KEY_ACT_RELEASE,
				     });
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
			break;
		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
			assert(encap.vxlan);
			flow_tcf_vxlan_encap_parse(actions, encap.vxlan);
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "tunnel_key");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_TUNNEL_KEY_PARMS,
				     sizeof(struct tc_tunnel_key),
				     &(struct tc_tunnel_key){
					.action = TC_ACT_PIPE,
					.t_action = TCA_TUNNEL_KEY_ACT_SET,
				     });
			if (encap.vxlan->mask & FLOW_TCF_ENCAP_UDP_DST)
				mnl_attr_put_u16(nlh,
					 TCA_TUNNEL_KEY_ENC_DST_PORT,
					 encap.vxlan->udp.dst);
			if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV4_SRC)
				mnl_attr_put_u32(nlh,
					 TCA_TUNNEL_KEY_ENC_IPV4_SRC,
					 encap.vxlan->ipv4.src);
			if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV4_DST)
				mnl_attr_put_u32(nlh,
					 TCA_TUNNEL_KEY_ENC_IPV4_DST,
					 encap.vxlan->ipv4.dst);
			if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV6_SRC)
				mnl_attr_put(nlh,
					 TCA_TUNNEL_KEY_ENC_IPV6_SRC,
					 sizeof(encap.vxlan->ipv6.src),
					 &encap.vxlan->ipv6.src);
			if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV6_DST)
				mnl_attr_put(nlh,
					 TCA_TUNNEL_KEY_ENC_IPV6_DST,
					 sizeof(encap.vxlan->ipv6.dst),
					 &encap.vxlan->ipv6.dst);
			if (encap.vxlan->mask & FLOW_TCF_ENCAP_VXLAN_VNI)
				mnl_attr_put_u32(nlh,
					 TCA_TUNNEL_KEY_ENC_KEY_ID,
					 vxlan_vni_as_be32
						(encap.vxlan->vxlan.vni));
			mnl_attr_put_u8(nlh, TCA_TUNNEL_KEY_NO_CSUM, 0);
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
			break;
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
		case RTE_FLOW_ACTION_TYPE_SET_TTL:
		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			flow_tcf_create_pedit_mnl_msg(nlh,
						      &actions, item_flags);
			mnl_attr_nest_end(nlh, na_act_index);
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
	}
	assert(na_flower);
	assert(na_flower_act);
	mnl_attr_nest_end(nlh, na_flower_act);
	mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, decap.vxlan ?
			 0 : TCA_CLS_FLAGS_SKIP_SW);
	mnl_attr_nest_end(nlh, na_flower);
	if (dev_flow->tcf.tunnel && dev_flow->tcf.tunnel->ifindex_ptr)
		dev_flow->tcf.tunnel->ifindex_org =
			*dev_flow->tcf.tunnel->ifindex_ptr;
	assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
	return 0;
}
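/*
 * For reference (illustrative only): a rule such as
 *
 *	flow create 0 transfer ingress pattern eth / ipv4 / udp /
 *	     end actions port_id id 1 / end
 *
 * is translated above into a Netlink message roughly equivalent to the
 * following tc command:
 *
 *	tc filter add dev <PF ifname> ingress protocol all prio 1 \
 *	   flower eth_type ipv4 ip_proto udp skip_sw \
 *	   action mirred egress redirect dev <representor ifname>
 */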
/**
 * Send Netlink message with acknowledgment.
 *
 * @param tcf
 *   Flow context to use.
 * @param nlh
 *   Message to send. This function always raises the NLM_F_ACK flag before
 *   sending.
 * @param[in] msglen
 *   Message length. Message buffer may contain multiple commands and
 *   nlmsg_len field not always corresponds to actual message length.
 *   If 0 specified the nlmsg_len field in header is used as message length.
 * @param[in] cb
 *   Callback handler for received message.
 * @param[in] arg
 *   Context pointer for callback handler.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_tcf_nl_ack(struct mlx5_flow_tcf_context *tcf,
		struct nlmsghdr *nlh,
		uint32_t msglen,
		mnl_cb_t cb, void *arg)
{
	unsigned int portid = mnl_socket_get_portid(tcf->nl);
	uint32_t seq = tcf->seq++;
	int err, ret;

	assert(tcf->nl);
	assert(tcf->buf);
	if (!seq)
		/* seq 0 is reserved for kernel event-driven notifications. */
		seq = tcf->seq++;
	nlh->nlmsg_seq = seq;
	if (!msglen)
		msglen = nlh->nlmsg_len;
	nlh->nlmsg_flags |= NLM_F_ACK;
	ret = mnl_socket_sendto(tcf->nl, nlh, msglen);
	err = (ret <= 0) ? errno : 0;
	nlh = (struct nlmsghdr *)(tcf->buf);
	/*
	 * The following loop postpones non-fatal errors until multipart
	 * messages are complete.
	 */
	if (ret > 0)
		while (true) {
			ret = mnl_socket_recvfrom(tcf->nl, tcf->buf,
						  tcf->buf_size);
			if (ret < 0) {
				err = errno;
				break;
			}
			ret = mnl_cb_run(nlh, ret, seq, portid,
					 cb, arg);
			if (ret < 0) {
				err = errno;
				break;
			}
			/* Will receive till end of multipart message */
			if (!(nlh->nlmsg_flags & NLM_F_MULTI) ||
			      nlh->nlmsg_type == NLMSG_DONE)
				break;
		}
	if (!err)
		return 0;
	rte_errno = err;
	return -err;
}
/**
 * Apply flow to E-Switch by sending Netlink message.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to the sub flow.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
	       struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
	struct mlx5_flow *dev_flow;
	struct nlmsghdr *nlh;

	dev_flow = LIST_FIRST(&flow->dev_flows);
	/* E-Switch flow can't be expanded. */
	assert(!LIST_NEXT(dev_flow, next));
	nlh = dev_flow->tcf.nlh;
	nlh->nlmsg_type = RTM_NEWTFILTER;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	if (!flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL))
		return 0;
	return rte_flow_error_set(error, rte_errno,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
				  "netlink: failed to create TC flow rule");
}
/**
 * Remove flow from E-Switch by sending Netlink message.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to the sub flow.
 */
static void
flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
	struct mlx5_flow *dev_flow;
	struct nlmsghdr *nlh;

	if (!flow)
		return;
	dev_flow = LIST_FIRST(&flow->dev_flows);
	if (!dev_flow)
		return;
	/* E-Switch flow can't be expanded. */
	assert(!LIST_NEXT(dev_flow, next));
	nlh = dev_flow->tcf.nlh;
	nlh->nlmsg_type = RTM_DELTFILTER;
	nlh->nlmsg_flags = NLM_F_REQUEST;
	flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL);
}
/**
 * Remove flow from E-Switch and release resources of the device flow.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to the sub flow.
 */
static void
flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_flow *dev_flow;

	if (!flow)
		return;
	flow_tcf_remove(dev, flow);
	if (flow->counter) {
		if (--flow->counter->ref_cnt == 0) {
			rte_free(flow->counter);
			flow->counter = NULL;
		}
	}
	dev_flow = LIST_FIRST(&flow->dev_flows);
	if (!dev_flow)
		return;
	/* E-Switch flow can't be expanded. */
	assert(!LIST_NEXT(dev_flow, next));
	LIST_REMOVE(dev_flow, next);
	rte_free(dev_flow);
}
/**
 * Helper routine for figuring the space size required for a parse buffer.
 *
 * @param array
 *   array of values to use.
 * @param idx
 *   Current location in array.
 * @param value
 *   Value to compare with.
 *
 * @return
 *   The maximum between the given value and the array value on index.
 */
static uint16_t
flow_tcf_arr_val_max(uint16_t array[], int idx, uint16_t value)
{
	return idx < 0 ? (value) : RTE_MAX((array)[idx], value);
}
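/*
 * Example (illustrative): with rta_type[] = { TCA_STATS_BASIC,
 * TCA_ACT_STATS, TCA_FLOWER_ACT, TCA_OPTIONS } and idx = 3, the routine
 * returns RTE_MAX(rta_type[3], TCA_OPTIONS), i.e. the table size needed
 * to parse attributes at the current nesting level; a negative idx
 * simply yields the fallback value.
 */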
/**
 * Parse rtnetlink message attributes filling the attribute table with the info
 * retrieved.
 *
 * @param tb
 *   Attribute table to be filled.
 * @param[in] max
 *   Maximum entry in the attribute table.
 * @param[in] rta
 *   The attributes section in the message to be parsed.
 * @param[in] len
 *   The length of the attributes section in the message.
 */
static void
flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
			 struct rtattr *rta, int len)
{
	unsigned short type;

	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
	while (RTA_OK(rta, len)) {
		type = rta->rta_type;
		if (type <= max && !tb[type])
			tb[type] = rta;
		rta = RTA_NEXT(rta, len);
	}
}
/**
 * Extract flow counters from flower action.
 *
 * @param[in] rta
 *   flower action stats properties in the Netlink message received.
 * @param[in] rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   we need to traverse in order to get to the requested object.
 * @param[in] idx
 *   Current location in rta_type table.
 * @param[in, out] data
 *   data holding the count statistics of the rte_flow retrieved from
 *   the message query.
 *
 * @return
 *   0 if data was found and retrieved, -1 otherwise.
 */
static int
flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
				       uint16_t rta_type[], int idx,
				       struct gnet_stats_basic *data)
{
	int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
						 TCA_STATS_BASIC);
	struct rtattr *tbs[tca_stats_max + 1];

	if (rta == NULL || idx < 0)
		return -1;
	flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
				 RTA_DATA(rta), RTA_PAYLOAD(rta));
	switch (rta_type[idx]) {
	case TCA_STATS_BASIC:
		if (tbs[TCA_STATS_BASIC]) {
			memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
			       RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
				       sizeof(*data)));
			return 0;
		}
		break;
	default:
		break;
	}
	return -1;
}
/**
 * Parse flower single action retrieving the requested action attribute,
 * if found.
 *
 * @param[in] arg
 *   flower action properties in the Netlink message received.
 * @param[in] rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   we need to traverse in order to get to the requested object.
 * @param[in] idx
 *   Current location in rta_type table.
 * @param[in, out] data
 *   Count statistics retrieved from the message query.
 *
 * @return
 *   0 if data was found and retrieved, -1 otherwise.
 */
static int
flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
				     uint16_t rta_type[], int idx, void *data)
{
	int tca_act_max = flow_tcf_arr_val_max(rta_type, idx, TCA_ACT_STATS);
	struct rtattr *tb[tca_act_max + 1];

	if (arg == NULL || idx < 0)
		return -1;
	flow_tcf_nl_parse_rtattr(tb, tca_act_max,
				 RTA_DATA(arg), RTA_PAYLOAD(arg));
	if (tb[TCA_ACT_KIND] == NULL)
		return -1;
	switch (rta_type[idx]) {
	case TCA_ACT_STATS:
		if (tb[TCA_ACT_STATS])
			return flow_tcf_nl_action_stats_parse_and_get
					(tb[TCA_ACT_STATS],
					 rta_type, --idx,
					 (struct gnet_stats_basic *)data);
		break;
	default:
		break;
	}
	return -1;
}
/**
 * Parse flower action section in the message retrieving the requested
 * attribute from the first action that provides it.
 *
 * @param[in] arg
 *   flower section in the Netlink message received.
 * @param[in] rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   we need to traverse in order to get to the requested object.
 * @param[in] idx
 *   Current location in rta_type table.
 * @param[in, out] data
 *   data retrieved from the message query.
 *
 * @return
 *   0 if data was found and retrieved, -1 otherwise.
 */
static int
flow_tcf_nl_action_parse_and_get(struct rtattr *arg,
				 uint16_t rta_type[], int idx, void *data)
{
	struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
	int i;

	if (arg == NULL || idx < 0)
		return -1;
	flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
				 RTA_DATA(arg), RTA_PAYLOAD(arg));
	switch (rta_type[idx]) {
	/*
	 * flow counters are stored in the actions defined by the flow
	 * and not in the flow itself, therefore we need to traverse the
	 * flower chain of actions in search for them.
	 *
	 * Note that the index is not decremented here.
	 */
	case TCA_ACT_STATS:
		for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
			if (tb[i] &&
			    !flow_tcf_nl_parse_one_action_and_get(tb[i],
								  rta_type,
								  idx, data))
				return 0;
		}
		break;
	default:
		break;
	}
	return -1;
}
/**
 * Parse flower classifier options in the message, retrieving the requested
 * attribute if found.
 *
 * @param[in] opt
 *   flower section in the Netlink message received.
 * @param[in] rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   we need to traverse in order to get to the requested object.
 * @param[in] idx
 *   Current location in rta_type table.
 * @param[in, out] data
 *   data retrieved from the message query.
 *
 * @return
 *   0 if data was found and retrieved, -1 otherwise.
 */
static int
flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
			       uint16_t rta_type[], int idx, void *data)
{
	int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
						  TCA_FLOWER_ACT);
	struct rtattr *tb[tca_flower_max + 1];

	if (!opt || idx < 0)
		return -1;
	flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
				 RTA_DATA(opt), RTA_PAYLOAD(opt));
	switch (rta_type[idx]) {
	case TCA_FLOWER_ACT:
		if (tb[TCA_FLOWER_ACT])
			return flow_tcf_nl_action_parse_and_get
							(tb[TCA_FLOWER_ACT],
							 rta_type, --idx, data);
		break;
	default:
		break;
	}
	return -1;
}
/**
 * Parse Netlink reply on filter query, retrieving the flow counters.
 *
 * @param[in] cnlh
 *   Message received from Netlink.
 * @param[in] rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   we need to traverse in order to get to the requested object.
 * @param[in] idx
 *   Current location in rta_type table.
 * @param[in, out] data
 *   data retrieved from the message query.
 *
 * @return
 *   0 if data was found and retrieved, -1 otherwise.
 */
static int
flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
				 uint16_t rta_type[], int idx, void *data)
{
	struct nlmsghdr *nlh = cnlh;
	struct tcmsg *t = NLMSG_DATA(nlh);
	int len = nlh->nlmsg_len;
	int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
	struct rtattr *tb[tca_max + 1];

	if (idx < 0)
		return -1;
	if (nlh->nlmsg_type != RTM_NEWTFILTER &&
	    nlh->nlmsg_type != RTM_GETTFILTER &&
	    nlh->nlmsg_type != RTM_DELTFILTER)
		return -1;
	len -= NLMSG_LENGTH(sizeof(*t));
	if (len < 0)
		return -1;
	flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
	/* Not a TC flower flow - bail out */
	if (!tb[TCA_KIND] ||
	    strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
		return -1;
	switch (rta_type[idx]) {
	case TCA_OPTIONS:
		if (tb[TCA_OPTIONS])
			return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
							      rta_type,
							      --idx, data);
		break;
	default:
		break;
	}
	return -1;
}
/**
 * A callback to parse Netlink reply on TC flower query.
 *
 * @param[in] nlh
 *   Message received from Netlink.
 * @param[in, out] data
 *   Pointer to data area to be filled by the parsing routine.
 *   assumed to be a pointer to struct flow_tcf_stats_basic.
 *
 * @return
 *   MNL_CB_OK value.
 */
static int
flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
{
	/*
	 * The backward sequence of rta_types to pass in order to get
	 * to the counters.
	 */
	uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
				TCA_FLOWER_ACT, TCA_OPTIONS };
	struct flow_tcf_stats_basic *sb_data = data;
	union {
		const struct nlmsghdr *c;
		struct nlmsghdr *nc;
	} tnlh = { .c = nlh };

	if (!flow_tcf_nl_filter_parse_and_get(tnlh.nc, rta_type,
					      RTE_DIM(rta_type) - 1,
					      (void *)&sb_data->counters))
		sb_data->valid = true;
	return MNL_CB_OK;
}
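/*
 * Traversal example (illustrative): rta_type[] above is consumed from
 * its last entry backwards, TCA_OPTIONS -> TCA_FLOWER_ACT ->
 * TCA_ACT_STATS -> TCA_STATS_BASIC, each helper decrementing "idx" as
 * it descends one nesting level of the reply until the
 * gnet_stats_basic counters are reached.
 */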
/**
 * Query a TC flower rule for its statistics via netlink.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to the sub flow.
 * @param[out] data
 *   data retrieved by the query.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_tcf_query_count(struct rte_eth_dev *dev,
		     struct rte_flow *flow,
		     void *data,
		     struct rte_flow_error *error)
{
	struct flow_tcf_stats_basic sb_data = { 0 };
	struct rte_flow_query_count *qc = data;
	struct priv *priv = dev->data->dev_private;
	struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
	struct mnl_socket *nl = ctx->nl;
	struct mlx5_flow *dev_flow;
	struct nlmsghdr *nlh;
	uint32_t seq = priv->tcf_context->seq++;
	ssize_t ret;

	dev_flow = LIST_FIRST(&flow->dev_flows);
	/* E-Switch flow can't be expanded. */
	assert(!LIST_NEXT(dev_flow, next));
	if (!dev_flow->flow->counter)
		goto notsup_exit;
	nlh = dev_flow->tcf.nlh;
	nlh->nlmsg_type = RTM_GETTFILTER;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
	nlh->nlmsg_seq = seq;
	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
		goto error_exit;
	ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
	if (ret <= 0)
		goto error_exit;
	ret = mnl_cb_run(ctx->buf, ret, seq,
			 mnl_socket_get_portid(nl),
			 flow_tcf_nl_message_get_stats_basic,
			 (void *)&sb_data);
	/* Return the delta from last reset. */
	if (sb_data.valid) {
		qc->hits_set = 1;
		qc->bytes_set = 1;
		qc->hits = sb_data.counters.packets - flow->counter->hits;
		qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
		if (qc->reset) {
			flow->counter->hits = sb_data.counters.packets;
			flow->counter->bytes = sb_data.counters.bytes;
		}
		return 0;
	}
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL,
				  "flow does not have counter");
error_exit:
	return rte_flow_error_set
			(error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			 NULL, "netlink: failed to read flow rule counters");
notsup_exit:
	return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			 NULL, "counters are not available.");
}
/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
static int
flow_tcf_query(struct rte_eth_dev *dev,
	       struct rte_flow *flow,
	       const struct rte_flow_action *actions,
	       void *data,
	       struct rte_flow_error *error)
{
	int ret = -EINVAL;

	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_COUNT:
			ret = flow_tcf_query_count(dev, flow, data, error);
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
	}
	return ret;
}
const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
	.validate = flow_tcf_validate,
	.prepare = flow_tcf_prepare,
	.translate = flow_tcf_translate,
	.apply = flow_tcf_apply,
	.remove = flow_tcf_remove,
	.destroy = flow_tcf_destroy,
	.query = flow_tcf_query,
};
/**
 * Create and configure a libmnl socket for Netlink flow rules.
 *
 * @return
 *   A valid libmnl socket object pointer on success, NULL otherwise and
 *   rte_errno is set.
 */
static struct mnl_socket *
flow_tcf_mnl_socket_create(void)
{
	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);

	if (nl) {
		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
				      sizeof(int));
		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
			return nl;
	}
	rte_errno = errno;
	if (nl)
		mnl_socket_close(nl);
	return NULL;
}
/**
 * Destroy a libmnl socket.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind.
 */
static void
flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
{
	if (nl)
		mnl_socket_close(nl);
}
/**
 * Initialize ingress qdisc of a given network interface.
 *
 * @param ctx
 *   Pointer to tc-flower context to use.
 * @param ifindex
 *   Index of network interface to initialize.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
		   unsigned int ifindex, struct rte_flow_error *error)
{
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	alignas(struct nlmsghdr)
	uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];

	/* Destroy existing ingress qdisc and everything attached to it. */
	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_DELQDISC;
	nlh->nlmsg_flags = NLM_F_REQUEST;
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ifindex;
	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
	tcm->tcm_parent = TC_H_INGRESS;
	/* Ignore errors when qdisc is already absent. */
	if (flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL) &&
	    rte_errno != EINVAL && rte_errno != ENOENT)
		return rte_flow_error_set(error, rte_errno,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					  "netlink: failed to remove ingress"
					  " qdisc");
	/* Create fresh ingress qdisc. */
	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_NEWQDISC;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ifindex;
	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
	tcm->tcm_parent = TC_H_INGRESS;
	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
	if (flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL))
		return rte_flow_error_set(error, rte_errno,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					  "netlink: failed to create ingress"
					  " qdisc");
	return 0;
}
/**
 * Create libmnl context for Netlink flow rules.
 *
 * @return
 *   A valid libmnl socket object pointer on success, NULL otherwise and
 *   rte_errno is set.
 */
struct mlx5_flow_tcf_context *
mlx5_flow_tcf_context_create(void)
{
	struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
							sizeof(*ctx),
							sizeof(uint32_t));
	if (!ctx)
		goto error;
	ctx->nl = flow_tcf_mnl_socket_create();
	if (!ctx->nl)
		goto error;
	ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
	ctx->buf = rte_zmalloc(__func__,
			       ctx->buf_size, sizeof(uint32_t));
	if (!ctx->buf)
		goto error;
	ctx->seq = random();
	return ctx;
error:
	mlx5_flow_tcf_context_destroy(ctx);
	return NULL;
}
/**
 * Destroy a libmnl context.
 *
 * @param ctx
 *   Libmnl socket of the @p NETLINK_ROUTE kind.
 */
void
mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
{
	if (!ctx)
		return;
	flow_tcf_mnl_socket_destroy(ctx->nl);
	rte_free(ctx);
}