1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/gen_stats.h>
10 #include <linux/if_ether.h>
11 #include <linux/netlink.h>
12 #include <linux/pkt_cls.h>
13 #include <linux/pkt_sched.h>
14 #include <linux/rtnetlink.h>
15 #include <linux/tc_act/tc_gact.h>
16 #include <linux/tc_act/tc_mirred.h>
17 #include <netinet/in.h>
23 #include <sys/socket.h>
25 #include <rte_byteorder.h>
26 #include <rte_errno.h>
27 #include <rte_ether.h>
29 #include <rte_malloc.h>
30 #include <rte_common.h>
33 #include "mlx5_flow.h"
34 #include "mlx5_autoconf.h"
36 #ifdef HAVE_TC_ACT_VLAN
38 #include <linux/tc_act/tc_vlan.h>
40 #else /* HAVE_TC_ACT_VLAN */
42 #define TCA_VLAN_ACT_POP 1
43 #define TCA_VLAN_ACT_PUSH 2
44 #define TCA_VLAN_ACT_MODIFY 3
45 #define TCA_VLAN_PARMS 2
46 #define TCA_VLAN_PUSH_VLAN_ID 3
47 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
48 #define TCA_VLAN_PAD 5
49 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
56 #endif /* HAVE_TC_ACT_VLAN */
58 #ifdef HAVE_TC_ACT_PEDIT
60 #include <linux/tc_act/tc_pedit.h>
62 #else /* HAVE_TC_ACT_PEDIT */
76 TCA_PEDIT_KEY_EX_HTYPE = 1,
77 TCA_PEDIT_KEY_EX_CMD = 2,
78 __TCA_PEDIT_KEY_EX_MAX
81 enum pedit_header_type {
82 TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
83 TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
84 TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
85 TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
86 TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
87 TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
92 TCA_PEDIT_KEY_EX_CMD_SET = 0,
93 TCA_PEDIT_KEY_EX_CMD_ADD = 1,
100 __u32 off; /*offset */
107 struct tc_pedit_sel {
111 struct tc_pedit_key keys[0];
114 #endif /* HAVE_TC_ACT_PEDIT */
116 #ifdef HAVE_TC_ACT_TUNNEL_KEY
118 #include <linux/tc_act/tc_tunnel_key.h>
120 #ifndef HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT
121 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
124 #ifndef HAVE_TCA_TUNNEL_KEY_NO_CSUM
125 #define TCA_TUNNEL_KEY_NO_CSUM 10
128 #else /* HAVE_TC_ACT_TUNNEL_KEY */
130 #define TCA_ACT_TUNNEL_KEY 17
131 #define TCA_TUNNEL_KEY_ACT_SET 1
132 #define TCA_TUNNEL_KEY_ACT_RELEASE 2
133 #define TCA_TUNNEL_KEY_PARMS 2
134 #define TCA_TUNNEL_KEY_ENC_IPV4_SRC 3
135 #define TCA_TUNNEL_KEY_ENC_IPV4_DST 4
136 #define TCA_TUNNEL_KEY_ENC_IPV6_SRC 5
137 #define TCA_TUNNEL_KEY_ENC_IPV6_DST 6
138 #define TCA_TUNNEL_KEY_ENC_KEY_ID 7
139 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
140 #define TCA_TUNNEL_KEY_NO_CSUM 10
142 struct tc_tunnel_key {
147 #endif /* HAVE_TC_ACT_TUNNEL_KEY */
149 /* Normally found in linux/netlink.h. */
150 #ifndef NETLINK_CAP_ACK
151 #define NETLINK_CAP_ACK 10
154 /* Normally found in linux/pkt_sched.h. */
155 #ifndef TC_H_MIN_INGRESS
156 #define TC_H_MIN_INGRESS 0xfff2u
159 /* Normally found in linux/pkt_cls.h. */
160 #ifndef TCA_CLS_FLAGS_SKIP_SW
161 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
163 #ifndef TCA_CLS_FLAGS_IN_HW
164 #define TCA_CLS_FLAGS_IN_HW (1 << 2)
166 #ifndef HAVE_TCA_CHAIN
169 #ifndef HAVE_TCA_FLOWER_ACT
170 #define TCA_FLOWER_ACT 3
172 #ifndef HAVE_TCA_FLOWER_FLAGS
173 #define TCA_FLOWER_FLAGS 22
175 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
176 #define TCA_FLOWER_KEY_ETH_TYPE 8
178 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
179 #define TCA_FLOWER_KEY_ETH_DST 4
181 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
182 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
184 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
185 #define TCA_FLOWER_KEY_ETH_SRC 6
187 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
188 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
190 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
191 #define TCA_FLOWER_KEY_IP_PROTO 9
193 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
194 #define TCA_FLOWER_KEY_IPV4_SRC 10
196 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
197 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
199 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
200 #define TCA_FLOWER_KEY_IPV4_DST 12
202 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
203 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
205 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
206 #define TCA_FLOWER_KEY_IPV6_SRC 14
208 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
209 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
211 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
212 #define TCA_FLOWER_KEY_IPV6_DST 16
214 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
215 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
217 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
218 #define TCA_FLOWER_KEY_TCP_SRC 18
220 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
221 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
223 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
224 #define TCA_FLOWER_KEY_TCP_DST 19
226 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
227 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
229 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
230 #define TCA_FLOWER_KEY_UDP_SRC 20
232 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
233 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
235 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
236 #define TCA_FLOWER_KEY_UDP_DST 21
238 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
239 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
241 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
242 #define TCA_FLOWER_KEY_VLAN_ID 23
244 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
245 #define TCA_FLOWER_KEY_VLAN_PRIO 24
247 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
248 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
250 #ifndef HAVE_TCA_FLOWER_KEY_ENC_KEY_ID
251 #define TCA_FLOWER_KEY_ENC_KEY_ID 26
253 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC
254 #define TCA_FLOWER_KEY_ENC_IPV4_SRC 27
256 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK
257 #define TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK 28
259 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST
260 #define TCA_FLOWER_KEY_ENC_IPV4_DST 29
262 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK
263 #define TCA_FLOWER_KEY_ENC_IPV4_DST_MASK 30
265 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC
266 #define TCA_FLOWER_KEY_ENC_IPV6_SRC 31
268 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK
269 #define TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK 32
271 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST
272 #define TCA_FLOWER_KEY_ENC_IPV6_DST 33
274 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK
275 #define TCA_FLOWER_KEY_ENC_IPV6_DST_MASK 34
277 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT
278 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT 43
280 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK
281 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK 44
283 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT
284 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT 45
286 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK
287 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK 46
289 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
290 #define TCA_FLOWER_KEY_TCP_FLAGS 71
292 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
293 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
295 #ifndef HAVE_TC_ACT_GOTO_CHAIN
296 #define TC_ACT_GOTO_CHAIN 0x20000000
299 #ifndef IPV6_ADDR_LEN
300 #define IPV6_ADDR_LEN 16
303 #ifndef IPV4_ADDR_LEN
304 #define IPV4_ADDR_LEN 4
308 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
315 #ifndef TCA_ACT_MAX_PRIO
316 #define TCA_ACT_MAX_PRIO 32
319 /** Parameters of VXLAN devices created by driver. */
320 #define MLX5_VXLAN_DEFAULT_VNI 1
321 #define MLX5_VXLAN_DEVICE_PFX "vmlx_"
323 /** Tunnel action type, used for @p type in header structure. */
324 enum flow_tcf_tunact_type {
325 FLOW_TCF_TUNACT_VXLAN_DECAP,
326 FLOW_TCF_TUNACT_VXLAN_ENCAP,
329 /** Flags used for @p mask in tunnel action encap descriptors. */
330 #define FLOW_TCF_ENCAP_ETH_SRC (1u << 0)
331 #define FLOW_TCF_ENCAP_ETH_DST (1u << 1)
332 #define FLOW_TCF_ENCAP_IPV4_SRC (1u << 2)
333 #define FLOW_TCF_ENCAP_IPV4_DST (1u << 3)
334 #define FLOW_TCF_ENCAP_IPV6_SRC (1u << 4)
335 #define FLOW_TCF_ENCAP_IPV6_DST (1u << 5)
336 #define FLOW_TCF_ENCAP_UDP_SRC (1u << 6)
337 #define FLOW_TCF_ENCAP_UDP_DST (1u << 7)
338 #define FLOW_TCF_ENCAP_VXLAN_VNI (1u << 8)
341 * Structure for holding netlink context.
342 * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
343 * Using this (8KB) buffer size ensures that netlink messages will never be
346 struct mlx5_flow_tcf_context {
347 struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
348 uint32_t seq; /* Message sequence number. */
349 uint32_t buf_size; /* Message buffer size. */
350 uint8_t *buf; /* Message buffer. */
354 * Neigh rule structure. The neigh rule is applied via Netlink to
355 * outer tunnel iface in order to provide destination MAC address
356 * for the VXLAN encapsulation. The neigh rule is implicitly related
357 * to the Flow itself and can be shared by multiple Flows.
359 struct tcf_neigh_rule {
360 LIST_ENTRY(tcf_neigh_rule) next;
362 struct ether_addr eth;
369 uint8_t dst[IPV6_ADDR_LEN];
375 * Local rule structure. The local rule is applied via Netlink to
376 * outer tunnel iface in order to provide local and peer IP addresses
377 * of the VXLAN tunnel for encapsulation. The local rule is implicitly
378 * related to the Flow itself and can be shared by multiple Flows.
380 struct tcf_local_rule {
381 LIST_ENTRY(tcf_local_rule) next;
390 uint8_t dst[IPV6_ADDR_LEN];
391 uint8_t src[IPV6_ADDR_LEN];
396 /** Outer interface VXLAN encapsulation rules container. */
398 LIST_ENTRY(tcf_irule) next;
399 LIST_HEAD(, tcf_neigh_rule) neigh;
400 LIST_HEAD(, tcf_local_rule) local;
402 unsigned int ifouter; /**< Own interface index. */
405 /** VXLAN virtual netdev. */
407 LIST_ENTRY(tcf_vtep) next;
409 unsigned int ifindex; /**< Own interface index. */
414 /** Tunnel descriptor header, common for all tunnel types. */
415 struct flow_tcf_tunnel_hdr {
416 uint32_t type; /**< Tunnel action type. */
417 struct tcf_vtep *vtep; /**< Virtual tunnel endpoint device. */
418 unsigned int ifindex_org; /**< Original dst/src interface */
419 unsigned int *ifindex_ptr; /**< Interface ptr in message. */
422 struct flow_tcf_vxlan_decap {
423 struct flow_tcf_tunnel_hdr hdr;
427 struct flow_tcf_vxlan_encap {
428 struct flow_tcf_tunnel_hdr hdr;
429 struct tcf_irule *iface;
432 struct ether_addr dst;
433 struct ether_addr src;
441 uint8_t dst[IPV6_ADDR_LEN];
442 uint8_t src[IPV6_ADDR_LEN];
454 /** Structure used when extracting the values of flow counters
455 * from a netlink message.
457 struct flow_tcf_stats_basic {
459 struct gnet_stats_basic counters;
462 /** Empty masks for known item types. */
464 struct rte_flow_item_port_id port_id;
465 struct rte_flow_item_eth eth;
466 struct rte_flow_item_vlan vlan;
467 struct rte_flow_item_ipv4 ipv4;
468 struct rte_flow_item_ipv6 ipv6;
469 struct rte_flow_item_tcp tcp;
470 struct rte_flow_item_udp udp;
471 struct rte_flow_item_vxlan vxlan;
472 } flow_tcf_mask_empty = {
476 /** Supported masks for known item types. */
477 static const struct {
478 struct rte_flow_item_port_id port_id;
479 struct rte_flow_item_eth eth;
480 struct rte_flow_item_vlan vlan;
481 struct rte_flow_item_ipv4 ipv4;
482 struct rte_flow_item_ipv6 ipv6;
483 struct rte_flow_item_tcp tcp;
484 struct rte_flow_item_udp udp;
485 struct rte_flow_item_vxlan vxlan;
486 } flow_tcf_mask_supported = {
491 .type = RTE_BE16(0xffff),
492 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
493 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
496 /* PCP and VID only, no DEI. */
497 .tci = RTE_BE16(0xefff),
498 .inner_type = RTE_BE16(0xffff),
501 .next_proto_id = 0xff,
502 .src_addr = RTE_BE32(0xffffffff),
503 .dst_addr = RTE_BE32(0xffffffff),
508 "\xff\xff\xff\xff\xff\xff\xff\xff"
509 "\xff\xff\xff\xff\xff\xff\xff\xff",
511 "\xff\xff\xff\xff\xff\xff\xff\xff"
512 "\xff\xff\xff\xff\xff\xff\xff\xff",
515 .src_port = RTE_BE16(0xffff),
516 .dst_port = RTE_BE16(0xffff),
520 .src_port = RTE_BE16(0xffff),
521 .dst_port = RTE_BE16(0xffff),
524 .vni = "\xff\xff\xff",
/* Netlink attribute size helpers, all expressed through MNL_ALIGN(). */
/* Aligned size of a bare struct nlattr header. */
528 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
/* A nest attribute is accounted for as a header only. */
529 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
/* Aligned size of an attribute header followed by len bytes of payload. */
530 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
/* Same as SZ_NLATTR_DATA_OF() with the payload length taken from sizeof(typ). */
531 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
/* Attribute size for string str plus one extra byte for its terminating NUL. */
532 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
/*
 * Number of entries to reserve for the port-to-ifindex table of dev: the
 * value returned by mlx5_dev_to_port_id() when queried with a NULL buffer,
 * plus two.
 * NOTE(review): the two spare entries presumably cover the device's own port
 * when no switch domain is reported and the zero-ifindex terminator entry;
 * confirm against flow_tcf_build_ptoi_table().
 */
534 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
536 /** DPDK port to network interface index (ifindex) conversion. */
537 struct flow_tcf_ptoi {
538 uint16_t port_id; /**< DPDK port ID. */
539 unsigned int ifindex; /**< Network interface index. */
542 /* Due to a limitation on driver/FW. */
543 #define MLX5_TCF_GROUP_ID_MAX 3
546 * Due to a limitation on driver/FW, priority ranges from 1 to 16 in kernel.
547 * Priority in rte_flow attribute starts from 0 and is incremented by 1 in
548 * translation. This is subject to be changed to determine the max priority
549 * based on trial-and-error like Verbs driver once the restriction is lifted or
550 * the range is extended.
552 #define MLX5_TCF_GROUP_PRIORITY_MAX 15
/* Mask of the actions grouped as "fate" actions: drop, port ID and jump. */
554 #define MLX5_TCF_FATE_ACTIONS \
555 (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
556 MLX5_FLOW_ACTION_JUMP)
/* Mask of the VLAN actions: pop, push, set VID and set PCP. */
558 #define MLX5_TCF_VLAN_ACTIONS \
559 (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
560 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
/* Mask of the VXLAN tunnel actions: encapsulation and decapsulation. */
562 #define MLX5_TCF_VXLAN_ACTIONS \
563 (MLX5_FLOW_ACTION_VXLAN_ENCAP | MLX5_FLOW_ACTION_VXLAN_DECAP)
/*
 * Mask of the header-rewrite actions: IPv4/IPv6 source and destination
 * address, transport source and destination port, TTL set and decrement,
 * MAC source and destination address.
 */
565 #define MLX5_TCF_PEDIT_ACTIONS \
566 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
567 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
568 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
569 MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
570 MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
/*
 * Port ID, jump, VLAN push / set VID / set PCP, and every pedit action
 * except DEC_TTL, which is masked out explicitly.
 * NOTE(review): presumably the set of actions that carry a configuration
 * structure (DEC_TTL takes none) - confirm against the users of this mask.
 */
572 #define MLX5_TCF_CONFIG_ACTIONS \
573 (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
574 MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
575 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
576 (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
/* Capacity of the keys[] and keys_ex[] arrays in struct pedit_parser. */
#define MAX_PEDIT_KEYS 128
/* Number of bytes copied into the value of a single pedit key. */
#define SZ_PEDIT_KEY_VAL 4

/*
 * Number of pedit keys needed to cover a field of sz bytes, i.e. sz divided
 * by SZ_PEDIT_KEY_VAL and rounded up.
 *
 * Written as a single round-up division so that the argument is evaluated
 * exactly once; sz is expected to be a non-negative size.
 */
#define NUM_OF_PEDIT_KEYS(sz) \
	(((sz) + SZ_PEDIT_KEY_VAL - 1) / SZ_PEDIT_KEY_VAL)
584 struct pedit_key_ex {
585 enum pedit_header_type htype;
589 struct pedit_parser {
590 struct tc_pedit_sel sel;
591 struct tc_pedit_key keys[MAX_PEDIT_KEYS];
592 struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
596 * Create space for using the implicitly created TC flow counter.
599 * Pointer to the Ethernet device structure.
602 * A pointer to the counter data structure, NULL otherwise and
605 static struct mlx5_flow_counter *
606 flow_tcf_counter_new(void)
608 struct mlx5_flow_counter *cnt;
611 * E-Switch counter cannot be shared and its ID is unknown.
612 * Currently all counters are returned with ID 0.
613 * In the future it may be better to switch to unique numbers.
615 struct mlx5_flow_counter tmpl = {
618 cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
624 /* Implicit counter, do not add to list. */
629 * Set pedit key of MAC address
632 * pointer to action specification
633 * @param[in,out] p_parser
634 * pointer to pedit_parser
637 flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
638 struct pedit_parser *p_parser)
640 int idx = p_parser->sel.nkeys;
641 uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
642 offsetof(struct ether_hdr, s_addr) :
643 offsetof(struct ether_hdr, d_addr);
644 const struct rte_flow_action_set_mac *conf =
645 (const struct rte_flow_action_set_mac *)actions->conf;
647 p_parser->keys[idx].off = off;
648 p_parser->keys[idx].mask = ~UINT32_MAX;
649 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
650 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
651 memcpy(&p_parser->keys[idx].val,
652 conf->mac_addr, SZ_PEDIT_KEY_VAL);
654 p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
655 p_parser->keys[idx].mask = 0xFFFF0000;
656 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
657 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
658 memcpy(&p_parser->keys[idx].val,
659 conf->mac_addr + SZ_PEDIT_KEY_VAL,
660 ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
661 p_parser->sel.nkeys = (++idx);
665 * Set pedit key of decrease/set ttl
668 * pointer to action specification
669 * @param[in,out] p_parser
670 * pointer to pedit_parser
671 * @param[in] item_flags
672 * flags of all items presented
675 flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
676 struct pedit_parser *p_parser,
679 int idx = p_parser->sel.nkeys;
681 p_parser->keys[idx].mask = 0xFFFFFF00;
682 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
683 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
684 p_parser->keys[idx].off =
685 offsetof(struct ipv4_hdr, time_to_live);
687 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
688 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
689 p_parser->keys[idx].off =
690 offsetof(struct ipv6_hdr, hop_limits);
692 if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
693 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
694 p_parser->keys[idx].val = 0x000000FF;
696 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
697 p_parser->keys[idx].val =
698 (__u32)((const struct rte_flow_action_set_ttl *)
699 actions->conf)->ttl_value;
701 p_parser->sel.nkeys = (++idx);
705 * Set pedit key of transport (TCP/UDP) port value
708 * pointer to action specification
709 * @param[in,out] p_parser
710 * pointer to pedit_parser
711 * @param[in] item_flags
712 * flags of all items presented
715 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
716 struct pedit_parser *p_parser,
719 int idx = p_parser->sel.nkeys;
721 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
722 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
723 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
724 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
725 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
726 /* offset of src/dst port is same for TCP and UDP */
727 p_parser->keys[idx].off =
728 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
729 offsetof(struct tcp_hdr, src_port) :
730 offsetof(struct tcp_hdr, dst_port);
731 p_parser->keys[idx].mask = 0xFFFF0000;
732 p_parser->keys[idx].val =
733 (__u32)((const struct rte_flow_action_set_tp *)
734 actions->conf)->port;
735 p_parser->sel.nkeys = (++idx);
739 * Set pedit key of ipv6 address
742 * pointer to action specification
743 * @param[in,out] p_parser
744 * pointer to pedit_parser
747 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
748 struct pedit_parser *p_parser)
750 int idx = p_parser->sel.nkeys;
751 int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
753 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
754 offsetof(struct ipv6_hdr, src_addr) :
755 offsetof(struct ipv6_hdr, dst_addr);
756 const struct rte_flow_action_set_ipv6 *conf =
757 (const struct rte_flow_action_set_ipv6 *)actions->conf;
759 for (int i = 0; i < keys; i++, idx++) {
760 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
761 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
762 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
763 p_parser->keys[idx].mask = ~UINT32_MAX;
764 memcpy(&p_parser->keys[idx].val,
765 conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
768 p_parser->sel.nkeys += keys;
772 * Set pedit key of ipv4 address
775 * pointer to action specification
776 * @param[in,out] p_parser
777 * pointer to pedit_parser
780 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
781 struct pedit_parser *p_parser)
783 int idx = p_parser->sel.nkeys;
785 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
786 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
787 p_parser->keys[idx].off =
788 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
789 offsetof(struct ipv4_hdr, src_addr) :
790 offsetof(struct ipv4_hdr, dst_addr);
791 p_parser->keys[idx].mask = ~UINT32_MAX;
792 p_parser->keys[idx].val =
793 ((const struct rte_flow_action_set_ipv4 *)
794 actions->conf)->ipv4_addr;
795 p_parser->sel.nkeys = (++idx);
799 * Create the pedit netlink attribute in the netlink message
800 * using the pre-allocated message buffer
803 * pointer to pre-allocated netlink message buffer
804 * @param[in,out] actions
805 * pointer to pointer of actions specification.
806 * @param[in,out] action_flags
807 * pointer to actions flags
808 * @param[in] item_flags
809 * flags of all items presented
812 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
813 const struct rte_flow_action **actions,
816 struct pedit_parser p_parser;
817 struct nlattr *na_act_options;
818 struct nlattr *na_pedit_keys;
820 memset(&p_parser, 0, sizeof(p_parser));
821 mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
822 na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
823 /* all modify header actions should be in one tc-pedit action */
824 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
825 switch ((*actions)->type) {
826 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
827 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
828 flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
830 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
831 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
832 flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
834 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
835 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
836 flow_tcf_pedit_key_set_tp_port(*actions,
837 &p_parser, item_flags);
839 case RTE_FLOW_ACTION_TYPE_SET_TTL:
840 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
841 flow_tcf_pedit_key_set_dec_ttl(*actions,
842 &p_parser, item_flags);
844 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
845 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
846 flow_tcf_pedit_key_set_mac(*actions, &p_parser);
849 goto pedit_mnl_msg_done;
853 p_parser.sel.action = TC_ACT_PIPE;
854 mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
855 sizeof(p_parser.sel) +
856 p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
859 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
860 for (int i = 0; i < p_parser.sel.nkeys; i++) {
861 struct nlattr *na_pedit_key =
862 mnl_attr_nest_start(nl,
863 TCA_PEDIT_KEY_EX | NLA_F_NESTED);
864 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
865 p_parser.keys_ex[i].htype);
866 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
867 p_parser.keys_ex[i].cmd);
868 mnl_attr_nest_end(nl, na_pedit_key);
870 mnl_attr_nest_end(nl, na_pedit_keys);
871 mnl_attr_nest_end(nl, na_act_options);
876 * Calculate max memory size of one TC-pedit action.
877 * One TC-pedit action can contain set of keys each defining
878 * a rewrite element (rte_flow action)
880 * @param[in,out] actions
881 * actions specification.
882 * @param[in,out] action_flags
884 * @param[in,out] size
887 * Max memory size of one TC-pedit action
890 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
891 uint64_t *action_flags)
897 pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
898 SZ_NLATTR_STRZ_OF("pedit") +
899 SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
900 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
901 switch ((*actions)->type) {
902 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
903 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
904 flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
906 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
907 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
908 flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
910 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
911 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
912 flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
914 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
915 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
916 flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
918 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
919 /* TCP is the same as UDP */
920 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
921 flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
923 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
924 /* TCP is the same as UDP */
925 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
926 flags |= MLX5_FLOW_ACTION_SET_TP_DST;
928 case RTE_FLOW_ACTION_TYPE_SET_TTL:
929 keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
930 flags |= MLX5_FLOW_ACTION_SET_TTL;
932 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
933 keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
934 flags |= MLX5_FLOW_ACTION_DEC_TTL;
936 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
937 keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
938 flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
940 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
941 keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
942 flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
945 goto get_pedit_action_size_done;
948 get_pedit_action_size_done:
949 /* TCA_PEDIT_PARAMS_EX */
951 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
952 keys * sizeof(struct tc_pedit_key));
953 pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
955 /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
956 (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
957 SZ_NLATTR_DATA_OF(2));
958 (*action_flags) |= flags;
964 * Retrieve mask for pattern item.
966 * This function does basic sanity checks on a pattern item in order to
967 * return the most appropriate mask for it.
970 * Item specification.
971 * @param[in] mask_default
972 * Default mask for pattern item as specified by the flow API.
973 * @param[in] mask_supported
974 * Mask fields supported by the implementation.
975 * @param[in] mask_empty
976 * Empty mask to return when there is no specification.
978 * Perform verbose error reporting if not NULL.
981 * Either @p item->mask or one of the mask parameters on success, NULL
982 * otherwise and rte_errno is set.
985 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
986 const void *mask_supported, const void *mask_empty,
987 size_t mask_size, struct rte_flow_error *error)
992 /* item->last and item->mask cannot exist without item->spec. */
993 if (!item->spec && (item->mask || item->last)) {
994 rte_flow_error_set(error, EINVAL,
995 RTE_FLOW_ERROR_TYPE_ITEM, item,
996 "\"mask\" or \"last\" field provided without"
997 " a corresponding \"spec\"");
1000 /* No spec, no mask, no problem. */
1003 mask = item->mask ? item->mask : mask_default;
1006 * Single-pass check to make sure that:
1007 * - Mask is supported, no bits are set outside mask_supported.
1008 * - Both item->spec and item->last are included in mask.
1010 for (i = 0; i != mask_size; ++i) {
1013 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
1014 ((const uint8_t *)mask_supported)[i]) {
1015 rte_flow_error_set(error, ENOTSUP,
1016 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1017 "unsupported field found"
1022 (((const uint8_t *)item->spec)[i] & mask[i]) !=
1023 (((const uint8_t *)item->last)[i] & mask[i])) {
1024 rte_flow_error_set(error, EINVAL,
1025 RTE_FLOW_ERROR_TYPE_ITEM_LAST,
1027 "range between \"spec\" and \"last\""
1028 " not comprised in \"mask\"");
1036 * Build a conversion table between port ID and ifindex.
1039 * Pointer to Ethernet device.
1041 * Pointer to ptoi table.
1043 * Size of ptoi table provided.
1046 * Size of ptoi table filled.
1049 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
1052 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
1053 uint16_t port_id[n + 1];
1055 unsigned int own = 0;
1057 /* At least one port is needed when no switch domain is present. */
1060 port_id[0] = dev->data->port_id;
1062 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
1066 for (i = 0; i != n; ++i) {
1067 struct rte_eth_dev_info dev_info;
1069 rte_eth_dev_info_get(port_id[i], &dev_info);
1070 if (port_id[i] == dev->data->port_id)
1072 ptoi[i].port_id = port_id[i];
1073 ptoi[i].ifindex = dev_info.if_index;
1075 /* Ensure first entry of ptoi[] is the current device. */
1078 ptoi[0] = ptoi[own];
1079 ptoi[own] = ptoi[n];
1081 /* An entry with zero ifindex terminates ptoi[]. */
1082 ptoi[n].port_id = 0;
1083 ptoi[n].ifindex = 0;
1088 * Verify the @p attr will be correctly understood by the E-switch.
1091 * Pointer to flow attributes
1093 * Pointer to error structure.
1096 * 0 on success, a negative errno value otherwise and rte_errno is set.
1099 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
1100 struct rte_flow_error *error)
1103 * Supported attributes: groups, some priorities and ingress only.
1104 * group is supported only if kernel supports chain. Don't care about
1105 * transfer as it is the caller's problem.
1107 if (attr->group > MLX5_TCF_GROUP_ID_MAX)
1108 return rte_flow_error_set(error, ENOTSUP,
1109 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
1110 "group ID larger than "
1111 RTE_STR(MLX5_TCF_GROUP_ID_MAX)
1112 " isn't supported");
1113 else if (attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
1114 return rte_flow_error_set(error, ENOTSUP,
1115 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1117 "priority more than "
1118 RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
1119 " is not supported");
1121 return rte_flow_error_set(error, EINVAL,
1122 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1123 attr, "only ingress is supported");
1125 return rte_flow_error_set(error, ENOTSUP,
1126 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1127 attr, "egress is not supported");
1132 * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_ETH item for E-Switch.
1133 * The routine checks the L2 fields to be used in encapsulation header.
1136 * Pointer to the item structure.
1138 * Pointer to the error structure.
1141 * 0 on success, a negative errno value otherwise and rte_errno is set.
1144 flow_tcf_validate_vxlan_encap_eth(const struct rte_flow_item *item,
1145 struct rte_flow_error *error)
1147 const struct rte_flow_item_eth *spec = item->spec;
1148 const struct rte_flow_item_eth *mask = item->mask;
1152 * Specification for L2 addresses can be empty
1153 * because these ones are optional and not
1154 * required directly by tc rule. Kernel tries
1155 * to resolve these ones on its own
1160 /* If mask is not specified use the default one. */
1161 mask = &rte_flow_item_eth_mask;
1163 if (memcmp(&mask->dst,
1164 &flow_tcf_mask_empty.eth.dst,
1165 sizeof(flow_tcf_mask_empty.eth.dst))) {
1166 if (memcmp(&mask->dst,
1167 &rte_flow_item_eth_mask.dst,
1168 sizeof(rte_flow_item_eth_mask.dst)))
1169 return rte_flow_error_set
1171 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1172 "no support for partial mask on"
1173 " \"eth.dst\" field");
1175 if (memcmp(&mask->src,
1176 &flow_tcf_mask_empty.eth.src,
1177 sizeof(flow_tcf_mask_empty.eth.src))) {
1178 if (memcmp(&mask->src,
1179 &rte_flow_item_eth_mask.src,
1180 sizeof(rte_flow_item_eth_mask.src)))
1181 return rte_flow_error_set
1183 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1184 "no support for partial mask on"
1185 " \"eth.src\" field");
1187 if (mask->type != RTE_BE16(0x0000)) {
1188 if (mask->type != RTE_BE16(0xffff))
1189 return rte_flow_error_set
1191 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1192 "no support for partial mask on"
1193 " \"eth.type\" field");
1195 "outer ethernet type field"
1196 " cannot be forced for vxlan"
1197 " encapsulation, parameter ignored");
1203 * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV4 item for E-Switch.
1204 * The routine checks the IPv4 fields to be used in encapsulation header.
1207 * Pointer to the item structure.
1209 * Pointer to the error structure.
1212 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Validates the outer IPv4 item of a VXLAN_ENCAP definition: a spec is mandatory and both addresses must be present with a full mask. */
/* NOTE(review): this listing omits several original lines (guards, braces, final return); comments below describe only what is visible. */
1215 flow_tcf_validate_vxlan_encap_ipv4(const struct rte_flow_item *item,
1216 struct rte_flow_error *error)
1218 const struct rte_flow_item_ipv4 *spec = item->spec;
1219 const struct rte_flow_item_ipv4 *mask = item->mask;
/* Missing spec: reported as EINVAL against the item itself. */
1223 * Specification for IP addresses cannot be empty
1224 * because it is required by tunnel_key parameter.
1226 return rte_flow_error_set(error, EINVAL,
1227 RTE_FLOW_ERROR_TYPE_ITEM, item,
1228 "NULL outer ipv4 address"
1229 " specification for vxlan"
/* Fallback to the default rte_flow IPv4 mask; presumably only when item->mask is NULL (guard line not visible here, confirm). */
1233 mask = &rte_flow_item_ipv4_mask;
/* Destination address: a non-zero mask must be all-ones; a zero mask ends in the must-be-specified error below. */
1234 if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
1235 if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
1236 return rte_flow_error_set
1238 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1239 "no support for partial mask on"
1240 " \"ipv4.hdr.dst_addr\" field"
1241 " for vxlan encapsulation");
1242 /* More IPv4 address validations can be put here. */
1245 * Kernel uses the destination IP address to determine
1246 * the routing path and obtain the MAC destination
1247 * address, so IP destination address must be
1248 * specified in the tc rule.
1250 return rte_flow_error_set(error, EINVAL,
1251 RTE_FLOW_ERROR_TYPE_ITEM, item,
1252 "outer ipv4 destination address"
1253 " must be specified for"
1254 " vxlan encapsulation");
/* Source address: same rule as the destination address. */
1256 if (mask->hdr.src_addr != RTE_BE32(0x00000000)) {
1257 if (mask->hdr.src_addr != RTE_BE32(0xffffffff))
1258 return rte_flow_error_set
1260 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1261 "no support for partial mask on"
1262 " \"ipv4.hdr.src_addr\" field"
1263 " for vxlan encapsulation");
1264 /* More IPv4 address validations can be put here. */
1267 * Kernel uses the source IP address to select the
1268 * interface for egress encapsulated traffic, so
1269 * it must be specified in the tc rule.
1271 return rte_flow_error_set(error, EINVAL,
1272 RTE_FLOW_ERROR_TYPE_ITEM, item,
1273 "outer ipv4 source address"
1274 " must be specified for"
1275 " vxlan encapsulation");
1281 * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV6 item for E-Switch.
1282 * The routine checks the IPv6 fields to be used in encapsulation header.
1285 * Pointer to the item structure.
1287 * Pointer to the error structure.
1290 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Validates the outer IPv6 item of a VXLAN_ENCAP definition: a spec is mandatory and both addresses must be present with a full mask. */
/* NOTE(review): this listing omits several original lines (guards, memcmp length arguments, braces, final return); comments describe only what is visible. */
1293 flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item,
1294 struct rte_flow_error *error)
1296 const struct rte_flow_item_ipv6 *spec = item->spec;
1297 const struct rte_flow_item_ipv6 *mask = item->mask;
/* Missing spec: reported as EINVAL against the item itself. */
1301 * Specification for IP addresses cannot be empty
1302 * because it is required by tunnel_key parameter.
1304 return rte_flow_error_set(error, EINVAL,
1305 RTE_FLOW_ERROR_TYPE_ITEM, item,
1306 "NULL outer ipv6 address"
1307 " specification for"
1308 " vxlan encapsulation");
/* Fallback to the default rte_flow IPv6 mask; presumably only when item->mask is NULL (guard line not visible here, confirm). */
1311 mask = &rte_flow_item_ipv6_mask;
/* Destination address: the mask is compared against the empty mask and then against the default IPv6 mask address. */
1312 if (memcmp(&mask->hdr.dst_addr,
1313 &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
1315 if (memcmp(&mask->hdr.dst_addr,
1316 &rte_flow_item_ipv6_mask.hdr.dst_addr,
1318 return rte_flow_error_set
1320 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1321 "no support for partial mask on"
1322 " \"ipv6.hdr.dst_addr\" field"
1323 " for vxlan encapsulation");
1324 /* More IPv6 address validations can be put here. */
1327 * Kernel uses the destination IP address to determine
1328 * the routing path and obtain the MAC destination
1329 * address (heigh or gate), so IP destination address
1330 * must be specified within the tc rule.
1332 return rte_flow_error_set(error, EINVAL,
1333 RTE_FLOW_ERROR_TYPE_ITEM, item,
1334 "outer ipv6 destination address"
1335 " must be specified for"
1336 " vxlan encapsulation");
/* Source address: same pair of comparisons as for the destination address. */
1338 if (memcmp(&mask->hdr.src_addr,
1339 &flow_tcf_mask_empty.ipv6.hdr.src_addr,
1341 if (memcmp(&mask->hdr.src_addr,
1342 &rte_flow_item_ipv6_mask.hdr.src_addr,
1344 return rte_flow_error_set
1346 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1347 "no support for partial mask on"
1348 " \"ipv6.hdr.src_addr\" field"
1349 " for vxlan encapsulation");
1350 /* More L3 address validation can be put here. */
1353 * Kernel uses the source IP address to select the
1354 * interface for egress encapsulated traffic, so
1355 * it must be specified in the tc rule.
1357 return rte_flow_error_set(error, EINVAL,
1358 RTE_FLOW_ERROR_TYPE_ITEM, item,
1359 "outer L3 source address"
1360 " must be specified for"
1361 " vxlan encapsulation");
1367 * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_UDP item for E-Switch.
1368 * The routine checks the UDP fields to be used in encapsulation header.
1371 * Pointer to the item structure.
1373 * Pointer to the error structure.
1376 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Validates the outer UDP item of a VXLAN_ENCAP definition: a spec is mandatory and the destination port must be fully masked and non-zero. */
/* NOTE(review): this listing omits several original lines (guards, braces, the logging call, final return); comments describe only what is visible. */
1379 flow_tcf_validate_vxlan_encap_udp(const struct rte_flow_item *item,
1380 struct rte_flow_error *error)
1382 const struct rte_flow_item_udp *spec = item->spec;
1383 const struct rte_flow_item_udp *mask = item->mask;
/* Missing spec: reported as EINVAL against the item itself. */
1387 * Specification for UDP ports cannot be empty
1388 * because it is required by tunnel_key parameter.
1390 return rte_flow_error_set(error, EINVAL,
1391 RTE_FLOW_ERROR_TYPE_ITEM, item,
1392 "NULL UDP port specification "
1393 " for vxlan encapsulation");
/* Fallback to the default rte_flow UDP mask; presumably only when item->mask is NULL (guard line not visible here, confirm). */
1396 mask = &rte_flow_item_udp_mask;
/* Destination port: a non-zero mask must be all-ones and the spec value must not be zero. */
1397 if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
1398 if (mask->hdr.dst_port != RTE_BE16(0xffff))
1399 return rte_flow_error_set
1401 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1402 "no support for partial mask on"
1403 " \"udp.hdr.dst_port\" field"
1404 " for vxlan encapsulation");
1405 if (!spec->hdr.dst_port)
1406 return rte_flow_error_set
1408 RTE_FLOW_ERROR_TYPE_ITEM, item,
1409 "outer UDP remote port cannot be"
1410 " 0 for vxlan encapsulation");
1412 return rte_flow_error_set(error, EINVAL,
1413 RTE_FLOW_ERROR_TYPE_ITEM, item,
1414 "outer UDP remote port"
1415 " must be specified for"
1416 " vxlan encapsulation");
/* Source port: a partial mask is rejected; the message text below says a forced source port is ignored (the call emitting it is not visible here). */
1418 if (mask->hdr.src_port != RTE_BE16(0x0000)) {
1419 if (mask->hdr.src_port != RTE_BE16(0xffff))
1420 return rte_flow_error_set
1422 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1423 "no support for partial mask on"
1424 " \"udp.hdr.src_port\" field"
1425 " for vxlan encapsulation");
1427 "outer UDP source port cannot be"
1428 " forced for vxlan encapsulation,"
1429 " parameter ignored");
1435 * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_VXLAN item for E-Switch.
1436 * The routine checks the VNI fields to be used in encapsulation header.
1439 * Pointer to the item structure.
1441 * Pointer to the error structure.
1444 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Validates the VXLAN item of a VXLAN_ENCAP definition: a spec is mandatory, the 24-bit VNI mask must be all-ones and the VNI value must not be zero. */
/* NOTE(review): this listing omits some original lines (guards, braces, final return); comments describe only what is visible. */
1447 flow_tcf_validate_vxlan_encap_vni(const struct rte_flow_item *item,
1448 struct rte_flow_error *error)
1450 const struct rte_flow_item_vxlan *spec = item->spec;
1451 const struct rte_flow_item_vxlan *mask = item->mask;
1454 /* Outer VNI is required by tunnel_key parameter. */
1455 return rte_flow_error_set(error, EINVAL,
1456 RTE_FLOW_ERROR_TYPE_ITEM, item,
1457 "NULL VNI specification"
1458 " for vxlan encapsulation");
/* Fallback to the default rte_flow VXLAN mask; presumably only when item->mask is NULL (guard line not visible here, confirm). */
1461 mask = &rte_flow_item_vxlan_mask;
/* An all-zero VNI mask means the VNI was not requested at all: EINVAL. */
1462 if (!mask->vni[0] && !mask->vni[1] && !mask->vni[2])
1463 return rte_flow_error_set(error, EINVAL,
1464 RTE_FLOW_ERROR_TYPE_ITEM, item,
1465 "outer VNI must be specified "
1466 "for vxlan encapsulation");
/* Any VNI mask byte other than 0xff is a partial mask: ENOTSUP. */
1467 if (mask->vni[0] != 0xff ||
1468 mask->vni[1] != 0xff ||
1469 mask->vni[2] != 0xff)
1470 return rte_flow_error_set(error, ENOTSUP,
1471 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1472 "no support for partial mask on"
1473 " \"vxlan.vni\" field");
/* A VNI value with all three bytes zero is rejected: EINVAL. */
1475 if (!spec->vni[0] && !spec->vni[1] && !spec->vni[2])
1476 return rte_flow_error_set(error, EINVAL,
1477 RTE_FLOW_ERROR_TYPE_ITEM, item,
1478 "vxlan vni cannot be 0");
1483 * Validate VXLAN_ENCAP action item list for E-Switch.
1484 * The routine checks items to be used in encapsulation header.
1487 * Pointer to the VXLAN_ENCAP action structure.
1489 * Pointer to the error structure.
1492 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Validates the item list carried by a VXLAN_ENCAP action: each item goes through the generic mlx5 validator and the encap-specific one, then an outer IP layer, an outer UDP layer and a VXLAN item are required. */
/* NOTE(review): this listing omits several original lines (guards, call arguments, ret checks, breaks, final return); comments describe only what is visible. */
1495 flow_tcf_validate_vxlan_encap(const struct rte_flow_action *action,
1496 struct rte_flow_error *error)
1498 const struct rte_flow_item *items;
/* Accumulates the MLX5_FLOW_LAYER_* bits of the items accepted so far. */
1500 uint32_t item_flags = 0;
1503 return rte_flow_error_set(error, EINVAL,
1504 RTE_FLOW_ERROR_TYPE_ACTION, action,
1505 "Missing vxlan tunnel"
1506 " action configuration");
/* The encapsulation header is described by the definition item list inside the action configuration. */
1507 items = ((const struct rte_flow_action_vxlan_encap *)
1508 action->conf)->definition;
1510 return rte_flow_error_set(error, EINVAL,
1511 RTE_FLOW_ERROR_TYPE_ACTION, action,
1512 "Missing vxlan tunnel"
1513 " encapsulation parameters");
/* Walk the definition up to the END item. */
1514 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1515 switch (items->type) {
1516 case RTE_FLOW_ITEM_TYPE_VOID:
1518 case RTE_FLOW_ITEM_TYPE_ETH:
1519 ret = mlx5_flow_validate_item_eth(items, item_flags,
1523 ret = flow_tcf_validate_vxlan_encap_eth(items, error);
1526 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1529 case RTE_FLOW_ITEM_TYPE_IPV4:
1530 ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1534 ret = flow_tcf_validate_vxlan_encap_ipv4(items, error);
1537 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1539 case RTE_FLOW_ITEM_TYPE_IPV6:
1540 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1544 ret = flow_tcf_validate_vxlan_encap_ipv6(items, error);
1547 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1549 case RTE_FLOW_ITEM_TYPE_UDP:
1550 ret = mlx5_flow_validate_item_udp(items, item_flags,
1554 ret = flow_tcf_validate_vxlan_encap_udp(items, error);
1557 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1559 case RTE_FLOW_ITEM_TYPE_VXLAN:
1560 ret = mlx5_flow_validate_item_vxlan(items,
1564 ret = flow_tcf_validate_vxlan_encap_vni(items, error);
1567 item_flags |= MLX5_FLOW_LAYER_VXLAN;
/* Remaining item types are reported as unsupported for the encap header. */
1570 return rte_flow_error_set
1572 RTE_FLOW_ERROR_TYPE_ITEM, items,
1573 "vxlan encap item not supported");
/* Mandatory layers: outer L3, outer UDP and the VXLAN item; each missing one is EINVAL against the action. */
1576 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
1577 return rte_flow_error_set(error, EINVAL,
1578 RTE_FLOW_ERROR_TYPE_ACTION, action,
1579 "no outer IP layer found"
1580 " for vxlan encapsulation");
1581 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1582 return rte_flow_error_set(error, EINVAL,
1583 RTE_FLOW_ERROR_TYPE_ACTION, action,
1584 "no outer UDP layer found"
1585 " for vxlan encapsulation");
1586 if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
1587 return rte_flow_error_set(error, EINVAL,
1588 RTE_FLOW_ERROR_TYPE_ACTION, action,
1589 "no VXLAN VNI found"
1590 " for vxlan encapsulation");
1595 * Validate outer RTE_FLOW_ITEM_TYPE_UDP item if tunnel item
1596 * RTE_FLOW_ITEM_TYPE_VXLAN is present in item list.
1599 * Outer UDP layer item (if any, NULL otherwise).
1601 * Pointer to the error structure.
1604 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Validates the outer UDP pattern item of a VXLAN tunnel match: a spec is mandatory and the destination port must be fully masked and non-zero. */
/* NOTE(review): this listing omits several original lines (guards, braces, the logging call, final return); comments describe only what is visible. */
1607 flow_tcf_validate_vxlan_decap_udp(const struct rte_flow_item *udp,
1608 struct rte_flow_error *error)
1610 const struct rte_flow_item_udp *spec = udp->spec;
1611 const struct rte_flow_item_udp *mask = udp->mask;
/* Missing spec: reported as EINVAL against the UDP item. */
1615 * Specification for UDP ports cannot be empty
1616 * because it is required as decap parameter.
1618 return rte_flow_error_set(error, EINVAL,
1619 RTE_FLOW_ERROR_TYPE_ITEM, udp,
1620 "NULL UDP port specification"
1621 " for VXLAN decapsulation");
/* Fallback to the default rte_flow UDP mask; presumably only when udp->mask is NULL (guard line not visible here, confirm). */
1623 mask = &rte_flow_item_udp_mask;
/* Destination port: a non-zero mask must be all-ones and the spec value must not be zero. */
1624 if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
1625 if (mask->hdr.dst_port != RTE_BE16(0xffff))
1626 return rte_flow_error_set
1628 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1629 "no support for partial mask on"
1630 " \"udp.hdr.dst_port\" field");
1631 if (!spec->hdr.dst_port)
1632 return rte_flow_error_set
1634 RTE_FLOW_ERROR_TYPE_ITEM, udp,
1635 "zero decap local UDP port");
1637 return rte_flow_error_set(error, EINVAL,
1638 RTE_FLOW_ERROR_TYPE_ITEM, udp,
1639 "outer UDP destination port must be "
1640 "specified for vxlan decapsulation");
/* Source port: a partial mask is rejected; the message text below says a forced port is ignored (the call emitting it is not visible here). */
1642 if (mask->hdr.src_port != RTE_BE16(0x0000)) {
1643 if (mask->hdr.src_port != RTE_BE16(0xffff))
1644 return rte_flow_error_set
1646 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1647 "no support for partial mask on"
1648 " \"udp.hdr.src_port\" field");
1650 "outer UDP local port cannot be "
1651 "forced for VXLAN encapsulation, "
1652 "parameter ignored");
1658 * Validate flow for E-Switch.
1661 * Pointer to the priv structure.
1663 * Pointer to the flow attributes.
1665 * Pointer to the list of items.
1666 * @param[in] actions
1667 * Pointer to the list of actions.
1669 * Pointer to the error structure.
1672 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Validates a complete flow rule for the E-Switch TC flower backend: first the attributes, then every action, then every pattern item, and finally the combinations of detected actions and layers. */
/* NOTE(review): this listing omits many original lines (guards, call arguments, ret checks, breaks, braces, final return); comments describe only what is visible. */
1675 flow_tcf_validate(struct rte_eth_dev *dev,
1676 const struct rte_flow_attr *attr,
1677 const struct rte_flow_item items[],
1678 const struct rte_flow_action actions[],
1679 struct rte_flow_error *error)
/* Typed views of the current item mask/spec and of the current action configuration (used below as mask.X, spec.X and conf.X). */
1682 const struct rte_flow_item_port_id *port_id;
1683 const struct rte_flow_item_eth *eth;
1684 const struct rte_flow_item_vlan *vlan;
1685 const struct rte_flow_item_ipv4 *ipv4;
1686 const struct rte_flow_item_ipv6 *ipv6;
1687 const struct rte_flow_item_tcp *tcp;
1688 const struct rte_flow_item_udp *udp;
1689 const struct rte_flow_item_vxlan *vxlan;
1692 const struct rte_flow_action_port_id *port_id;
1693 const struct rte_flow_action_jump *jump;
1694 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1695 const struct rte_flow_action_of_set_vlan_vid *
1697 const struct rte_flow_action_of_set_vlan_pcp *
1699 const struct rte_flow_action_vxlan_encap *vxlan_encap;
1700 const struct rte_flow_action_set_ipv4 *set_ipv4;
1701 const struct rte_flow_action_set_ipv6 *set_ipv6;
1703 const struct rte_flow_item *outer_udp = NULL;
/* Ethertype trackers start at ETH_P_ALL, which the checks below treat as not yet constrained. */
1704 rte_be16_t inner_etype = RTE_BE16(ETH_P_ALL);
1705 rte_be16_t outer_etype = RTE_BE16(ETH_P_ALL);
1706 rte_be16_t vlan_etype = RTE_BE16(ETH_P_ALL);
1707 uint64_t item_flags = 0;
1708 uint64_t action_flags = 0;
1709 uint8_t next_protocol = 0xff;
1710 unsigned int tcm_ifindex = 0;
1711 uint8_t pedit_validated = 0;
1712 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1713 struct rte_eth_dev *port_id_dev = NULL;
/* NOTE(review): in_port_id_set has no initializer on this line and is read in the PORT_ID item case below; no assignment is visible in this listing, confirm it is set before the first read. */
1714 bool in_port_id_set;
1717 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1718 PTOI_TABLE_SZ_MAX(dev)));
1719 ret = flow_tcf_validate_attributes(attr, error);
/* Pass 1: walk the action list up to END, deriving one flag per action and checking it against the flags already collected. */
1722 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1724 uint64_t current_action_flag = 0;
1726 switch (actions->type) {
1727 case RTE_FLOW_ACTION_TYPE_VOID:
1729 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1730 current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1733 conf.port_id = actions->conf;
1734 if (conf.port_id->original)
/* Look the requested port ID up in the ptoi table; the entry with a zero ifindex ends the scan. */
1737 for (i = 0; ptoi[i].ifindex; ++i)
1738 if (ptoi[i].port_id == conf.port_id->id)
1740 if (!ptoi[i].ifindex)
1741 return rte_flow_error_set
1743 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1745 "missing data to convert port ID to"
/* Remember the destination device for the push-vlan representor check performed after both loops. */
1747 port_id_dev = &rte_eth_devices[conf.port_id->id];
1749 case RTE_FLOW_ACTION_TYPE_JUMP:
1750 current_action_flag = MLX5_FLOW_ACTION_JUMP;
1753 conf.jump = actions->conf;
/* The jump target group must be strictly greater than the group of this rule. */
1754 if (attr->group >= conf.jump->group)
1755 return rte_flow_error_set
1757 RTE_FLOW_ERROR_TYPE_ACTION,
1759 "can jump only to a group forward");
1761 case RTE_FLOW_ACTION_TYPE_DROP:
1762 current_action_flag = MLX5_FLOW_ACTION_DROP;
1764 case RTE_FLOW_ACTION_TYPE_COUNT:
1766 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1767 current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1769 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: {
1770 rte_be16_t ethertype;
1772 current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1775 conf.of_push_vlan = actions->conf;
1776 ethertype = conf.of_push_vlan->ethertype;
1777 if (ethertype != RTE_BE16(ETH_P_8021Q) &&
1778 ethertype != RTE_BE16(ETH_P_8021AD))
1779 return rte_flow_error_set
1781 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1782 "vlan push TPID must be "
1783 "802.1Q or 802.1AD");
/* The VLAN VID and PCP set actions are accepted only after a push-vlan action was already seen. */
1786 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1787 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1788 return rte_flow_error_set
1790 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1791 "vlan modify is not supported,"
1792 " set action must follow push action");
1793 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1795 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1796 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1797 return rte_flow_error_set
1799 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1800 "vlan modify is not supported,"
1801 " set action must follow push action");
1802 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1804 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
1805 current_action_flag = MLX5_FLOW_ACTION_VXLAN_DECAP;
1807 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
1808 ret = flow_tcf_validate_vxlan_encap(actions, error);
1811 current_action_flag = MLX5_FLOW_ACTION_VXLAN_ENCAP;
1813 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1814 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1816 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1817 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1819 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1820 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1822 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1823 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1825 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1826 current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1828 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1829 current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1831 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1832 current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1834 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1835 current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1837 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1838 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
1840 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1841 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
1844 return rte_flow_error_set(error, ENOTSUP,
1845 RTE_FLOW_ERROR_TYPE_ACTION,
1847 "action not supported");
/* Checks shared by all actions: required configuration, grouping of set (pedit) actions, a single fate action, a single vxlan action, and no vxlan action after a vlan action. */
1849 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1851 return rte_flow_error_set
1853 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1855 "action configuration not set");
1857 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1859 return rte_flow_error_set(error, ENOTSUP,
1860 RTE_FLOW_ERROR_TYPE_ACTION,
1862 "set actions should be "
1863 "listed successively");
/* A non-pedit action seen after pedit actions sets pedit_validated. */
1864 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1865 (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1866 pedit_validated = 1;
1867 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1868 (action_flags & MLX5_TCF_FATE_ACTIONS))
1869 return rte_flow_error_set(error, EINVAL,
1870 RTE_FLOW_ERROR_TYPE_ACTION,
1872 "can't have multiple fate"
1874 if ((current_action_flag & MLX5_TCF_VXLAN_ACTIONS) &&
1875 (action_flags & MLX5_TCF_VXLAN_ACTIONS))
1876 return rte_flow_error_set(error, EINVAL,
1877 RTE_FLOW_ERROR_TYPE_ACTION,
1879 "can't have multiple vxlan"
1881 if ((current_action_flag & MLX5_TCF_VXLAN_ACTIONS) &&
1882 (action_flags & MLX5_TCF_VLAN_ACTIONS))
1883 return rte_flow_error_set(error, ENOTSUP,
1884 RTE_FLOW_ERROR_TYPE_ACTION,
1886 "can't have vxlan and vlan"
1887 " actions in the same rule");
1888 action_flags |= current_action_flag;
/* Pass 2: walk the pattern items up to END, accumulating layer bits in item_flags and tracking the outer, vlan and inner ethertypes. */
1890 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1893 switch (items->type) {
1894 case RTE_FLOW_ITEM_TYPE_VOID:
1896 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1897 if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1898 return rte_flow_error_set
1900 RTE_FLOW_ERROR_TYPE_ITEM, items,
1901 "inner tunnel port id"
1902 " item is not supported");
1903 mask.port_id = flow_tcf_item_mask
1904 (items, &rte_flow_item_port_id_mask,
1905 &flow_tcf_mask_supported.port_id,
1906 &flow_tcf_mask_empty.port_id,
1907 sizeof(flow_tcf_mask_supported.port_id),
1911 if (mask.port_id == &flow_tcf_mask_empty.port_id) {
1915 spec.port_id = items->spec;
1916 if (mask.port_id->id && mask.port_id->id != 0xffffffff)
1917 return rte_flow_error_set
1919 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1921 "no support for partial mask on"
1923 if (!mask.port_id->id)
1926 for (i = 0; ptoi[i].ifindex; ++i)
1927 if (ptoi[i].port_id == spec.port_id->id)
1929 if (!ptoi[i].ifindex)
1930 return rte_flow_error_set
1932 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1934 "missing data to convert port ID to"
/* A second PORT_ID item must resolve to the same ifindex as the one already recorded. */
1936 if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
1937 return rte_flow_error_set
1939 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1941 "cannot match traffic for"
1942 " several port IDs through"
1943 " a single flow rule");
1944 tcm_ifindex = ptoi[i].ifindex;
1947 case RTE_FLOW_ITEM_TYPE_ETH:
1948 ret = mlx5_flow_validate_item_eth(items, item_flags,
/* Items seen after a tunnel item are recorded as inner layers, the others as outer layers. */
1952 item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
1953 MLX5_FLOW_LAYER_INNER_L2 :
1954 MLX5_FLOW_LAYER_OUTER_L2;
1956 * Redundant check due to different supported mask.
1957 * Same for the rest of items.
1959 mask.eth = flow_tcf_item_mask
1960 (items, &rte_flow_item_eth_mask,
1961 &flow_tcf_mask_supported.eth,
1962 &flow_tcf_mask_empty.eth,
1963 sizeof(flow_tcf_mask_supported.eth),
1967 if (mask.eth->type && mask.eth->type !=
1969 return rte_flow_error_set
1971 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1973 "no support for partial mask on"
1975 assert(items->spec);
1976 spec.eth = items->spec;
/* A masked ethertype must agree with the inner or outer ethertype already recorded, then becomes the recorded value. */
1977 if (mask.eth->type &&
1978 (item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
1979 inner_etype != RTE_BE16(ETH_P_ALL) &&
1980 inner_etype != spec.eth->type)
1981 return rte_flow_error_set
1983 RTE_FLOW_ERROR_TYPE_ITEM,
1985 "inner eth_type conflict");
1986 if (mask.eth->type &&
1987 !(item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
1988 outer_etype != RTE_BE16(ETH_P_ALL) &&
1989 outer_etype != spec.eth->type)
1990 return rte_flow_error_set
1992 RTE_FLOW_ERROR_TYPE_ITEM,
1994 "outer eth_type conflict");
1995 if (mask.eth->type) {
1996 if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1997 inner_etype = spec.eth->type;
1999 outer_etype = spec.eth->type;
2002 case RTE_FLOW_ITEM_TYPE_VLAN:
2003 if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2004 return rte_flow_error_set
2006 RTE_FLOW_ERROR_TYPE_ITEM, items,
2008 " is not supported");
2009 ret = mlx5_flow_validate_item_vlan(items, item_flags,
2013 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
2014 mask.vlan = flow_tcf_item_mask
2015 (items, &rte_flow_item_vlan_mask,
2016 &flow_tcf_mask_supported.vlan,
2017 &flow_tcf_mask_empty.vlan,
2018 sizeof(flow_tcf_mask_supported.vlan),
/* The 0xe000 and 0x0fff parts of the TCI mask and the inner_type mask must each be either empty or complete. */
2022 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
2023 (mask.vlan->tci & RTE_BE16(0xe000)) !=
2024 RTE_BE16(0xe000)) ||
2025 (mask.vlan->tci & RTE_BE16(0x0fff) &&
2026 (mask.vlan->tci & RTE_BE16(0x0fff)) !=
2027 RTE_BE16(0x0fff)) ||
2028 (mask.vlan->inner_type &&
2029 mask.vlan->inner_type != RTE_BE16(0xffff)))
2030 return rte_flow_error_set
2032 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2034 "no support for partial masks on"
2035 " \"tci\" (PCP and VID parts) and"
2036 " \"inner_type\" fields");
/* A VLAN item pins the outer ethertype to 802.1Q. */
2037 if (outer_etype != RTE_BE16(ETH_P_ALL) &&
2038 outer_etype != RTE_BE16(ETH_P_8021Q))
2039 return rte_flow_error_set
2041 RTE_FLOW_ERROR_TYPE_ITEM,
2043 "outer eth_type conflict,"
2045 outer_etype = RTE_BE16(ETH_P_8021Q);
2046 assert(items->spec);
2047 spec.vlan = items->spec;
2048 if (mask.vlan->inner_type &&
2049 vlan_etype != RTE_BE16(ETH_P_ALL) &&
2050 vlan_etype != spec.vlan->inner_type)
2051 return rte_flow_error_set
2053 RTE_FLOW_ERROR_TYPE_ITEM,
2055 "vlan eth_type conflict");
2056 if (mask.vlan->inner_type)
2057 vlan_etype = spec.vlan->inner_type;
2059 case RTE_FLOW_ITEM_TYPE_IPV4:
2060 ret = mlx5_flow_validate_item_ipv4(items, item_flags,
2064 item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
2065 MLX5_FLOW_LAYER_INNER_L3_IPV4 :
2066 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2067 mask.ipv4 = flow_tcf_item_mask
2068 (items, &rte_flow_item_ipv4_mask,
2069 &flow_tcf_mask_supported.ipv4,
2070 &flow_tcf_mask_empty.ipv4,
2071 sizeof(flow_tcf_mask_supported.ipv4),
2075 if (mask.ipv4->hdr.next_proto_id &&
2076 mask.ipv4->hdr.next_proto_id != 0xff)
2077 return rte_flow_error_set
2079 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2081 "no support for partial mask on"
2082 " \"hdr.next_proto_id\" field");
2083 else if (mask.ipv4->hdr.next_proto_id)
2085 ((const struct rte_flow_item_ipv4 *)
2086 (items->spec))->hdr.next_proto_id;
/* An IPv4 item pins the ethertype of its enclosing layer (inner, vlan or outer) to ETH_P_IP. */
2087 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2088 if (inner_etype != RTE_BE16(ETH_P_ALL) &&
2089 inner_etype != RTE_BE16(ETH_P_IP))
2090 return rte_flow_error_set
2092 RTE_FLOW_ERROR_TYPE_ITEM,
2094 "inner eth_type conflict,"
2095 " IPv4 is required");
2096 inner_etype = RTE_BE16(ETH_P_IP);
2097 } else if (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN) {
2098 if (vlan_etype != RTE_BE16(ETH_P_ALL) &&
2099 vlan_etype != RTE_BE16(ETH_P_IP))
2100 return rte_flow_error_set
2102 RTE_FLOW_ERROR_TYPE_ITEM,
2104 "vlan eth_type conflict,"
2105 " IPv4 is required");
2106 vlan_etype = RTE_BE16(ETH_P_IP);
2108 if (outer_etype != RTE_BE16(ETH_P_ALL) &&
2109 outer_etype != RTE_BE16(ETH_P_IP))
2110 return rte_flow_error_set
2112 RTE_FLOW_ERROR_TYPE_ITEM,
2114 "eth_type conflict,"
2115 " IPv4 is required");
2116 outer_etype = RTE_BE16(ETH_P_IP);
2119 case RTE_FLOW_ITEM_TYPE_IPV6:
2120 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
2124 item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
2125 MLX5_FLOW_LAYER_INNER_L3_IPV6 :
2126 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2127 mask.ipv6 = flow_tcf_item_mask
2128 (items, &rte_flow_item_ipv6_mask,
2129 &flow_tcf_mask_supported.ipv6,
2130 &flow_tcf_mask_empty.ipv6,
2131 sizeof(flow_tcf_mask_supported.ipv6),
2135 if (mask.ipv6->hdr.proto &&
2136 mask.ipv6->hdr.proto != 0xff)
2137 return rte_flow_error_set
2139 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2141 "no support for partial mask on"
2142 " \"hdr.proto\" field");
2143 else if (mask.ipv6->hdr.proto)
2145 ((const struct rte_flow_item_ipv6 *)
2146 (items->spec))->hdr.proto;
/* An IPv6 item pins the ethertype of its enclosing layer (inner, vlan or outer) to ETH_P_IPV6. */
2147 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2148 if (inner_etype != RTE_BE16(ETH_P_ALL) &&
2149 inner_etype != RTE_BE16(ETH_P_IPV6))
2150 return rte_flow_error_set
2152 RTE_FLOW_ERROR_TYPE_ITEM,
2154 "inner eth_type conflict,"
2155 " IPv6 is required");
2156 inner_etype = RTE_BE16(ETH_P_IPV6);
2157 } else if (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN) {
2158 if (vlan_etype != RTE_BE16(ETH_P_ALL) &&
2159 vlan_etype != RTE_BE16(ETH_P_IPV6))
2160 return rte_flow_error_set
2162 RTE_FLOW_ERROR_TYPE_ITEM,
2164 "vlan eth_type conflict,"
2165 " IPv6 is required");
2166 vlan_etype = RTE_BE16(ETH_P_IPV6);
2168 if (outer_etype != RTE_BE16(ETH_P_ALL) &&
2169 outer_etype != RTE_BE16(ETH_P_IPV6))
2170 return rte_flow_error_set
2172 RTE_FLOW_ERROR_TYPE_ITEM,
2174 "eth_type conflict,"
2175 " IPv6 is required");
2176 outer_etype = RTE_BE16(ETH_P_IPV6);
2179 case RTE_FLOW_ITEM_TYPE_UDP:
2180 ret = mlx5_flow_validate_item_udp(items, item_flags,
2181 next_protocol, error);
2184 item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
2185 MLX5_FLOW_LAYER_INNER_L4_UDP :
2186 MLX5_FLOW_LAYER_OUTER_L4_UDP;
2187 mask.udp = flow_tcf_item_mask
2188 (items, &rte_flow_item_udp_mask,
2189 &flow_tcf_mask_supported.udp,
2190 &flow_tcf_mask_empty.udp,
2191 sizeof(flow_tcf_mask_supported.udp),
2196 * Save the presumed outer UDP item for extra check
2197 * if the tunnel item will be found later in the list.
2199 if (!(item_flags & MLX5_FLOW_LAYER_TUNNEL))
2202 case RTE_FLOW_ITEM_TYPE_TCP:
2203 ret = mlx5_flow_validate_item_tcp
2206 &flow_tcf_mask_supported.tcp,
2210 item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
2211 MLX5_FLOW_LAYER_INNER_L4_TCP :
2212 MLX5_FLOW_LAYER_OUTER_L4_TCP;
2213 mask.tcp = flow_tcf_item_mask
2214 (items, &rte_flow_item_tcp_mask,
2215 &flow_tcf_mask_supported.tcp,
2216 &flow_tcf_mask_empty.tcp,
2217 sizeof(flow_tcf_mask_supported.tcp),
2222 case RTE_FLOW_ITEM_TYPE_VXLAN:
2223 if (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)
2224 return rte_flow_error_set
2226 RTE_FLOW_ERROR_TYPE_ITEM, items,
2227 "vxlan tunnel over vlan"
2228 " is not supported");
2229 ret = mlx5_flow_validate_item_vxlan(items,
2233 item_flags |= MLX5_FLOW_LAYER_VXLAN;
2234 mask.vxlan = flow_tcf_item_mask
2235 (items, &rte_flow_item_vxlan_mask,
2236 &flow_tcf_mask_supported.vxlan,
2237 &flow_tcf_mask_empty.vxlan,
2238 sizeof(flow_tcf_mask_supported.vxlan), error);
/* The VNI mask must be exactly 0xffffff; both partial and empty masks are rejected here. */
2241 if (mask.vxlan->vni[0] != 0xff ||
2242 mask.vxlan->vni[1] != 0xff ||
2243 mask.vxlan->vni[2] != 0xff)
2244 return rte_flow_error_set
2246 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2248 "no support for partial or "
2249 "empty mask on \"vxlan.vni\" field");
2251 * The VNI item assumes the VXLAN tunnel, it requires
2252 * at least the outer destination UDP port must be
2253 * specified without wildcards to allow kernel select
2254 * the virtual VXLAN device by port. Also outer IPv4
2255 * or IPv6 item must be specified (wilcards or even
2256 * zero mask are allowed) to let driver know the tunnel
2257 * IP version and process UDP traffic correctly.
2260 (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
2261 MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
2262 return rte_flow_error_set
2264 RTE_FLOW_ERROR_TYPE_ACTION,
2266 "no outer IP pattern found"
2267 " for vxlan tunnel");
2268 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2269 return rte_flow_error_set
2271 RTE_FLOW_ERROR_TYPE_ACTION,
2273 "no outer UDP pattern found"
2274 " for vxlan tunnel");
2276 * All items preceding the tunnel item become outer
2277 * ones and we should do extra validation for them
2278 * due to tc limitations for tunnel outer parameters.
2279 * Currently only outer UDP item requres extra check,
2280 * use the saved pointer instead of item list rescan.
2283 ret = flow_tcf_validate_vxlan_decap_udp
2287 /* Reset L4 protocol for inner parameters. */
2288 next_protocol = 0xff;
2291 return rte_flow_error_set(error, ENOTSUP,
2292 RTE_FLOW_ERROR_TYPE_ITEM,
2293 items, "item not supported");
/* Final cross-checks between the collected action_flags and item_flags. */
2296 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
2297 (action_flags & MLX5_FLOW_ACTION_DROP))
2298 return rte_flow_error_set(error, ENOTSUP,
2299 RTE_FLOW_ERROR_TYPE_ACTION,
2301 "set action is not compatible with "
2303 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
2304 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
2305 return rte_flow_error_set(error, ENOTSUP,
2306 RTE_FLOW_ERROR_TYPE_ACTION,
2308 "set action must be followed by "
/* Header rewrite actions need the matching outer layer in the pattern: IPv4, IPv6, then TCP or UDP. */
2311 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
2312 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
2313 return rte_flow_error_set(error, EINVAL,
2314 RTE_FLOW_ERROR_TYPE_ACTION,
2316 "no ipv4 item found in"
2320 (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
2321 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
2322 return rte_flow_error_set(error, EINVAL,
2323 RTE_FLOW_ERROR_TYPE_ACTION,
2325 "no ipv6 item found in"
2329 (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
2331 (MLX5_FLOW_LAYER_OUTER_L4_UDP |
2332 MLX5_FLOW_LAYER_OUTER_L4_TCP)))
2333 return rte_flow_error_set(error, EINVAL,
2334 RTE_FLOW_ERROR_TYPE_ACTION,
2336 "no TCP/UDP item found in"
2340 * FW syndrome (0xA9C090):
2341 * set_flow_table_entry: push vlan action fte in fdb can ONLY be
2342 * forward to the uplink.
/* port_id_dev is dereferenced only when the PORT_ID action flag is set; it is assigned in the PORT_ID action case above. */
2344 if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
2345 (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
2346 ((struct priv *)port_id_dev->data->dev_private)->representor)
2347 return rte_flow_error_set(error, ENOTSUP,
2348 RTE_FLOW_ERROR_TYPE_ACTION, actions,
2349 "vlan push can only be applied"
2350 " when forwarding to uplink port");
2352 * FW syndrome (0x294609):
2353 * set_flow_table_entry: modify/pop/push actions in fdb flow table
2354 * are supported only while forwarding to vport.
2356 if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
2357 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
2358 return rte_flow_error_set(error, ENOTSUP,
2359 RTE_FLOW_ERROR_TYPE_ACTION, actions,
2360 "vlan actions are supported"
2361 " only with port_id action");
2362 if ((action_flags & MLX5_TCF_VXLAN_ACTIONS) &&
2363 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
2364 return rte_flow_error_set(error, ENOTSUP,
2365 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2366 "vxlan actions are supported"
2367 " only with port_id action");
/* Every rule must carry at least one fate action. */
2368 if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
2369 return rte_flow_error_set(error, EINVAL,
2370 RTE_FLOW_ERROR_TYPE_ACTION, actions,
2371 "no fate action is found");
2373 (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
2375 (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
2376 MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
2377 return rte_flow_error_set(error, EINVAL,
2378 RTE_FLOW_ERROR_TYPE_ACTION,
2380 "no IP found in pattern");
2383 (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
2384 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
2385 return rte_flow_error_set(error, ENOTSUP,
2386 RTE_FLOW_ERROR_TYPE_ACTION,
2388 "no ethernet found in"
/* A decap action needs a VXLAN item in the pattern; an encap action is refused when the pattern already matches a tunnel. */
2391 if ((action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) &&
2392 !(item_flags & MLX5_FLOW_LAYER_VXLAN))
2393 return rte_flow_error_set(error, EINVAL,
2394 RTE_FLOW_ERROR_TYPE_ACTION,
2396 "no VNI pattern found"
2397 " for vxlan decap action");
2398 if ((action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) &&
2399 (item_flags & MLX5_FLOW_LAYER_TUNNEL))
2400 return rte_flow_error_set(error, EINVAL,
2401 RTE_FLOW_ERROR_TYPE_ACTION,
2403 "vxlan encap not supported"
2404 " for tunneled traffic");
2409 * Calculate maximum size of memory for flow items of Linux TC flower.
2412 * Pointer to the flow attributes.
2414 * Pointer to the list of items.
2415 * @param[out] action_flags
2416 * Pointer to the detected actions.
2419 * Maximum size of memory for items.
/* Sums the Netlink attribute space needed for the flower classifier options and for every pattern item; a VXLAN item also raises MLX5_FLOW_ACTION_VXLAN_DECAP in *action_flags. */
/* NOTE(review): this listing omits some original lines (the size declaration, breaks, the logging call, final return); comments describe only what is visible. */
2422 flow_tcf_get_items_size(const struct rte_flow_attr *attr,
2423 const struct rte_flow_item items[],
2424 uint64_t *action_flags)
/* Fixed part that does not depend on the pattern items. */
2428 size += SZ_NLATTR_STRZ_OF("flower") +
2429 SZ_NLATTR_TYPE_OF(uint16_t) + /* Outer ether type. */
2430 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
2431 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
/* The chain attribute is counted only for a non-zero group. */
2432 if (attr->group > 0)
2433 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
/* Per-item part: each item type adds the attributes listed in its case. */
2434 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
2435 switch (items->type) {
2436 case RTE_FLOW_ITEM_TYPE_VOID:
2438 case RTE_FLOW_ITEM_TYPE_PORT_ID:
2440 case RTE_FLOW_ITEM_TYPE_ETH:
2441 size += SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
2442 /* dst/src MAC addr and mask. */
2444 case RTE_FLOW_ITEM_TYPE_VLAN:
2445 size += SZ_NLATTR_TYPE_OF(uint16_t) +
2446 /* VLAN Ether type. */
2447 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
2448 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
2450 case RTE_FLOW_ITEM_TYPE_IPV4:
2451 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
2452 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
2453 /* dst/src IP addr and mask. */
2455 case RTE_FLOW_ITEM_TYPE_IPV6:
2456 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
2457 SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 4;
2458 /* dst/src IP addr and mask. */
2460 case RTE_FLOW_ITEM_TYPE_UDP:
2461 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
2462 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
2463 /* dst/src port and mask. */
2465 case RTE_FLOW_ITEM_TYPE_TCP:
2466 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
2467 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
2468 /* dst/src port and mask. */
2470 case RTE_FLOW_ITEM_TYPE_VXLAN:
2471 size += SZ_NLATTR_TYPE_OF(uint32_t);
2473 * There might be no VXLAN decap action in the action
2474 * list, nonetheless the VXLAN tunnel flow requires
2475 * the decap structure to be correctly applied to
2476 * VXLAN device, set the flag to create the structure.
2477 * Translation routine will not put the decap action
2478 * in tne Netlink message if there is no actual action
2481 *action_flags |= MLX5_FLOW_ACTION_VXLAN_DECAP;
/* The message arguments below belong to a report about an unsupported item type (the call itself is not visible in this listing). */
2485 "unsupported item %p type %d,"
2486 " items must be validated before flow creation",
2487 (const void *)items, items->type);
2495 * Calculate size of memory to store the VXLAN encapsultion
2496 * related items in the Netlink message buffer. Items list
2497 * is specified by RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action.
2498 * The item list should be validated.
2501 * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action object.
2502 * List of pattern items to scan data from.
2505 * The size the part of Netlink message buffer to store the
2506 * VXLAN encapsulation item attributes.
2509 flow_tcf_vxlan_encap_size(const struct rte_flow_action *action)
2511 const struct rte_flow_item *items;
2514 assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP);
2515 assert(action->conf);
2517 items = ((const struct rte_flow_action_vxlan_encap *)
2518 action->conf)->definition;
2520 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
2521 switch (items->type) {
2522 case RTE_FLOW_ITEM_TYPE_VOID:
2524 case RTE_FLOW_ITEM_TYPE_ETH:
2525 /* This item does not require message buffer. */
2527 case RTE_FLOW_ITEM_TYPE_IPV4:
2528 size += SZ_NLATTR_DATA_OF(IPV4_ADDR_LEN) * 2;
2530 case RTE_FLOW_ITEM_TYPE_IPV6:
2531 size += SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 2;
2533 case RTE_FLOW_ITEM_TYPE_UDP: {
2534 const struct rte_flow_item_udp *udp = items->mask;
2536 size += SZ_NLATTR_TYPE_OF(uint16_t);
2537 if (!udp || udp->hdr.src_port != RTE_BE16(0x0000))
2538 size += SZ_NLATTR_TYPE_OF(uint16_t);
2541 case RTE_FLOW_ITEM_TYPE_VXLAN:
2542 size += SZ_NLATTR_TYPE_OF(uint32_t);
2547 "unsupported item %p type %d,"
2548 " items must be validated"
2549 " before flow creation",
2550 (const void *)items, items->type);
2558 * Calculate maximum size of memory for flow actions of Linux TC flower and
2559 * extract specified actions.
2561 * @param[in] actions
2562 * Pointer to the list of actions.
2563 * @param[out] action_flags
2564 * Pointer to the detected actions.
2567 * Maximum size of memory for actions.
2570 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
2571 uint64_t *action_flags)
2576 size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
2577 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2578 switch (actions->type) {
2579 case RTE_FLOW_ACTION_TYPE_VOID:
2581 case RTE_FLOW_ACTION_TYPE_PORT_ID:
2582 size += SZ_NLATTR_NEST + /* na_act_index. */
2583 SZ_NLATTR_STRZ_OF("mirred") +
2584 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
2585 SZ_NLATTR_TYPE_OF(struct tc_mirred);
2586 flags |= MLX5_FLOW_ACTION_PORT_ID;
2588 case RTE_FLOW_ACTION_TYPE_JUMP:
2589 size += SZ_NLATTR_NEST + /* na_act_index. */
2590 SZ_NLATTR_STRZ_OF("gact") +
2591 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
2592 SZ_NLATTR_TYPE_OF(struct tc_gact);
2593 flags |= MLX5_FLOW_ACTION_JUMP;
2595 case RTE_FLOW_ACTION_TYPE_DROP:
2596 size += SZ_NLATTR_NEST + /* na_act_index. */
2597 SZ_NLATTR_STRZ_OF("gact") +
2598 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
2599 SZ_NLATTR_TYPE_OF(struct tc_gact);
2600 flags |= MLX5_FLOW_ACTION_DROP;
2602 case RTE_FLOW_ACTION_TYPE_COUNT:
2604 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
2605 flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
2606 goto action_of_vlan;
2607 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
2608 flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
2609 goto action_of_vlan;
2610 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
2611 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
2612 goto action_of_vlan;
2613 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
2614 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
2615 goto action_of_vlan;
2617 size += SZ_NLATTR_NEST + /* na_act_index. */
2618 SZ_NLATTR_STRZ_OF("vlan") +
2619 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
2620 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
2621 SZ_NLATTR_TYPE_OF(uint16_t) +
2622 /* VLAN protocol. */
2623 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
2624 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
2626 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
2627 size += SZ_NLATTR_NEST + /* na_act_index. */
2628 SZ_NLATTR_STRZ_OF("tunnel_key") +
2629 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
2630 SZ_NLATTR_TYPE_OF(uint8_t);
2631 size += SZ_NLATTR_TYPE_OF(struct tc_tunnel_key);
2632 size += flow_tcf_vxlan_encap_size(actions) +
2633 RTE_ALIGN_CEIL /* preceding encap params. */
2634 (sizeof(struct flow_tcf_vxlan_encap),
2636 flags |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
2638 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
2639 size += SZ_NLATTR_NEST + /* na_act_index. */
2640 SZ_NLATTR_STRZ_OF("tunnel_key") +
2641 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
2642 SZ_NLATTR_TYPE_OF(uint8_t);
2643 size += SZ_NLATTR_TYPE_OF(struct tc_tunnel_key);
2644 size += RTE_ALIGN_CEIL /* preceding decap params. */
2645 (sizeof(struct flow_tcf_vxlan_decap),
2647 flags |= MLX5_FLOW_ACTION_VXLAN_DECAP;
2649 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
2650 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
2651 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
2652 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
2653 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
2654 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
2655 case RTE_FLOW_ACTION_TYPE_SET_TTL:
2656 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
2657 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
2658 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
2659 size += flow_tcf_get_pedit_actions_size(&actions,
2664 "unsupported action %p type %d,"
2665 " items must be validated before flow creation",
2666 (const void *)actions, actions->type);
2670 *action_flags = flags;
2675 * Brand rtnetlink buffer with unique handle.
2677 * This handle should be unique for a given network interface to avoid
2681 * Pointer to Netlink message.
2683 * Unique 32-bit handle to use.
2686 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
2688 struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
2690 tcm->tcm_handle = handle;
2691 DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
2692 (void *)nlh, handle);
2696 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
2697 * memory required, allocates the memory, initializes Netlink message headers
2698 * and set unique TC message handle.
2701 * Pointer to the flow attributes.
2703 * Pointer to the list of items.
2704 * @param[in] actions
2705 * Pointer to the list of actions.
2707 * Pointer to the error structure.
2710 * Pointer to mlx5_flow object on success,
2711 * otherwise NULL and rte_errno is set.
2713 static struct mlx5_flow *
2714 flow_tcf_prepare(const struct rte_flow_attr *attr,
2715 const struct rte_flow_item items[],
2716 const struct rte_flow_action actions[],
2717 struct rte_flow_error *error)
2719 size_t size = RTE_ALIGN_CEIL
2720 (sizeof(struct mlx5_flow),
2721 alignof(struct flow_tcf_tunnel_hdr)) +
2722 MNL_ALIGN(sizeof(struct nlmsghdr)) +
2723 MNL_ALIGN(sizeof(struct tcmsg));
2724 struct mlx5_flow *dev_flow;
2725 uint64_t action_flags = 0;
2726 struct nlmsghdr *nlh;
2728 uint8_t *sp, *tun = NULL;
2730 size += flow_tcf_get_items_size(attr, items, &action_flags);
2731 size += flow_tcf_get_actions_and_size(actions, &action_flags);
2732 dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
2734 rte_flow_error_set(error, ENOMEM,
2735 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2736 "not enough memory to create E-Switch flow");
2739 sp = (uint8_t *)(dev_flow + 1);
2740 if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) {
2742 (sp, alignof(struct flow_tcf_tunnel_hdr));
2744 sp += RTE_ALIGN_CEIL
2745 (sizeof(struct flow_tcf_vxlan_encap),
2748 size -= RTE_ALIGN_CEIL
2749 (sizeof(struct flow_tcf_vxlan_encap),
2752 } else if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
2754 (sp, alignof(struct flow_tcf_tunnel_hdr));
2756 sp += RTE_ALIGN_CEIL
2757 (sizeof(struct flow_tcf_vxlan_decap),
2760 size -= RTE_ALIGN_CEIL
2761 (sizeof(struct flow_tcf_vxlan_decap),
2765 sp = RTE_PTR_ALIGN(sp, MNL_ALIGNTO);
2767 nlh = mnl_nlmsg_put_header(sp);
2768 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2769 *dev_flow = (struct mlx5_flow){
2770 .tcf = (struct mlx5_flow_tcf){
2772 .nlsize = size - RTE_ALIGN_CEIL
2773 (sizeof(struct mlx5_flow),
2774 alignof(struct flow_tcf_tunnel_hdr)),
2776 .tunnel = (struct flow_tcf_tunnel_hdr *)tun,
2781 if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP)
2782 dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_DECAP;
2783 else if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
2784 dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_ENCAP;
2786 * Generate a reasonably unique handle based on the address of the
2789 * This is straightforward on 32-bit systems where the flow pointer can
2790 * be used directly. Otherwise, its least significant part is taken
2791 * after shifting it by the previous power of two of the pointed buffer
2794 if (sizeof(dev_flow) <= 4)
2795 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
2797 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
2798 rte_log2_u32(rte_align32prevpow2(size)));
2803 * Make adjustments for supporting count actions.
2806 * Pointer to the Ethernet device structure.
2807 * @param[in] dev_flow
2808 * Pointer to mlx5_flow.
2810 * Pointer to error structure.
2813 * 0 On success else a negative errno value is returned and rte_errno is set.
2816 flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
2817 struct mlx5_flow *dev_flow,
2818 struct rte_flow_error *error)
2820 struct rte_flow *flow = dev_flow->flow;
2822 if (!flow->counter) {
2823 flow->counter = flow_tcf_counter_new();
2825 return rte_flow_error_set(error, rte_errno,
2826 RTE_FLOW_ERROR_TYPE_ACTION,
2828 "cannot get counter"
2835 * Convert VXLAN VNI to 32-bit integer.
2838 * VXLAN VNI in 24-bit wire format.
2841 * VXLAN VNI as a 32-bit integer value in network endian.
2843 static inline rte_be32_t
2844 vxlan_vni_as_be32(const uint8_t vni[3])
2850 .vni = { 0, vni[0], vni[1], vni[2] },
2856 * Helper function to process RTE_FLOW_ITEM_TYPE_ETH entry in configuration
2857 * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the MAC address fields
2858 * in the encapsulation parameters structure. The item must be prevalidated,
2859 * no any validation checks performed by function.
2862 * RTE_FLOW_ITEM_TYPE_ETH entry specification.
2864 * RTE_FLOW_ITEM_TYPE_ETH entry mask.
2866 * Structure to fill the gathered MAC address data.
2869 flow_tcf_parse_vxlan_encap_eth(const struct rte_flow_item_eth *spec,
2870 const struct rte_flow_item_eth *mask,
2871 struct flow_tcf_vxlan_encap *encap)
2873 /* Item must be validated before. No redundant checks. */
2875 if (!mask || !memcmp(&mask->dst,
2876 &rte_flow_item_eth_mask.dst,
2877 sizeof(rte_flow_item_eth_mask.dst))) {
2879 * Ethernet addresses are not supported by
2880 * tc as tunnel_key parameters. Destination
2881 * address is needed to form encap packet
2882 * header and retrieved by kernel from
2883 * implicit sources (ARP table, etc),
2884 * address masks are not supported at all.
2886 encap->eth.dst = spec->dst;
2887 encap->mask |= FLOW_TCF_ENCAP_ETH_DST;
2889 if (!mask || !memcmp(&mask->src,
2890 &rte_flow_item_eth_mask.src,
2891 sizeof(rte_flow_item_eth_mask.src))) {
2893 * Ethernet addresses are not supported by
2894 * tc as tunnel_key parameters. Source ethernet
2895 * address is ignored anyway.
2897 encap->eth.src = spec->src;
2898 encap->mask |= FLOW_TCF_ENCAP_ETH_SRC;
2903 * Helper function to process RTE_FLOW_ITEM_TYPE_IPV4 entry in configuration
2904 * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the IPV4 address fields
2905 * in the encapsulation parameters structure. The item must be prevalidated,
2906 * no any validation checks performed by function.
2909 * RTE_FLOW_ITEM_TYPE_IPV4 entry specification.
2911 * Structure to fill the gathered IPV4 address data.
2914 flow_tcf_parse_vxlan_encap_ipv4(const struct rte_flow_item_ipv4 *spec,
2915 struct flow_tcf_vxlan_encap *encap)
2917 /* Item must be validated before. No redundant checks. */
2919 encap->ipv4.dst = spec->hdr.dst_addr;
2920 encap->ipv4.src = spec->hdr.src_addr;
2921 encap->mask |= FLOW_TCF_ENCAP_IPV4_SRC |
2922 FLOW_TCF_ENCAP_IPV4_DST;
2926 * Helper function to process RTE_FLOW_ITEM_TYPE_IPV6 entry in configuration
2927 * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the IPV6 address fields
2928 * in the encapsulation parameters structure. The item must be prevalidated,
2929 * no any validation checks performed by function.
2932 * RTE_FLOW_ITEM_TYPE_IPV6 entry specification.
2934 * Structure to fill the gathered IPV6 address data.
2937 flow_tcf_parse_vxlan_encap_ipv6(const struct rte_flow_item_ipv6 *spec,
2938 struct flow_tcf_vxlan_encap *encap)
2940 /* Item must be validated before. No redundant checks. */
2942 memcpy(encap->ipv6.dst, spec->hdr.dst_addr, IPV6_ADDR_LEN);
2943 memcpy(encap->ipv6.src, spec->hdr.src_addr, IPV6_ADDR_LEN);
2944 encap->mask |= FLOW_TCF_ENCAP_IPV6_SRC |
2945 FLOW_TCF_ENCAP_IPV6_DST;
2949 * Helper function to process RTE_FLOW_ITEM_TYPE_UDP entry in configuration
2950 * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the UDP port fields
2951 * in the encapsulation parameters structure. The item must be prevalidated,
2952 * no any validation checks performed by function.
2955 * RTE_FLOW_ITEM_TYPE_UDP entry specification.
2957 * RTE_FLOW_ITEM_TYPE_UDP entry mask.
2959 * Structure to fill the gathered UDP port data.
2962 flow_tcf_parse_vxlan_encap_udp(const struct rte_flow_item_udp *spec,
2963 const struct rte_flow_item_udp *mask,
2964 struct flow_tcf_vxlan_encap *encap)
2967 encap->udp.dst = spec->hdr.dst_port;
2968 encap->mask |= FLOW_TCF_ENCAP_UDP_DST;
2969 if (!mask || mask->hdr.src_port != RTE_BE16(0x0000)) {
2970 encap->udp.src = spec->hdr.src_port;
2971 encap->mask |= FLOW_TCF_ENCAP_IPV4_SRC;
2976 * Helper function to process RTE_FLOW_ITEM_TYPE_VXLAN entry in configuration
2977 * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the VNI fields
2978 * in the encapsulation parameters structure. The item must be prevalidated,
2979 * no any validation checks performed by function.
2982 * RTE_FLOW_ITEM_TYPE_VXLAN entry specification.
2984 * Structure to fill the gathered VNI address data.
2987 flow_tcf_parse_vxlan_encap_vni(const struct rte_flow_item_vxlan *spec,
2988 struct flow_tcf_vxlan_encap *encap)
2990 /* Item must be validated before. Do not redundant checks. */
2992 memcpy(encap->vxlan.vni, spec->vni, sizeof(encap->vxlan.vni));
2993 encap->mask |= FLOW_TCF_ENCAP_VXLAN_VNI;
2997 * Populate consolidated encapsulation object from list of pattern items.
2999 * Helper function to process configuration of action such as
3000 * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. The item list should be
3001 * validated, there is no way to return an meaningful error.
3004 * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action object.
3005 * List of pattern items to gather data from.
3007 * Structure to fill gathered data.
3010 flow_tcf_vxlan_encap_parse(const struct rte_flow_action *action,
3011 struct flow_tcf_vxlan_encap *encap)
3014 const struct rte_flow_item_eth *eth;
3015 const struct rte_flow_item_ipv4 *ipv4;
3016 const struct rte_flow_item_ipv6 *ipv6;
3017 const struct rte_flow_item_udp *udp;
3018 const struct rte_flow_item_vxlan *vxlan;
3020 const struct rte_flow_item *items;
3022 assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP);
3023 assert(action->conf);
3025 items = ((const struct rte_flow_action_vxlan_encap *)
3026 action->conf)->definition;
3028 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
3029 switch (items->type) {
3030 case RTE_FLOW_ITEM_TYPE_VOID:
3032 case RTE_FLOW_ITEM_TYPE_ETH:
3033 mask.eth = items->mask;
3034 spec.eth = items->spec;
3035 flow_tcf_parse_vxlan_encap_eth(spec.eth, mask.eth,
3038 case RTE_FLOW_ITEM_TYPE_IPV4:
3039 spec.ipv4 = items->spec;
3040 flow_tcf_parse_vxlan_encap_ipv4(spec.ipv4, encap);
3042 case RTE_FLOW_ITEM_TYPE_IPV6:
3043 spec.ipv6 = items->spec;
3044 flow_tcf_parse_vxlan_encap_ipv6(spec.ipv6, encap);
3046 case RTE_FLOW_ITEM_TYPE_UDP:
3047 mask.udp = items->mask;
3048 spec.udp = items->spec;
3049 flow_tcf_parse_vxlan_encap_udp(spec.udp, mask.udp,
3052 case RTE_FLOW_ITEM_TYPE_VXLAN:
3053 spec.vxlan = items->spec;
3054 flow_tcf_parse_vxlan_encap_vni(spec.vxlan, encap);
3059 "unsupported item %p type %d,"
3060 " items must be validated"
3061 " before flow creation",
3062 (const void *)items, items->type);
3070 * Translate flow for Linux TC flower and construct Netlink message.
3073 * Pointer to the priv structure.
3074 * @param[in, out] flow
3075 * Pointer to the sub flow.
3077 * Pointer to the flow attributes.
3079 * Pointer to the list of items.
3080 * @param[in] actions
3081 * Pointer to the list of actions.
3083 * Pointer to the error structure.
3086 * 0 on success, a negative errno value otherwise and rte_errno is set.
3089 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
3090 const struct rte_flow_attr *attr,
3091 const struct rte_flow_item items[],
3092 const struct rte_flow_action actions[],
3093 struct rte_flow_error *error)
3096 const struct rte_flow_item_port_id *port_id;
3097 const struct rte_flow_item_eth *eth;
3098 const struct rte_flow_item_vlan *vlan;
3099 const struct rte_flow_item_ipv4 *ipv4;
3100 const struct rte_flow_item_ipv6 *ipv6;
3101 const struct rte_flow_item_tcp *tcp;
3102 const struct rte_flow_item_udp *udp;
3103 const struct rte_flow_item_vxlan *vxlan;
3106 const struct rte_flow_action_port_id *port_id;
3107 const struct rte_flow_action_jump *jump;
3108 const struct rte_flow_action_of_push_vlan *of_push_vlan;
3109 const struct rte_flow_action_of_set_vlan_vid *
3111 const struct rte_flow_action_of_set_vlan_pcp *
3115 struct flow_tcf_tunnel_hdr *hdr;
3116 struct flow_tcf_vxlan_decap *vxlan;
3121 struct flow_tcf_tunnel_hdr *hdr;
3122 struct flow_tcf_vxlan_encap *vxlan;
3126 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
3127 struct nlmsghdr *nlh = dev_flow->tcf.nlh;
3128 struct tcmsg *tcm = dev_flow->tcf.tcm;
3129 uint32_t na_act_index_cur;
3130 rte_be16_t inner_etype = RTE_BE16(ETH_P_ALL);
3131 rte_be16_t outer_etype = RTE_BE16(ETH_P_ALL);
3132 rte_be16_t vlan_etype = RTE_BE16(ETH_P_ALL);
3133 bool ip_proto_set = 0;
3134 bool tunnel_outer = 0;
3135 struct nlattr *na_flower;
3136 struct nlattr *na_flower_act;
3137 struct nlattr *na_vlan_id = NULL;
3138 struct nlattr *na_vlan_priority = NULL;
3139 uint64_t item_flags = 0;
3142 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
3143 PTOI_TABLE_SZ_MAX(dev)));
3144 if (dev_flow->tcf.tunnel) {
3145 switch (dev_flow->tcf.tunnel->type) {
3146 case FLOW_TCF_TUNACT_VXLAN_DECAP:
3147 decap.vxlan = dev_flow->tcf.vxlan_decap;
3150 case FLOW_TCF_TUNACT_VXLAN_ENCAP:
3151 encap.vxlan = dev_flow->tcf.vxlan_encap;
3153 /* New tunnel actions can be added here. */
3159 nlh = dev_flow->tcf.nlh;
3160 tcm = dev_flow->tcf.tcm;
3161 /* Prepare API must have been called beforehand. */
3162 assert(nlh != NULL && tcm != NULL);
3163 tcm->tcm_family = AF_UNSPEC;
3164 tcm->tcm_ifindex = ptoi[0].ifindex;
3165 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
3167 * Priority cannot be zero to prevent the kernel from picking one
3170 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16, outer_etype);
3171 if (attr->group > 0)
3172 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
3173 mnl_attr_put_strz(nlh, TCA_KIND, "flower");
3174 na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
3175 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
3178 switch (items->type) {
3179 case RTE_FLOW_ITEM_TYPE_VOID:
3181 case RTE_FLOW_ITEM_TYPE_PORT_ID:
3182 mask.port_id = flow_tcf_item_mask
3183 (items, &rte_flow_item_port_id_mask,
3184 &flow_tcf_mask_supported.port_id,
3185 &flow_tcf_mask_empty.port_id,
3186 sizeof(flow_tcf_mask_supported.port_id),
3188 assert(mask.port_id);
3189 if (mask.port_id == &flow_tcf_mask_empty.port_id)
3191 spec.port_id = items->spec;
3192 if (!mask.port_id->id)
3195 for (i = 0; ptoi[i].ifindex; ++i)
3196 if (ptoi[i].port_id == spec.port_id->id)
3198 assert(ptoi[i].ifindex);
3199 tcm->tcm_ifindex = ptoi[i].ifindex;
3201 case RTE_FLOW_ITEM_TYPE_ETH:
3202 item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
3203 MLX5_FLOW_LAYER_INNER_L2 :
3204 MLX5_FLOW_LAYER_OUTER_L2;
3205 mask.eth = flow_tcf_item_mask
3206 (items, &rte_flow_item_eth_mask,
3207 &flow_tcf_mask_supported.eth,
3208 &flow_tcf_mask_empty.eth,
3209 sizeof(flow_tcf_mask_supported.eth),
3212 if (mask.eth == &flow_tcf_mask_empty.eth)
3214 spec.eth = items->spec;
3215 if (mask.eth->type) {
3216 if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3217 inner_etype = spec.eth->type;
3219 outer_etype = spec.eth->type;
3223 "outer L2 addresses cannot be"
3224 " forced is outer ones for tunnel,"
3225 " parameter is ignored");
3228 if (!is_zero_ether_addr(&mask.eth->dst)) {
3229 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
3231 spec.eth->dst.addr_bytes);
3232 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
3234 mask.eth->dst.addr_bytes);
3236 if (!is_zero_ether_addr(&mask.eth->src)) {
3237 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
3239 spec.eth->src.addr_bytes);
3240 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
3242 mask.eth->src.addr_bytes);
3244 assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
3246 case RTE_FLOW_ITEM_TYPE_VLAN:
3249 assert(!tunnel_outer);
3250 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
3251 mask.vlan = flow_tcf_item_mask
3252 (items, &rte_flow_item_vlan_mask,
3253 &flow_tcf_mask_supported.vlan,
3254 &flow_tcf_mask_empty.vlan,
3255 sizeof(flow_tcf_mask_supported.vlan),
3258 if (mask.vlan == &flow_tcf_mask_empty.vlan)
3260 spec.vlan = items->spec;
3261 assert(outer_etype == RTE_BE16(ETH_P_ALL) ||
3262 outer_etype == RTE_BE16(ETH_P_8021Q));
3263 outer_etype = RTE_BE16(ETH_P_8021Q);
3264 if (mask.vlan->inner_type)
3265 vlan_etype = spec.vlan->inner_type;
3266 if (mask.vlan->tci & RTE_BE16(0xe000))
3267 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
3269 (spec.vlan->tci) >> 13) & 0x7);
3270 if (mask.vlan->tci & RTE_BE16(0x0fff))
3271 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
3275 assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
3277 case RTE_FLOW_ITEM_TYPE_IPV4:
3278 item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
3279 MLX5_FLOW_LAYER_INNER_L3_IPV4 :
3280 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
3281 mask.ipv4 = flow_tcf_item_mask
3282 (items, &rte_flow_item_ipv4_mask,
3283 &flow_tcf_mask_supported.ipv4,
3284 &flow_tcf_mask_empty.ipv4,
3285 sizeof(flow_tcf_mask_supported.ipv4),
3288 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
3289 assert(inner_etype == RTE_BE16(ETH_P_ALL) ||
3290 inner_etype == RTE_BE16(ETH_P_IP));
3291 inner_etype = RTE_BE16(ETH_P_IP);
3292 } else if (outer_etype == RTE_BE16(ETH_P_8021Q)) {
3293 assert(vlan_etype == RTE_BE16(ETH_P_ALL) ||
3294 vlan_etype == RTE_BE16(ETH_P_IP));
3295 vlan_etype = RTE_BE16(ETH_P_IP);
3297 assert(outer_etype == RTE_BE16(ETH_P_ALL) ||
3298 outer_etype == RTE_BE16(ETH_P_IP));
3299 outer_etype = RTE_BE16(ETH_P_IP);
3301 spec.ipv4 = items->spec;
3302 if (!tunnel_outer && mask.ipv4->hdr.next_proto_id) {
3304 * No way to set IP protocol for outer tunnel
3305 * layers. Usually it is fixed, for example,
3306 * to UDP for VXLAN/GPE.
3308 assert(spec.ipv4); /* Mask is not empty. */
3309 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
3310 spec.ipv4->hdr.next_proto_id);
3313 if (mask.ipv4 == &flow_tcf_mask_empty.ipv4 ||
3314 (!mask.ipv4->hdr.src_addr &&
3315 !mask.ipv4->hdr.dst_addr)) {
3319 * For tunnel outer we must set outer IP key
3320 * anyway, even if the specification/mask is
3321 * empty. There is no other way to tell the
3322 * kernel about the outer layer protocol.
3325 (nlh, TCA_FLOWER_KEY_ENC_IPV4_SRC,
3326 mask.ipv4->hdr.src_addr);
3328 (nlh, TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
3329 mask.ipv4->hdr.src_addr);
3330 assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
3333 if (mask.ipv4->hdr.src_addr) {
3335 (nlh, tunnel_outer ?
3336 TCA_FLOWER_KEY_ENC_IPV4_SRC :
3337 TCA_FLOWER_KEY_IPV4_SRC,
3338 spec.ipv4->hdr.src_addr);
3340 (nlh, tunnel_outer ?
3341 TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK :
3342 TCA_FLOWER_KEY_IPV4_SRC_MASK,
3343 mask.ipv4->hdr.src_addr);
3345 if (mask.ipv4->hdr.dst_addr) {
3347 (nlh, tunnel_outer ?
3348 TCA_FLOWER_KEY_ENC_IPV4_DST :
3349 TCA_FLOWER_KEY_IPV4_DST,
3350 spec.ipv4->hdr.dst_addr);
3352 (nlh, tunnel_outer ?
3353 TCA_FLOWER_KEY_ENC_IPV4_DST_MASK :
3354 TCA_FLOWER_KEY_IPV4_DST_MASK,
3355 mask.ipv4->hdr.dst_addr);
3357 assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
3359 case RTE_FLOW_ITEM_TYPE_IPV6: {
3360 bool ipv6_src, ipv6_dst;
3362 item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
3363 MLX5_FLOW_LAYER_INNER_L3_IPV6 :
3364 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
3365 mask.ipv6 = flow_tcf_item_mask
3366 (items, &rte_flow_item_ipv6_mask,
3367 &flow_tcf_mask_supported.ipv6,
3368 &flow_tcf_mask_empty.ipv6,
3369 sizeof(flow_tcf_mask_supported.ipv6),
3372 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
3373 assert(inner_etype == RTE_BE16(ETH_P_ALL) ||
3374 inner_etype == RTE_BE16(ETH_P_IPV6));
3375 inner_etype = RTE_BE16(ETH_P_IPV6);
3376 } else if (outer_etype == RTE_BE16(ETH_P_8021Q)) {
3377 assert(vlan_etype == RTE_BE16(ETH_P_ALL) ||
3378 vlan_etype == RTE_BE16(ETH_P_IPV6));
3379 vlan_etype = RTE_BE16(ETH_P_IPV6);
3381 assert(outer_etype == RTE_BE16(ETH_P_ALL) ||
3382 outer_etype == RTE_BE16(ETH_P_IPV6));
3383 outer_etype = RTE_BE16(ETH_P_IPV6);
3385 spec.ipv6 = items->spec;
3386 if (!tunnel_outer && mask.ipv6->hdr.proto) {
3388 * No way to set IP protocol for outer tunnel
3389 * layers. Usually it is fixed, for example,
3390 * to UDP for VXLAN/GPE.
3392 assert(spec.ipv6); /* Mask is not empty. */
3393 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
3394 spec.ipv6->hdr.proto);
3397 ipv6_dst = !IN6_IS_ADDR_UNSPECIFIED
3398 (mask.ipv6->hdr.dst_addr);
3399 ipv6_src = !IN6_IS_ADDR_UNSPECIFIED
3400 (mask.ipv6->hdr.src_addr);
3401 if (mask.ipv6 == &flow_tcf_mask_empty.ipv6 ||
3402 (!ipv6_dst && !ipv6_src)) {
3406 * For tunnel outer we must set outer IP key
3407 * anyway, even if the specification/mask is
3408 * empty. There is no other way to tell the
3409 * kernel about the outer layer protocol.
3412 TCA_FLOWER_KEY_ENC_IPV6_SRC,
3414 mask.ipv6->hdr.src_addr);
3416 TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
3418 mask.ipv6->hdr.src_addr);
3419 assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
3423 mnl_attr_put(nlh, tunnel_outer ?
3424 TCA_FLOWER_KEY_ENC_IPV6_SRC :
3425 TCA_FLOWER_KEY_IPV6_SRC,
3427 spec.ipv6->hdr.src_addr);
3428 mnl_attr_put(nlh, tunnel_outer ?
3429 TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK :
3430 TCA_FLOWER_KEY_IPV6_SRC_MASK,
3432 mask.ipv6->hdr.src_addr);
3435 mnl_attr_put(nlh, tunnel_outer ?
3436 TCA_FLOWER_KEY_ENC_IPV6_DST :
3437 TCA_FLOWER_KEY_IPV6_DST,
3439 spec.ipv6->hdr.dst_addr);
3440 mnl_attr_put(nlh, tunnel_outer ?
3441 TCA_FLOWER_KEY_ENC_IPV6_DST_MASK :
3442 TCA_FLOWER_KEY_IPV6_DST_MASK,
3444 mask.ipv6->hdr.dst_addr);
3446 assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
3449 case RTE_FLOW_ITEM_TYPE_UDP:
3450 item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
3451 MLX5_FLOW_LAYER_INNER_L4_UDP :
3452 MLX5_FLOW_LAYER_OUTER_L4_UDP;
3453 mask.udp = flow_tcf_item_mask
3454 (items, &rte_flow_item_udp_mask,
3455 &flow_tcf_mask_supported.udp,
3456 &flow_tcf_mask_empty.udp,
3457 sizeof(flow_tcf_mask_supported.udp),
3460 spec.udp = items->spec;
3461 if (!tunnel_outer) {
3464 (nlh, TCA_FLOWER_KEY_IP_PROTO,
3466 if (mask.udp == &flow_tcf_mask_empty.udp)
3469 assert(mask.udp != &flow_tcf_mask_empty.udp);
3470 decap.vxlan->udp_port =
3472 (spec.udp->hdr.dst_port);
3474 if (mask.udp->hdr.src_port) {
3476 (nlh, tunnel_outer ?
3477 TCA_FLOWER_KEY_ENC_UDP_SRC_PORT :
3478 TCA_FLOWER_KEY_UDP_SRC,
3479 spec.udp->hdr.src_port);
3481 (nlh, tunnel_outer ?
3482 TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK :
3483 TCA_FLOWER_KEY_UDP_SRC_MASK,
3484 mask.udp->hdr.src_port);
3486 if (mask.udp->hdr.dst_port) {
3488 (nlh, tunnel_outer ?
3489 TCA_FLOWER_KEY_ENC_UDP_DST_PORT :
3490 TCA_FLOWER_KEY_UDP_DST,
3491 spec.udp->hdr.dst_port);
3493 (nlh, tunnel_outer ?
3494 TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK :
3495 TCA_FLOWER_KEY_UDP_DST_MASK,
3496 mask.udp->hdr.dst_port);
3498 assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
3500 case RTE_FLOW_ITEM_TYPE_TCP:
3501 item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
3502 MLX5_FLOW_LAYER_INNER_L4_TCP :
3503 MLX5_FLOW_LAYER_OUTER_L4_TCP;
3504 mask.tcp = flow_tcf_item_mask
3505 (items, &rte_flow_item_tcp_mask,
3506 &flow_tcf_mask_supported.tcp,
3507 &flow_tcf_mask_empty.tcp,
3508 sizeof(flow_tcf_mask_supported.tcp),
3512 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
3514 if (mask.tcp == &flow_tcf_mask_empty.tcp)
3516 spec.tcp = items->spec;
3517 if (mask.tcp->hdr.src_port) {
3518 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
3519 spec.tcp->hdr.src_port);
3520 mnl_attr_put_u16(nlh,
3521 TCA_FLOWER_KEY_TCP_SRC_MASK,
3522 mask.tcp->hdr.src_port);
3524 if (mask.tcp->hdr.dst_port) {
3525 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
3526 spec.tcp->hdr.dst_port);
3527 mnl_attr_put_u16(nlh,
3528 TCA_FLOWER_KEY_TCP_DST_MASK,
3529 mask.tcp->hdr.dst_port);
3531 if (mask.tcp->hdr.tcp_flags) {
3534 TCA_FLOWER_KEY_TCP_FLAGS,
3536 (spec.tcp->hdr.tcp_flags));
3539 TCA_FLOWER_KEY_TCP_FLAGS_MASK,
3541 (mask.tcp->hdr.tcp_flags));
3543 assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
3545 case RTE_FLOW_ITEM_TYPE_VXLAN:
3546 assert(decap.vxlan);
3548 item_flags |= MLX5_FLOW_LAYER_VXLAN;
3549 spec.vxlan = items->spec;
3550 mnl_attr_put_u32(nlh,
3551 TCA_FLOWER_KEY_ENC_KEY_ID,
3552 vxlan_vni_as_be32(spec.vxlan->vni));
3553 assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
3556 return rte_flow_error_set(error, ENOTSUP,
3557 RTE_FLOW_ERROR_TYPE_ITEM,
3558 NULL, "item not supported");
3562 * Set the ether_type flower key and tc rule protocol:
3563 * - if there is neither VLAN nor VXLAN the key is taken from
3564 * eth item directly or deduced from L3 items.
3565 * - if there is vlan item then key is fixed to 802.1q.
3566 * - if there is vxlan item then key is set to inner tunnel type.
3567 * - simultaneous vlan and vxlan items are prohibited.
3569 if (outer_etype != RTE_BE16(ETH_P_ALL)) {
3570 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
3572 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
3573 if (inner_etype != RTE_BE16(ETH_P_ALL))
3574 mnl_attr_put_u16(nlh,
3575 TCA_FLOWER_KEY_ETH_TYPE,
3578 mnl_attr_put_u16(nlh,
3579 TCA_FLOWER_KEY_ETH_TYPE,
3581 if (outer_etype == RTE_BE16(ETH_P_8021Q) &&
3582 vlan_etype != RTE_BE16(ETH_P_ALL))
3583 mnl_attr_put_u16(nlh,
3584 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
3587 assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
3589 na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
3590 na_act_index_cur = 1;
3591 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3592 struct nlattr *na_act_index;
3593 struct nlattr *na_act;
3594 unsigned int vlan_act;
3597 switch (actions->type) {
3598 case RTE_FLOW_ACTION_TYPE_VOID:
3600 case RTE_FLOW_ACTION_TYPE_PORT_ID:
3601 conf.port_id = actions->conf;
3602 if (conf.port_id->original)
3605 for (i = 0; ptoi[i].ifindex; ++i)
3606 if (ptoi[i].port_id == conf.port_id->id)
3608 assert(ptoi[i].ifindex);
3610 mnl_attr_nest_start(nlh, na_act_index_cur++);
3611 assert(na_act_index);
3612 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
3613 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
3616 assert(dev_flow->tcf.tunnel);
3617 dev_flow->tcf.tunnel->ifindex_ptr =
3618 &((struct tc_mirred *)
3619 mnl_attr_get_payload
3620 (mnl_nlmsg_get_payload_tail
3623 mnl_attr_put(nlh, TCA_MIRRED_PARMS,
3624 sizeof(struct tc_mirred),
3625 &(struct tc_mirred){
3626 .action = TC_ACT_STOLEN,
3627 .eaction = TCA_EGRESS_REDIR,
3628 .ifindex = ptoi[i].ifindex,
3630 mnl_attr_nest_end(nlh, na_act);
3631 mnl_attr_nest_end(nlh, na_act_index);
3633 case RTE_FLOW_ACTION_TYPE_JUMP:
3634 conf.jump = actions->conf;
3636 mnl_attr_nest_start(nlh, na_act_index_cur++);
3637 assert(na_act_index);
3638 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
3639 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
3641 mnl_attr_put(nlh, TCA_GACT_PARMS,
3642 sizeof(struct tc_gact),
3644 .action = TC_ACT_GOTO_CHAIN |
3647 mnl_attr_nest_end(nlh, na_act);
3648 mnl_attr_nest_end(nlh, na_act_index);
3650 case RTE_FLOW_ACTION_TYPE_DROP:
3652 mnl_attr_nest_start(nlh, na_act_index_cur++);
3653 assert(na_act_index);
3654 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
3655 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
3657 mnl_attr_put(nlh, TCA_GACT_PARMS,
3658 sizeof(struct tc_gact),
3660 .action = TC_ACT_SHOT,
3662 mnl_attr_nest_end(nlh, na_act);
3663 mnl_attr_nest_end(nlh, na_act_index);
3665 case RTE_FLOW_ACTION_TYPE_COUNT:
3667 * Driver adds the count action implicitly for
3668 * each rule it creates.
3670 ret = flow_tcf_translate_action_count(dev,
3675 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
3676 conf.of_push_vlan = NULL;
3677 vlan_act = TCA_VLAN_ACT_POP;
3678 goto action_of_vlan;
3679 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3680 conf.of_push_vlan = actions->conf;
3681 vlan_act = TCA_VLAN_ACT_PUSH;
3682 goto action_of_vlan;
3683 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3684 conf.of_set_vlan_vid = actions->conf;
3686 goto override_na_vlan_id;
3687 vlan_act = TCA_VLAN_ACT_MODIFY;
3688 goto action_of_vlan;
3689 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
3690 conf.of_set_vlan_pcp = actions->conf;
3691 if (na_vlan_priority)
3692 goto override_na_vlan_priority;
3693 vlan_act = TCA_VLAN_ACT_MODIFY;
3694 goto action_of_vlan;
3697 mnl_attr_nest_start(nlh, na_act_index_cur++);
3698 assert(na_act_index);
3699 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
3700 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
3702 mnl_attr_put(nlh, TCA_VLAN_PARMS,
3703 sizeof(struct tc_vlan),
3705 .action = TC_ACT_PIPE,
3706 .v_action = vlan_act,
3708 if (vlan_act == TCA_VLAN_ACT_POP) {
3709 mnl_attr_nest_end(nlh, na_act);
3710 mnl_attr_nest_end(nlh, na_act_index);
3713 if (vlan_act == TCA_VLAN_ACT_PUSH)
3714 mnl_attr_put_u16(nlh,
3715 TCA_VLAN_PUSH_VLAN_PROTOCOL,
3716 conf.of_push_vlan->ethertype);
3717 na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
3718 mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
3719 na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
3720 mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
3721 mnl_attr_nest_end(nlh, na_act);
3722 mnl_attr_nest_end(nlh, na_act_index);
3723 if (actions->type ==
3724 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
3725 override_na_vlan_id:
3726 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
3727 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
3729 (conf.of_set_vlan_vid->vlan_vid);
3730 } else if (actions->type ==
3731 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
3732 override_na_vlan_priority:
3733 na_vlan_priority->nla_type =
3734 TCA_VLAN_PUSH_VLAN_PRIORITY;
3735 *(uint8_t *)mnl_attr_get_payload
3736 (na_vlan_priority) =
3737 conf.of_set_vlan_pcp->vlan_pcp;
3740 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
3741 assert(decap.vxlan);
3742 assert(dev_flow->tcf.tunnel);
3743 dev_flow->tcf.tunnel->ifindex_ptr =
3744 (unsigned int *)&tcm->tcm_ifindex;
3746 mnl_attr_nest_start(nlh, na_act_index_cur++);
3747 assert(na_act_index);
3748 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "tunnel_key");
3749 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
3751 mnl_attr_put(nlh, TCA_TUNNEL_KEY_PARMS,
3752 sizeof(struct tc_tunnel_key),
3753 &(struct tc_tunnel_key){
3754 .action = TC_ACT_PIPE,
3755 .t_action = TCA_TUNNEL_KEY_ACT_RELEASE,
3757 mnl_attr_nest_end(nlh, na_act);
3758 mnl_attr_nest_end(nlh, na_act_index);
3759 assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
3761 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3762 assert(encap.vxlan);
3763 flow_tcf_vxlan_encap_parse(actions, encap.vxlan);
3765 mnl_attr_nest_start(nlh, na_act_index_cur++);
3766 assert(na_act_index);
3767 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "tunnel_key");
3768 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
3770 mnl_attr_put(nlh, TCA_TUNNEL_KEY_PARMS,
3771 sizeof(struct tc_tunnel_key),
3772 &(struct tc_tunnel_key){
3773 .action = TC_ACT_PIPE,
3774 .t_action = TCA_TUNNEL_KEY_ACT_SET,
3776 if (encap.vxlan->mask & FLOW_TCF_ENCAP_UDP_DST)
3777 mnl_attr_put_u16(nlh,
3778 TCA_TUNNEL_KEY_ENC_DST_PORT,
3779 encap.vxlan->udp.dst);
3780 if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV4_SRC)
3781 mnl_attr_put_u32(nlh,
3782 TCA_TUNNEL_KEY_ENC_IPV4_SRC,
3783 encap.vxlan->ipv4.src);
3784 if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV4_DST)
3785 mnl_attr_put_u32(nlh,
3786 TCA_TUNNEL_KEY_ENC_IPV4_DST,
3787 encap.vxlan->ipv4.dst);
3788 if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV6_SRC)
3790 TCA_TUNNEL_KEY_ENC_IPV6_SRC,
3791 sizeof(encap.vxlan->ipv6.src),
3792 &encap.vxlan->ipv6.src);
3793 if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV6_DST)
3795 TCA_TUNNEL_KEY_ENC_IPV6_DST,
3796 sizeof(encap.vxlan->ipv6.dst),
3797 &encap.vxlan->ipv6.dst);
3798 if (encap.vxlan->mask & FLOW_TCF_ENCAP_VXLAN_VNI)
3799 mnl_attr_put_u32(nlh,
3800 TCA_TUNNEL_KEY_ENC_KEY_ID,
3802 (encap.vxlan->vxlan.vni));
3803 mnl_attr_put_u8(nlh, TCA_TUNNEL_KEY_NO_CSUM, 0);
3804 mnl_attr_nest_end(nlh, na_act);
3805 mnl_attr_nest_end(nlh, na_act_index);
3806 assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
3808 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
3809 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
3810 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
3811 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
3812 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
3813 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
3814 case RTE_FLOW_ACTION_TYPE_SET_TTL:
3815 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
3816 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
3817 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
3819 mnl_attr_nest_start(nlh, na_act_index_cur++);
3820 flow_tcf_create_pedit_mnl_msg(nlh,
3821 &actions, item_flags);
3822 mnl_attr_nest_end(nlh, na_act_index);
3825 return rte_flow_error_set(error, ENOTSUP,
3826 RTE_FLOW_ERROR_TYPE_ACTION,
3828 "action not supported");
3832 assert(na_flower_act);
3833 mnl_attr_nest_end(nlh, na_flower_act);
3834 dev_flow->tcf.ptc_flags = mnl_attr_get_payload
3835 (mnl_nlmsg_get_payload_tail(nlh));
3836 mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, decap.vxlan ?
3837 0 : TCA_CLS_FLAGS_SKIP_SW);
3838 mnl_attr_nest_end(nlh, na_flower);
3839 if (dev_flow->tcf.tunnel && dev_flow->tcf.tunnel->ifindex_ptr)
3840 dev_flow->tcf.tunnel->ifindex_org =
3841 *dev_flow->tcf.tunnel->ifindex_ptr;
3842 assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
3847 * Send Netlink message with acknowledgment.
3850 * Flow context to use.
3852 * Message to send. This function always raises the NLM_F_ACK flag before sending.
3855 * Callback handler for received message.
3857 * Context pointer for callback handler.
3860 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Sends the request pointed to by nlh over tcf->nl with NLM_F_ACK set and
 * feeds the replies received into tcf->buf to mnl_cb_run() together with
 * cb and arg.
 * NOTE(review): the return type, braces, declarations of ret/err, the loop
 * header and the return statements are not visible in this excerpt.
 */
3863 flow_tcf_nl_ack(struct mlx5_flow_tcf_context *tcf,
3864 struct nlmsghdr *nlh,
3865 mnl_cb_t cb, void *arg)
/* Socket port id and sequence number are both handed to mnl_cb_run() below. */
3867 unsigned int portid = mnl_socket_get_portid(tcf->nl);
3868 uint32_t seq = tcf->seq++;
3874 /* seq 0 is reserved for kernel event-driven notifications. */
/* Stamp the request and ask for an acknowledgment before sending it. */
3877 nlh->nlmsg_seq = seq;
3878 nlh->nlmsg_flags |= NLM_F_ACK;
3879 ret = mnl_socket_sendto(tcf->nl, nlh, nlh->nlmsg_len);
3881 /* Message send error occurred. */
/* From here on nlh refers to the receive buffer, not to the request. */
3885 nlh = (struct nlmsghdr *)(tcf->buf);
3887 * The following loop postpones non-fatal errors until multipart
3888 * messages are complete.
/* Receive into the context buffer; ret is passed on as the byte count. */
3891 ret = mnl_socket_recvfrom(tcf->nl, tcf->buf, tcf->buf_size);
3895 * In case of overflow Will receive till
3896 * end of multipart message. We may lost part
3897 * of reply messages but mark and return an error.
3899 if (err != ENOSPC ||
3900 !(nlh->nlmsg_flags & NLM_F_MULTI) ||
3901 nlh->nlmsg_type == NLMSG_DONE)
/* Hand the received bytes to libmnl together with the caller's cb/arg. */
3904 ret = mnl_cb_run(nlh, ret, seq, portid, cb, arg);
3907 * libmnl returns 0 if DONE or
3908 * success ACK message found.
3914 * ACK message with error found
3915 * or some error occurred.
3920 /* We should continue receiving. */
/* Extra bytes added on top of the computed size of on-stack Netlink buffers. */
3929 #define MNL_BUF_EXTRA_SPACE 16
/* Lower and upper bounds applied to MNL_REQUEST_SIZE. */
3930 #define MNL_REQUEST_SIZE_MIN 256
3931 #define MNL_REQUEST_SIZE_MAX 2048
/*
 * System page size clamped to [MNL_REQUEST_SIZE_MIN, MNL_REQUEST_SIZE_MAX];
 * used as the bufsize of the command buffer contexts in this file.
 */
3932 #define MNL_REQUEST_SIZE RTE_MIN(RTE_MAX(sysconf(_SC_PAGESIZE), \
3933 MNL_REQUEST_SIZE_MIN), MNL_REQUEST_SIZE_MAX)
3935 /* Data structures used by flow_tcf_xxx_cb() routines. */
/*
 * One buffer holding queued Netlink commands; buffers are chained through
 * `next` into the nlbuf list of struct tcf_nlcb_context.
 * NOTE(review): a `size` member is used by flow_tcf_alloc_nlcmd() and
 * flow_tcf_send_nlcmd(), but its declaration and the closing brace are not
 * visible in this excerpt.
 */
3936 struct tcf_nlcb_buf {
3937 LIST_ENTRY(tcf_nlcb_buf) next;
/* The payload is aligned so that it can be addressed as struct nlmsghdr. */
3939 alignas(struct nlmsghdr)
3940 uint8_t msg[]; /**< Netlink message data. */
/*
 * Context passed as the opaque `arg` to the flow_tcf_collect_*_cb()
 * callbacks: the interface being cleaned up plus the list of command
 * buffers filled by flow_tcf_alloc_nlcmd().
 * NOTE(review): a `bufsize` member is initialised and read elsewhere in
 * this file, but its declaration is not visible in this excerpt.
 */
3943 struct tcf_nlcb_context {
3944 unsigned int ifindex; /**< Base interface index. */
/* Head of the command buffer list; new buffers are inserted at the head. */
3946 LIST_HEAD(, tcf_nlcb_buf) nlbuf;
3950 * Allocate space for netlink command in buffer list
3952 * @param[in, out] ctx
3953 * Pointer to callback context with command buffers list.
3955 * Required size of data buffer to be allocated.
3958 * Pointer to allocated memory, aligned as message header.
3959 * NULL if some error occurred.
/*
 * Reserves room for one Netlink command of `size` bytes in the buffer list
 * of ctx. The head buffer is used when the aligned size still fits,
 * otherwise a new buffer with ctx->bufsize payload bytes is allocated and
 * put at the head of the list.
 * NOTE(review): the updates of buf->size, the NULL check of the allocation
 * and the return statements are not visible in this excerpt.
 */
3961 static struct nlmsghdr *
3962 flow_tcf_alloc_nlcmd(struct tcf_nlcb_context *ctx, uint32_t size)
3964 struct tcf_nlcb_buf *buf;
3965 struct nlmsghdr *nlh;
/* Work with the Netlink-aligned size from here on. */
3967 size = NLMSG_ALIGN(size);
/* Try to append to the most recently added buffer first. */
3968 buf = LIST_FIRST(&ctx->nlbuf);
3969 if (buf && (buf->size + size) <= ctx->bufsize) {
3970 nlh = (struct nlmsghdr *)&buf->msg[buf->size];
/* A request larger than a whole buffer can never be satisfied. */
3974 if (size > ctx->bufsize) {
3975 DRV_LOG(WARNING, "netlink: too long command buffer requested");
/* Allocate the list header and the message payload in one chunk. */
3978 buf = rte_malloc(__func__,
3979 ctx->bufsize + sizeof(struct tcf_nlcb_buf),
3980 alignof(struct tcf_nlcb_buf));
3982 DRV_LOG(WARNING, "netlink: no memory for command buffer");
/* The new buffer becomes the head, so the next call looks at it first. */
3985 LIST_INSERT_HEAD(&ctx->nlbuf, buf, next);
3987 nlh = (struct nlmsghdr *)&buf->msg[0];
3992 * Send the buffers with prepared netlink commands. Scans the list and
3993 * sends all found buffers. Buffers are sent and freed anyway in order
3994 * to prevent memory leakage if sending some message fails.
3997 * Context object initialized by mlx5_flow_tcf_context_create().
3998 * @param[in, out] ctx
3999 * Pointer to callback context with command buffers list.
4002 * Zero value on success, negative errno value otherwise
4003 * and rte_errno is set.
/*
 * Walks the buffers queued in ctx->nlbuf and sends every Netlink command
 * stored in them through flow_tcf_nl_ack(), one command per call. The list
 * head is re-initialised at the end.
 * NOTE(review): the outer loop, the declarations of msg/rc/ret, the buffer
 * release and the return statement are not visible in this excerpt.
 */
4006 flow_tcf_send_nlcmd(struct mlx5_flow_tcf_context *tcf,
4007 struct tcf_nlcb_context *ctx)
4009 struct tcf_nlcb_buf *bc = LIST_FIRST(&ctx->nlbuf);
/* Remember the successor before the current buffer is handled. */
4013 struct tcf_nlcb_buf *bn = LIST_NEXT(bc, next);
4014 struct nlmsghdr *nlh;
/* msg is the byte offset of the next unsent command inside bc->msg. */
4018 while (msg < bc->size) {
4020 * Send Netlink commands from buffer in one by one
4021 * fashion. If we send multiple rule deletion commands
4022 * in one Netlink message and some error occurs it may
4023 * cause multiple ACK error messages and break sequence
4024 * numbers of Netlink communication, because we expect
4025 * the only one ACK reply.
/* Sanity checks: a full header and the whole command must fit in the rest. */
4027 assert((bc->size - msg) >= sizeof(struct nlmsghdr));
4028 nlh = (struct nlmsghdr *)&bc->msg[msg];
4029 assert((bc->size - msg) >= nlh->nlmsg_len);
/* Step over this command before it is sent. */
4030 msg += nlh->nlmsg_len;
4031 rc = flow_tcf_nl_ack(tcf, nlh, NULL, NULL);
4034 "netlink: cleanup error %d", rc);
/* Leave the context with an empty buffer list. */
4042 LIST_INIT(&ctx->nlbuf);
4047 * Collect local IP address rules with scope link attribute on specified
4048 * network device. This is callback routine called by libmnl mnl_cb_run()
4049 * in loop for every message in received packet.
4052 * Pointer to reply header.
4053 * @param[in, out] arg
4054 * Opaque data pointer for this callback.
4057 * A positive, nonzero value on success, negative errno value otherwise
4058 * and rte_errno is set.
/*
 * mnl_cb_run() callback. For an RTM_NEWADDR reply describing a permanent,
 * link-scope IPv4/IPv6 address on ctx->ifindex that carries both the
 * attribute tracked as na_local and the one tracked as na_peer, a matching
 * RTM_DELADDR command is queued in the buffer list of ctx.
 * NOTE(review): the attribute switch cases, the declarations of na/size
 * and all return statements are not visible in this excerpt.
 */
4061 flow_tcf_collect_local_cb(const struct nlmsghdr *nlh, void *arg)
4063 struct tcf_nlcb_context *ctx = arg;
4064 struct nlmsghdr *cmd;
4065 struct ifaddrmsg *ifa;
4067 struct nlattr *na_local = NULL;
4068 struct nlattr *na_peer = NULL;
4069 unsigned char family;
/* Only address notifications are of interest. */
4072 if (nlh->nlmsg_type != RTM_NEWADDR) {
4076 ifa = mnl_nlmsg_get_payload(nlh);
4077 family = ifa->ifa_family;
/* Filter: target interface, link scope, permanent flag, IPv4 or IPv6. */
4078 if (ifa->ifa_index != ctx->ifindex ||
4079 ifa->ifa_scope != RT_SCOPE_LINK ||
4080 !(ifa->ifa_flags & IFA_F_PERMANENT) ||
4081 (family != AF_INET && family != AF_INET6))
/* Scan the attributes that follow the ifaddrmsg header. */
4083 mnl_attr_for_each(na, nlh, sizeof(*ifa)) {
4084 switch (mnl_attr_get_type(na)) {
4092 if (na_local && na_peer)
4095 if (!na_local || !na_peer)
4097 /* Local rule found with scope link, permanent and assigned peer. */
/* Header + ifaddrmsg + two address attributes (local and peer). */
4098 size = MNL_ALIGN(sizeof(struct nlmsghdr)) +
4099 MNL_ALIGN(sizeof(struct ifaddrmsg)) +
4100 (family == AF_INET6 ? 2 * SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
4101 : 2 * SZ_NLATTR_TYPE_OF(uint32_t));
4102 cmd = flow_tcf_alloc_nlcmd(ctx, size);
/* Build the delete request in the reserved space. */
4107 cmd = mnl_nlmsg_put_header(cmd);
4108 cmd->nlmsg_type = RTM_DELADDR;
4109 cmd->nlmsg_flags = NLM_F_REQUEST;
/* ifa now points into the new command, no longer into the reply. */
4110 ifa = mnl_nlmsg_put_extra_header(cmd, sizeof(*ifa));
4111 ifa->ifa_flags = IFA_F_PERMANENT;
4112 ifa->ifa_scope = RT_SCOPE_LINK;
4113 ifa->ifa_index = ctx->ifindex;
4114 if (family == AF_INET) {
4115 ifa->ifa_family = AF_INET;
4116 ifa->ifa_prefixlen = 32;
4117 mnl_attr_put_u32(cmd, IFA_LOCAL, mnl_attr_get_u32(na_local));
4118 mnl_attr_put_u32(cmd, IFA_ADDRESS, mnl_attr_get_u32(na_peer));
4120 ifa->ifa_family = AF_INET6;
4121 ifa->ifa_prefixlen = 128;
4122 mnl_attr_put(cmd, IFA_LOCAL, IPV6_ADDR_LEN,
4123 mnl_attr_get_payload(na_local));
4124 mnl_attr_put(cmd, IFA_ADDRESS, IPV6_ADDR_LEN,
4125 mnl_attr_get_payload(na_peer));
/* The size reserved up front must equal what was actually built. */
4127 assert(size == cmd->nlmsg_len);
4132 * Cleanup the local IP addresses on outer interface.
4135 * Context object initialized by mlx5_flow_tcf_context_create().
4136 * @param[in] ifindex
4137 * Network interface index to perform cleanup.
/*
 * Dumps the addresses (RTM_GETADDR with NLM_F_DUMP), lets
 * flow_tcf_collect_local_cb() queue delete commands into a local context
 * and then sends the queued commands with flow_tcf_send_nlcmd(). Failures
 * of either step are only logged as warnings.
 * NOTE(review): the return type, the .ifindex initialiser, the declaration
 * of ret and the conditions guarding the two DRV_LOG calls are not visible
 * in this excerpt.
 */
4140 flow_tcf_encap_local_cleanup(struct mlx5_flow_tcf_context *tcf,
4141 unsigned int ifindex)
4143 struct nlmsghdr *nlh;
4144 struct ifaddrmsg *ifa;
4145 struct tcf_nlcb_context ctx = {
4147 .bufsize = MNL_REQUEST_SIZE,
4148 .nlbuf = LIST_HEAD_INITIALIZER(),
4154 * Seek and destroy leftovers of local IP addresses with
4155 * matching properties "scope link".
/* The dump request is built in the context's own buffer. */
4157 nlh = mnl_nlmsg_put_header(tcf->buf);
4158 nlh->nlmsg_type = RTM_GETADDR;
4159 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
4160 ifa = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifa));
4161 ifa->ifa_family = AF_UNSPEC;
4162 ifa->ifa_index = ifindex;
4163 ifa->ifa_scope = RT_SCOPE_LINK;
4164 ret = flow_tcf_nl_ack(tcf, nlh, flow_tcf_collect_local_cb, &ctx);
/*
 * NOTE(review): both log texts below speak of "device" although this
 * routine queries and deletes addresses - looks copy-pasted, confirm.
 */
4166 DRV_LOG(WARNING, "netlink: query device list error %d", ret);
4167 ret = flow_tcf_send_nlcmd(tcf, &ctx);
4169 DRV_LOG(WARNING, "netlink: device delete error %d", ret);
4173 * Collect neigh permanent rules on specified network device.
4174 * This is callback routine called by libmnl mnl_cb_run() in loop for
4175 * every message in received packet.
4178 * Pointer to reply header.
4179 * @param[in, out] arg
4180 * Opaque data pointer for this callback.
4183 * A positive, nonzero value on success, negative errno value otherwise
4184 * and rte_errno is set.
/*
 * mnl_cb_run() callback. For an RTM_NEWNEIGH reply describing a permanent
 * IPv4/IPv6 neighbour entry on ctx->ifindex that carries both the
 * attribute tracked as na_ip and the one tracked as na_mac, a matching
 * RTM_DELNEIGH command is queued in the buffer list of ctx.
 * NOTE(review): the attribute switch cases, the declarations of
 * ndm/na/size and all return statements are not visible in this excerpt.
 */
4187 flow_tcf_collect_neigh_cb(const struct nlmsghdr *nlh, void *arg)
4189 struct tcf_nlcb_context *ctx = arg;
4190 struct nlmsghdr *cmd;
4193 struct nlattr *na_ip = NULL;
4194 struct nlattr *na_mac = NULL;
4195 unsigned char family;
/* Only neighbour notifications are of interest. */
4198 if (nlh->nlmsg_type != RTM_NEWNEIGH) {
4202 ndm = mnl_nlmsg_get_payload(nlh);
4203 family = ndm->ndm_family;
/* Filter: target interface, permanent state, IPv4 or IPv6. */
4204 if (ndm->ndm_ifindex != (int)ctx->ifindex ||
4205 !(ndm->ndm_state & NUD_PERMANENT) ||
4206 (family != AF_INET && family != AF_INET6))
/* Scan the attributes that follow the ndmsg header. */
4208 mnl_attr_for_each(na, nlh, sizeof(*ndm)) {
4209 switch (mnl_attr_get_type(na)) {
4217 if (na_mac && na_ip)
4220 if (!na_mac || !na_ip)
4222 /* Neigh rule with permanent attribute found. */
/* Header + ndmsg + link-layer address + one IP address attribute. */
4223 size = MNL_ALIGN(sizeof(struct nlmsghdr)) +
4224 MNL_ALIGN(sizeof(struct ndmsg)) +
4225 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) +
4226 (family == AF_INET6 ? SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
4227 : SZ_NLATTR_TYPE_OF(uint32_t));
4228 cmd = flow_tcf_alloc_nlcmd(ctx, size);
/* Build the delete request in the reserved space. */
4233 cmd = mnl_nlmsg_put_header(cmd);
4234 cmd->nlmsg_type = RTM_DELNEIGH;
4235 cmd->nlmsg_flags = NLM_F_REQUEST;
/* ndm now points into the new command, no longer into the reply. */
4236 ndm = mnl_nlmsg_put_extra_header(cmd, sizeof(*ndm));
4237 ndm->ndm_ifindex = ctx->ifindex;
4238 ndm->ndm_state = NUD_PERMANENT;
4241 if (family == AF_INET) {
4242 ndm->ndm_family = AF_INET;
4243 mnl_attr_put_u32(cmd, NDA_DST, mnl_attr_get_u32(na_ip));
4245 ndm->ndm_family = AF_INET6;
4246 mnl_attr_put(cmd, NDA_DST, IPV6_ADDR_LEN,
4247 mnl_attr_get_payload(na_ip));
4249 mnl_attr_put(cmd, NDA_LLADDR, ETHER_ADDR_LEN,
4250 mnl_attr_get_payload(na_mac));
/* The size reserved up front must equal what was actually built. */
4251 assert(size == cmd->nlmsg_len);
4256 * Cleanup the neigh rules on outer interface.
4259 * Context object initialized by mlx5_flow_tcf_context_create().
4260 * @param[in] ifindex
4261 * Network interface index to perform cleanup.
/*
 * Dumps the neighbour entries (RTM_GETNEIGH with NLM_F_DUMP), lets
 * flow_tcf_collect_neigh_cb() queue delete commands into a local context
 * and then sends the queued commands with flow_tcf_send_nlcmd(). Failures
 * of either step are only logged as warnings.
 * NOTE(review): the return type, the .ifindex initialiser, the
 * declarations of ndm/ret and the conditions guarding the two DRV_LOG
 * calls are not visible in this excerpt.
 */
4264 flow_tcf_encap_neigh_cleanup(struct mlx5_flow_tcf_context *tcf,
4265 unsigned int ifindex)
4267 struct nlmsghdr *nlh;
4269 struct tcf_nlcb_context ctx = {
4271 .bufsize = MNL_REQUEST_SIZE,
4272 .nlbuf = LIST_HEAD_INITIALIZER(),
4277 /* Seek and destroy leftovers of neigh rules. */
4278 nlh = mnl_nlmsg_put_header(tcf->buf);
4279 nlh->nlmsg_type = RTM_GETNEIGH;
4280 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
4281 ndm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ndm));
4282 ndm->ndm_family = AF_UNSPEC;
4283 ndm->ndm_ifindex = ifindex;
4284 ndm->ndm_state = NUD_PERMANENT;
4285 ret = flow_tcf_nl_ack(tcf, nlh, flow_tcf_collect_neigh_cb, &ctx);
/*
 * NOTE(review): both log texts below speak of "device" although this
 * routine queries and deletes neighbour entries - looks copy-pasted,
 * confirm.
 */
4287 DRV_LOG(WARNING, "netlink: query device list error %d", ret);
4288 ret = flow_tcf_send_nlcmd(tcf, &ctx);
4290 DRV_LOG(WARNING, "netlink: device delete error %d", ret);
4294 * Collect indices of VXLAN encap/decap interfaces associated with device.
4295 * This is callback routine called by libmnl mnl_cb_run() in loop for
4296 * every message in received packet.
4299 * Pointer to reply header.
4300 * @param[in, out] arg
4301 * Opaque data pointer for this callback.
4304 * A positive, nonzero value on success, negative errno value otherwise
4305 * and rte_errno is set.
/*
 * mnl_cb_run() callback. For an RTM_NEWLINK reply whose IFLA_LINKINFO
 * nest names the kind "vxlan" and whose data nest contains an
 * IFLA_VXLAN_LINK attribute equal to ctx->ifindex, an RTM_DELLINK command
 * for that link is queued in the buffer list of ctx.
 * NOTE(review): the declarations of na/found/size, the bodies of several
 * branches and all return statements are not visible in this excerpt.
 */
4308 flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg)
4310 struct tcf_nlcb_context *ctx = arg;
4311 struct nlmsghdr *cmd;
4312 struct ifinfomsg *ifm;
4314 struct nlattr *na_info = NULL;
4315 struct nlattr *na_vxlan = NULL;
4317 unsigned int vxindex;
/* Only link notifications are of interest. */
4320 if (nlh->nlmsg_type != RTM_NEWLINK) {
4324 ifm = mnl_nlmsg_get_payload(nlh);
4325 if (!ifm->ifi_index) {
/* Locate the link-info nest among the top-level attributes. */
4329 mnl_attr_for_each(na, nlh, sizeof(*ifm))
4330 if (mnl_attr_get_type(na) == IFLA_LINKINFO) {
/* Inside link-info: check the kind string and pick up the data nest. */
4336 mnl_attr_for_each_nested(na, na_info) {
4337 switch (mnl_attr_get_type(na)) {
4338 case IFLA_INFO_KIND:
4339 if (!strncmp("vxlan", mnl_attr_get_str(na),
4340 mnl_attr_get_len(na)))
4343 case IFLA_INFO_DATA:
4347 if (found && na_vxlan)
4350 if (!found || !na_vxlan)
/* Inside the data nest: look for a link attribute naming our interface. */
4353 mnl_attr_for_each_nested(na, na_vxlan) {
4354 if (mnl_attr_get_type(na) == IFLA_VXLAN_LINK &&
4355 mnl_attr_get_u32(na) == ctx->ifindex) {
4362 /* Attached VXLAN device found, store the command to delete. */
/* Save the index now: ifm is re-pointed at the new command below. */
4363 vxindex = ifm->ifi_index;
4364 size = MNL_ALIGN(sizeof(struct nlmsghdr)) +
4365 MNL_ALIGN(sizeof(struct ifinfomsg));
4366 cmd = flow_tcf_alloc_nlcmd(ctx, size);
4371 cmd = mnl_nlmsg_put_header(cmd);
4372 cmd->nlmsg_type = RTM_DELLINK;
4373 cmd->nlmsg_flags = NLM_F_REQUEST;
4374 ifm = mnl_nlmsg_put_extra_header(cmd, sizeof(*ifm));
4375 ifm->ifi_family = AF_UNSPEC;
4376 ifm->ifi_index = vxindex;
/* The size reserved up front must equal what was actually built. */
4377 assert(size == cmd->nlmsg_len);
4382 * Cleanup the outer interface. Removes all found vxlan devices
4383 * attached to specified index, flushes the neigh and local IP
4387 * Context object initialized by mlx5_flow_tcf_context_create().
4388 * @param[in] ifindex
4389 * Network interface index to perform cleanup.
/*
 * Dumps all links (RTM_GETLINK with NLM_F_DUMP), lets
 * flow_tcf_collect_vxlan_cb() queue RTM_DELLINK commands into a local
 * context and then sends the queued commands with flow_tcf_send_nlcmd().
 * Failures of either step are only logged as warnings.
 * NOTE(review): the return type, the .ifindex initialiser, the declaration
 * of ret and the conditions guarding the two DRV_LOG calls are not visible
 * in this excerpt.
 */
4392 flow_tcf_encap_iface_cleanup(struct mlx5_flow_tcf_context *tcf,
4393 unsigned int ifindex)
4395 struct nlmsghdr *nlh;
4396 struct ifinfomsg *ifm;
4397 struct tcf_nlcb_context ctx = {
4399 .bufsize = MNL_REQUEST_SIZE,
4400 .nlbuf = LIST_HEAD_INITIALIZER(),
4406 * Seek and destroy leftover VXLAN encap/decap interfaces with
4407 * matching properties.
/* The dump request is built in the context's own buffer. */
4409 nlh = mnl_nlmsg_put_header(tcf->buf);
4410 nlh->nlmsg_type = RTM_GETLINK;
4411 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
4412 ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
4413 ifm->ifi_family = AF_UNSPEC;
4414 ret = flow_tcf_nl_ack(tcf, nlh, flow_tcf_collect_vxlan_cb, &ctx);
4416 DRV_LOG(WARNING, "netlink: query device list error %d", ret);
4417 ret = flow_tcf_send_nlcmd(tcf, &ctx);
4419 DRV_LOG(WARNING, "netlink: device delete error %d", ret);
4423 * Emit Netlink message to add/remove local address to the outer device.
4424 * The address being added is visible within the link only (scope link).
4426 * Note that an implicit route is maintained by the kernel due to the
4427 * presence of a peer address (IFA_ADDRESS).
4429 * These rules are used for encapsulation only and allow to assign
4430 * the outer tunnel source IP address.
4433 * Libmnl socket context object.
4435 * Encapsulation properties (source address and its peer).
4436 * @param[in] ifindex
4437 * Network interface to apply rule.
4439 * Toggle between add and remove.
4441 * Perform verbose error reporting if not NULL.
4444 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Builds an RTM_NEWADDR (enable) or RTM_DELADDR request on the stack for
 * the source address found in encap (IPv4 /32 or IPv6 /128, link scope,
 * permanent) on ifindex, optionally with the destination as peer address,
 * and sends it with flow_tcf_nl_ack(). On failure an rte_flow error is
 * filled in from rte_errno.
 * NOTE(review): the return type, the `enable` parameter line, the success
 * return and the tails of several multi-line calls are not visible in this
 * excerpt.
 */
4447 flow_tcf_rule_local(struct mlx5_flow_tcf_context *tcf,
4448 const struct flow_tcf_vxlan_encap *encap,
4449 unsigned int ifindex,
4451 struct rte_flow_error *error)
4453 struct nlmsghdr *nlh;
4454 struct ifaddrmsg *ifa;
/* 128 spare bytes leave room for the attributes appended below. */
4455 alignas(struct nlmsghdr)
4456 uint8_t buf[mnl_nlmsg_size(sizeof(*ifa) + 128)];
4458 nlh = mnl_nlmsg_put_header(buf);
4459 nlh->nlmsg_type = enable ? RTM_NEWADDR : RTM_DELADDR;
4461 NLM_F_REQUEST | (enable ? NLM_F_CREATE | NLM_F_REPLACE : 0);
4463 ifa = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifa));
4464 ifa->ifa_flags = IFA_F_PERMANENT;
4465 ifa->ifa_scope = RT_SCOPE_LINK;
4466 ifa->ifa_index = ifindex;
/* The address family follows from which source address the mask marks. */
4467 if (encap->mask & FLOW_TCF_ENCAP_IPV4_SRC) {
4468 ifa->ifa_family = AF_INET;
4469 ifa->ifa_prefixlen = 32;
4470 mnl_attr_put_u32(nlh, IFA_LOCAL, encap->ipv4.src);
4471 if (encap->mask & FLOW_TCF_ENCAP_IPV4_DST)
4472 mnl_attr_put_u32(nlh, IFA_ADDRESS,
4475 assert(encap->mask & FLOW_TCF_ENCAP_IPV6_SRC);
4476 ifa->ifa_family = AF_INET6;
4477 ifa->ifa_prefixlen = 128;
4478 mnl_attr_put(nlh, IFA_LOCAL,
4479 sizeof(encap->ipv6.src),
4481 if (encap->mask & FLOW_TCF_ENCAP_IPV6_DST)
4482 mnl_attr_put(nlh, IFA_ADDRESS,
4483 sizeof(encap->ipv6.dst),
/* A zero result from flow_tcf_nl_ack() means the request was accepted. */
4486 if (!flow_tcf_nl_ack(tcf, nlh, NULL, NULL))
4488 return rte_flow_error_set(error, rte_errno,
4489 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4490 "netlink: cannot complete IFA request"
4495 * Emit Netlink message to add/remove neighbor.
4498 * Libmnl socket context object.
4500 * Encapsulation properties (destination address).
4501 * @param[in] ifindex
4502 * Network interface.
4504 * Toggle between add and remove.
4506 * Perform verbose error reporting if not NULL.
4509 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Builds an RTM_NEWNEIGH (enable) or RTM_DELNEIGH request on the stack for
 * the destination IP address found in encap (permanent state) on ifindex,
 * adds the destination MAC as NDA_LLADDR when the mask marks it, and sends
 * the request with flow_tcf_nl_ack(). On failure an rte_flow error is
 * filled in from rte_errno.
 * NOTE(review): the return type, the `enable` parameter line, the
 * declaration of ndm, the success return and the tails of several
 * multi-line calls are not visible in this excerpt.
 */
4512 flow_tcf_rule_neigh(struct mlx5_flow_tcf_context *tcf,
4513 const struct flow_tcf_vxlan_encap *encap,
4514 unsigned int ifindex,
4516 struct rte_flow_error *error)
4518 struct nlmsghdr *nlh;
/* 128 spare bytes leave room for the attributes appended below. */
4520 alignas(struct nlmsghdr)
4521 uint8_t buf[mnl_nlmsg_size(sizeof(*ndm) + 128)];
4523 nlh = mnl_nlmsg_put_header(buf);
4524 nlh->nlmsg_type = enable ? RTM_NEWNEIGH : RTM_DELNEIGH;
4526 NLM_F_REQUEST | (enable ? NLM_F_CREATE | NLM_F_REPLACE : 0);
4528 ndm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ndm));
4529 ndm->ndm_ifindex = ifindex;
4530 ndm->ndm_state = NUD_PERMANENT;
/* The address family follows from which destination the mask marks. */
4533 if (encap->mask & FLOW_TCF_ENCAP_IPV4_DST) {
4534 ndm->ndm_family = AF_INET;
4535 mnl_attr_put_u32(nlh, NDA_DST, encap->ipv4.dst);
4537 assert(encap->mask & FLOW_TCF_ENCAP_IPV6_DST);
4538 ndm->ndm_family = AF_INET6;
4539 mnl_attr_put(nlh, NDA_DST, sizeof(encap->ipv6.dst),
/* A requested source MAC only produces a message when adding the entry. */
4542 if (encap->mask & FLOW_TCF_ENCAP_ETH_SRC && enable)
4544 "outer ethernet source address cannot be "
4545 "forced for VXLAN encapsulation");
4546 if (encap->mask & FLOW_TCF_ENCAP_ETH_DST)
4547 mnl_attr_put(nlh, NDA_LLADDR, sizeof(encap->eth.dst),
/* A zero result from flow_tcf_nl_ack() means the request was accepted. */
4549 if (!flow_tcf_nl_ack(tcf, nlh, NULL, NULL))
4551 return rte_flow_error_set(error, rte_errno,
4552 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4553 "netlink: cannot complete ND request"
4558 * Manage the local IP addresses and their peers IP addresses on the
4559 * outer interface for encapsulation purposes. The kernel searches the
4560 * appropriate device for tunnel egress traffic using the outer source
4561 * IP, this IP should be assigned to the outer network device, otherwise
4562 * kernel rejects the rule.
4564 * Adds or removes the addresses using the Netlink command like this:
4565 * ip addr add <src_ip> peer <dst_ip> scope link dev <ifouter>
4567 * The addresses are local to the netdev ("scope link"), this reduces
4568 * the risk of conflicts. Note that an implicit route is maintained by
4569 * the kernel due to the presence of a peer address (IFA_ADDRESS).
4572 * Libmnl socket context object.
4574 * Object, contains rule database and ifouter index.
4575 * @param[in] dev_flow
4576 * Flow object, contains the tunnel parameters (for encap only).
4578 * Toggle between add and remove.
4580 * Perform verbose error reporting if not NULL.
4583 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Reference-counted management of the local address rule kept in
 * iface->local for the encapsulation in dev_flow->tcf.vxlan_encap.
 * An existing rule is looked up by source and destination address. On
 * disable, when the reference count is zero or drops to zero, the rule is
 * unlinked and freed, and the address is removed with
 * flow_tcf_rule_local(). When no rule exists, a new one is allocated,
 * filled from encap, installed with flow_tcf_rule_local() and linked into
 * iface->local.
 * Fix: the unlinked rule was never released (memory leak); it is now
 * freed right after LIST_REMOVE().
 * NOTE(review): the return type, the `enable` parameter line, the
 * declaration of ret, the enable/disable branching and several return
 * statements are not visible in this excerpt.
 */
4586 flow_tcf_encap_local(struct mlx5_flow_tcf_context *tcf,
4587 struct tcf_irule *iface,
4588 struct mlx5_flow *dev_flow,
4590 struct rte_flow_error *error)
4592 const struct flow_tcf_vxlan_encap *encap = dev_flow->tcf.vxlan_encap;
4593 struct tcf_local_rule *rule = NULL;
4597 assert(encap->hdr.type == FLOW_TCF_TUNACT_VXLAN_ENCAP);
/* Look for an existing rule with the same source/destination pair. */
4598 if (encap->mask & FLOW_TCF_ENCAP_IPV4_SRC) {
4599 assert(encap->mask & FLOW_TCF_ENCAP_IPV4_DST);
4600 LIST_FOREACH(rule, &iface->local, next) {
4601 if (rule->mask & FLOW_TCF_ENCAP_IPV4_SRC &&
4602 encap->ipv4.src == rule->ipv4.src &&
4603 encap->ipv4.dst == rule->ipv4.dst) {
4608 assert(encap->mask & FLOW_TCF_ENCAP_IPV6_SRC);
4609 assert(encap->mask & FLOW_TCF_ENCAP_IPV6_DST);
4610 LIST_FOREACH(rule, &iface->local, next) {
4611 if (rule->mask & FLOW_TCF_ENCAP_IPV6_SRC &&
4612 !memcmp(&encap->ipv6.src, &rule->ipv6.src,
4613 sizeof(encap->ipv6.src)) &&
4614 !memcmp(&encap->ipv6.dst, &rule->ipv6.dst,
4615 sizeof(encap->ipv6.dst))) {
4625 if (!rule->refcnt || !--rule->refcnt) {
4626 LIST_REMOVE(rule, next);
/*
 * Nothing references the rule once it is unlinked, so release it here.
 * The removal request below is built from encap and iface only.
 */
rte_free(rule);
4627 return flow_tcf_rule_local(tcf, encap,
4628 iface->ifouter, false, error);
4633 DRV_LOG(WARNING, "disabling not existing local rule");
4634 rte_flow_error_set(error, ENOENT,
4635 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4636 "disabling not existing local rule");
/* No matching rule: create and install a new one. */
4639 rule = rte_zmalloc(__func__, sizeof(struct tcf_local_rule),
4640 alignof(struct tcf_local_rule));
4642 rte_flow_error_set(error, ENOMEM,
4643 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4644 "unable to allocate memory for local rule");
4647 *rule = (struct tcf_local_rule){.refcnt = 0,
4650 if (encap->mask & FLOW_TCF_ENCAP_IPV4_SRC) {
4651 rule->mask = FLOW_TCF_ENCAP_IPV4_SRC
4652 | FLOW_TCF_ENCAP_IPV4_DST;
4653 rule->ipv4.src = encap->ipv4.src;
4654 rule->ipv4.dst = encap->ipv4.dst;
4656 rule->mask = FLOW_TCF_ENCAP_IPV6_SRC
4657 | FLOW_TCF_ENCAP_IPV6_DST;
4658 memcpy(&rule->ipv6.src, &encap->ipv6.src, IPV6_ADDR_LEN);
4659 memcpy(&rule->ipv6.dst, &encap->ipv6.dst, IPV6_ADDR_LEN);
/* Program the address into the kernel before tracking the rule. */
4661 ret = flow_tcf_rule_local(tcf, encap, iface->ifouter, true, error);
4667 LIST_INSERT_HEAD(&iface->local, rule, next);
4672 * Manage the destination MAC/IP addresses neigh database, kernel uses
4673 * this one to determine the destination MAC address within encapsulation
4674 * header. Adds or removes the entries using the Netlink command like this:
4675 * ip neigh add dev <ifouter> lladdr <dst_mac> to <dst_ip> nud permanent
4678 * Libmnl socket context object.
4680 * Object, contains rule database and ifouter index.
4681 * @param[in] dev_flow
4682 * Flow object, contains the tunnel parameters (for encap only).
4684 * Toggle between add and remove.
4686 * Perform verbose error reporting if not NULL.
4689 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Reference-counted management of the neighbour rule kept in iface->neigh
 * for the encapsulation in dev_flow->tcf.vxlan_encap.
 * An existing rule is looked up by destination IP address; a rule for the
 * same IP with a different destination MAC is reported as EEXIST. On
 * disable, when the reference count is zero or drops to zero, the rule is
 * unlinked and freed, and the neighbour entry is removed with
 * flow_tcf_rule_neigh(). When no rule exists, a new one is allocated,
 * filled from encap, installed with flow_tcf_rule_neigh() and linked into
 * iface->neigh.
 * Fixes:
 *  - the unlinked rule was never released (memory leak); it is now freed
 *    right after LIST_REMOVE();
 *  - the ENOENT error for disabling a missing rule carried the text of
 *    the allocation failure; it now matches the accompanying log message.
 * NOTE(review): the return type, the `enable` parameter line, the
 * declaration of ret, the enable/disable branching and several return
 * statements are not visible in this excerpt.
 */
4692 flow_tcf_encap_neigh(struct mlx5_flow_tcf_context *tcf,
4693 struct tcf_irule *iface,
4694 struct mlx5_flow *dev_flow,
4696 struct rte_flow_error *error)
4698 const struct flow_tcf_vxlan_encap *encap = dev_flow->tcf.vxlan_encap;
4699 struct tcf_neigh_rule *rule = NULL;
4703 assert(encap->hdr.type == FLOW_TCF_TUNACT_VXLAN_ENCAP);
/* Look for an existing rule with the same destination IP address. */
4704 if (encap->mask & FLOW_TCF_ENCAP_IPV4_DST) {
4705 assert(encap->mask & FLOW_TCF_ENCAP_IPV4_SRC);
4706 LIST_FOREACH(rule, &iface->neigh, next) {
4707 if (rule->mask & FLOW_TCF_ENCAP_IPV4_DST &&
4708 encap->ipv4.dst == rule->ipv4.dst) {
4713 assert(encap->mask & FLOW_TCF_ENCAP_IPV6_SRC);
4714 assert(encap->mask & FLOW_TCF_ENCAP_IPV6_DST);
4715 LIST_FOREACH(rule, &iface->neigh, next) {
4716 if (rule->mask & FLOW_TCF_ENCAP_IPV6_DST &&
4717 !memcmp(&encap->ipv6.dst, &rule->ipv6.dst,
4718 sizeof(encap->ipv6.dst))) {
/* One destination IP cannot be mapped to two different MAC addresses. */
4724 if (memcmp(&encap->eth.dst, &rule->eth,
4725 sizeof(encap->eth.dst))) {
4726 DRV_LOG(WARNING, "Destination MAC differs"
4728 rte_flow_error_set(error, EEXIST,
4729 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
4730 NULL, "Different MAC address"
4731 " neigh rule for the same"
4739 if (!rule->refcnt || !--rule->refcnt) {
4740 LIST_REMOVE(rule, next);
/*
 * Nothing references the rule once it is unlinked, so release it here.
 * The removal request below is built from encap and iface only.
 */
rte_free(rule);
4741 return flow_tcf_rule_neigh(tcf, encap,
4748 DRV_LOG(WARNING, "Disabling not existing neigh rule");
4749 rte_flow_error_set(error, ENOENT,
4750 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4751 "disabling not existing neigh rule");
/* No matching rule: create and install a new one. */
4754 rule = rte_zmalloc(__func__, sizeof(struct tcf_neigh_rule),
4755 alignof(struct tcf_neigh_rule));
4757 rte_flow_error_set(error, ENOMEM,
4758 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4759 "unable to allocate memory for neigh rule");
4762 *rule = (struct tcf_neigh_rule){.refcnt = 0,
4765 if (encap->mask & FLOW_TCF_ENCAP_IPV4_DST) {
4766 rule->mask = FLOW_TCF_ENCAP_IPV4_DST;
4767 rule->ipv4.dst = encap->ipv4.dst;
4769 rule->mask = FLOW_TCF_ENCAP_IPV6_DST;
4770 memcpy(&rule->ipv6.dst, &encap->ipv6.dst, IPV6_ADDR_LEN);
4772 memcpy(&rule->eth, &encap->eth.dst, sizeof(rule->eth));
/* Program the entry into the kernel before tracking the rule. */
4773 ret = flow_tcf_rule_neigh(tcf, encap, iface->ifouter, true, error);
4779 LIST_INSERT_HEAD(&iface->neigh, rule, next);
4783 /* VXLAN encap rule database for outer interfaces. */
/* Searched and extended by flow_tcf_encap_irule_acquire(). */
4784 static LIST_HEAD(, tcf_irule) iface_list_vxlan = LIST_HEAD_INITIALIZER();
4786 /* VTEP device list is shared between PMD port instances. */
4787 static LIST_HEAD(, tcf_vtep) vtep_list_vxlan = LIST_HEAD_INITIALIZER();
/*
 * NOTE(review): presumably guards vtep_list_vxlan; no lock/unlock call is
 * visible in this excerpt - confirm against the users of the list.
 */
4788 static pthread_mutex_t vtep_list_mutex = PTHREAD_MUTEX_INITIALIZER;
4791 * Acquire the VXLAN encap rules container for specified interface.
4792 * First looks for the container in the existing ones list, creates
4793 * and initializes the new container if existing not found.
4796 * Context object initialized by mlx5_flow_tcf_context_create().
4797 * @param[in] ifouter
4798 * Network interface index to create VXLAN encap rules on.
4800 * Perform verbose error reporting if not NULL.
4802 * Rule container pointer on success,
4803 * NULL otherwise and rte_errno is set.
/*
 * Looks up the rule container for ifouter in iface_list_vxlan. When none
 * exists, a zeroed container is allocated and initialised, leftovers on
 * the interface are cleaned up (VXLAN devices, local addresses, neighbour
 * entries) and the container is put at the head of the list.
 * NOTE(review): the reference counting, the NULL check of the allocation,
 * the remaining initialisers and the return statements are not visible in
 * this excerpt.
 */
4805 static struct tcf_irule*
4806 flow_tcf_encap_irule_acquire(struct mlx5_flow_tcf_context *tcf,
4807 unsigned int ifouter,
4808 struct rte_flow_error *error)
4810 struct tcf_irule *iface;
4812 /* Look whether the container for encap rules is created. */
4814 LIST_FOREACH(iface, &iface_list_vxlan, next) {
4815 if (iface->ifouter == ifouter)
4819 /* Container already exists, just increment the reference. */
4823 /* Not found, we should create the new container. */
4824 iface = rte_zmalloc(__func__, sizeof(*iface),
4825 alignof(struct tcf_irule));
4827 rte_flow_error_set(error, ENOMEM,
4828 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4829 "unable to allocate memory for container");
/* Start with empty local-address and neighbour rule lists. */
4832 *iface = (struct tcf_irule){
4833 .local = LIST_HEAD_INITIALIZER(),
4834 .neigh = LIST_HEAD_INITIALIZER(),
4838 /* Interface cleanup for new container created. */
4839 flow_tcf_encap_iface_cleanup(tcf, ifouter);
4840 flow_tcf_encap_local_cleanup(tcf, ifouter);
4841 flow_tcf_encap_neigh_cleanup(tcf, ifouter);
4842 LIST_INSERT_HEAD(&iface_list_vxlan, iface, next);
4847 * Releases VXLAN encap rules container by pointer. Decrements the
4848 * reference counter and deletes the container if counter is zero.
4851 * VXLAN rule container pointer to release.
4854 flow_tcf_encap_irule_release(struct tcf_irule *iface)
4856 assert(iface->refcnt);
4857 if (--iface->refcnt == 0) {
4858 /* Reference counter is zero, delete the container. */
4859 assert(LIST_EMPTY(&iface->local));
4860 assert(LIST_EMPTY(&iface->neigh));
4861 LIST_REMOVE(iface, next);
4867 * Deletes VTEP network device.
4870 * Context object initialized by mlx5_flow_tcf_context_create().
4872 * Object representing the network device to delete. Memory
4873 * allocated for this object is freed by routine.
4876 flow_tcf_vtep_delete(struct mlx5_flow_tcf_context *tcf,
4877 struct tcf_vtep *vtep)
4879 struct nlmsghdr *nlh;
4880 struct ifinfomsg *ifm;
4881 alignas(struct nlmsghdr)
4882 uint8_t buf[mnl_nlmsg_size(MNL_ALIGN(sizeof(*ifm))) +
4883 MNL_BUF_EXTRA_SPACE];
4886 assert(!vtep->refcnt);
4887 /* Delete only those ifaces we actually created. */
4888 if (vtep->created && vtep->ifindex) {
4889 DRV_LOG(INFO, "VTEP delete (%d)", vtep->ifindex);
4890 nlh = mnl_nlmsg_put_header(buf);
4891 nlh->nlmsg_type = RTM_DELLINK;
4892 nlh->nlmsg_flags = NLM_F_REQUEST;
4893 ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
4894 ifm->ifi_family = AF_UNSPEC;
4895 ifm->ifi_index = vtep->ifindex;
4896 assert(sizeof(buf) >= nlh->nlmsg_len);
4897 ret = flow_tcf_nl_ack(tcf, nlh, NULL, NULL);
4899 DRV_LOG(WARNING, "netlink: error deleting vxlan"
4900 " encap/decap ifindex %u",
4907 * Creates VTEP network device.
4910 * Context object initialized by mlx5_flow_tcf_context_create().
4912 * UDP port of created VTEP device.
4914 * Perform verbose error reporting if not NULL.
4917 * Pointer to created device structure on success,
4918 * NULL otherwise and rte_errno is set.
4920 static struct tcf_vtep*
4921 flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf,
4922 uint16_t port, struct rte_flow_error *error)
4924 struct tcf_vtep *vtep;
4925 struct nlmsghdr *nlh;
4926 struct ifinfomsg *ifm;
4927 char name[sizeof(MLX5_VXLAN_DEVICE_PFX) + 24];
4928 alignas(struct nlmsghdr)
4929 uint8_t buf[mnl_nlmsg_size(sizeof(*ifm)) +
4930 SZ_NLATTR_DATA_OF(sizeof(name)) +
4931 SZ_NLATTR_NEST * 2 +
4932 SZ_NLATTR_STRZ_OF("vxlan") +
4933 SZ_NLATTR_DATA_OF(sizeof(uint32_t)) +
4934 SZ_NLATTR_DATA_OF(sizeof(uint16_t)) +
4935 SZ_NLATTR_DATA_OF(sizeof(uint8_t)) * 3 +
4936 MNL_BUF_EXTRA_SPACE];
4937 struct nlattr *na_info;
4938 struct nlattr *na_vxlan;
4939 rte_be16_t vxlan_port = rte_cpu_to_be_16(port);
4942 vtep = rte_zmalloc(__func__, sizeof(*vtep), alignof(struct tcf_vtep));
4944 rte_flow_error_set(error, ENOMEM,
4945 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4946 "unable to allocate memory for VTEP");
4949 *vtep = (struct tcf_vtep){
4952 memset(buf, 0, sizeof(buf));
4953 nlh = mnl_nlmsg_put_header(buf);
4954 nlh->nlmsg_type = RTM_NEWLINK;
4955 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
4956 ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
4957 ifm->ifi_family = AF_UNSPEC;
4960 ifm->ifi_flags = IFF_UP;
4961 ifm->ifi_change = 0xffffffff;
4962 snprintf(name, sizeof(name), "%s%u", MLX5_VXLAN_DEVICE_PFX, port);
4963 mnl_attr_put_strz(nlh, IFLA_IFNAME, name);
4964 na_info = mnl_attr_nest_start(nlh, IFLA_LINKINFO);
4966 mnl_attr_put_strz(nlh, IFLA_INFO_KIND, "vxlan");
4967 na_vxlan = mnl_attr_nest_start(nlh, IFLA_INFO_DATA);
4969 #ifdef HAVE_IFLA_VXLAN_COLLECT_METADATA
4971 * RH 7.2 does not support metadata for tunnel device.
4972 * It does not matter because we are going to use the
4973 * hardware offload by mlx5 driver.
4975 mnl_attr_put_u8(nlh, IFLA_VXLAN_COLLECT_METADATA, 1);
4977 mnl_attr_put_u8(nlh, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, 1);
4978 mnl_attr_put_u8(nlh, IFLA_VXLAN_LEARNING, 0);
4979 mnl_attr_put_u16(nlh, IFLA_VXLAN_PORT, vxlan_port);
4980 #ifndef HAVE_IFLA_VXLAN_COLLECT_METADATA
4982 * We must specify VNI explicitly if metadata not supported.
4983 * Note, VNI is transferred with native endianness format.
4985 mnl_attr_put_u16(nlh, IFLA_VXLAN_ID, MLX5_VXLAN_DEFAULT_VNI);
4987 mnl_attr_nest_end(nlh, na_vxlan);
4988 mnl_attr_nest_end(nlh, na_info);
4989 assert(sizeof(buf) >= nlh->nlmsg_len);
4990 ret = flow_tcf_nl_ack(tcf, nlh, NULL, NULL);
4993 "netlink: VTEP %s create failure (%d)",
4995 if (rte_errno != EEXIST)
4997 * Some unhandled error occurred or device is
4998 * for encapsulation and cannot be shared.
5003 * Mark device we actually created.
5004 * We should explicitly delete
5005 * when we do not need it anymore.
5009 /* Try to get ifindex of created or pre-existing device. */
5010 ret = if_nametoindex(name);
5013 "VTEP %s failed to get index (%d)", name, errno);
5016 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5017 "netlink: failed to retrieve VTEP ifindex");
5020 vtep->ifindex = ret;
5021 memset(buf, 0, sizeof(buf));
5022 nlh = mnl_nlmsg_put_header(buf);
5023 nlh->nlmsg_type = RTM_NEWLINK;
5024 nlh->nlmsg_flags = NLM_F_REQUEST;
5025 ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
5026 ifm->ifi_family = AF_UNSPEC;
5028 ifm->ifi_index = vtep->ifindex;
5029 ifm->ifi_flags = IFF_UP;
5030 ifm->ifi_change = IFF_UP;
5031 ret = flow_tcf_nl_ack(tcf, nlh, NULL, NULL);
5033 rte_flow_error_set(error, -errno,
5034 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5035 "netlink: failed to set VTEP link up");
5036 DRV_LOG(WARNING, "netlink: VTEP %s set link up failure (%d)",
5040 ret = mlx5_flow_tcf_init(tcf, vtep->ifindex, error);
5042 DRV_LOG(WARNING, "VTEP %s init failure (%d)", name, rte_errno);
5045 DRV_LOG(INFO, "VTEP create (%d, %d)", vtep->port, vtep->ifindex);
5049 flow_tcf_vtep_delete(tcf, vtep);
5057 * Acquire target interface index for VXLAN tunneling decapsulation.
5058 * In order to share the UDP port with the other interfaces the
5059 * VXLAN device is created not attached to any interface (if created).
5062 * Context object initialized by mlx5_flow_tcf_context_create().
5063 * @param[in] dev_flow
5064 * Flow tcf object with tunnel structure pointer set.
5066 * Perform verbose error reporting if not NULL.
5068 * Interface descriptor pointer on success,
5069 * NULL otherwise and rte_errno is set.
5071 static struct tcf_vtep*
5072 flow_tcf_decap_vtep_acquire(struct mlx5_flow_tcf_context *tcf,
5073 struct mlx5_flow *dev_flow,
5074 struct rte_flow_error *error)
5076 struct tcf_vtep *vtep;
5077 uint16_t port = dev_flow->tcf.vxlan_decap->udp_port;
5079 LIST_FOREACH(vtep, &vtep_list_vxlan, next) {
5080 if (vtep->port == port)
5084 /* Device exists, just increment the reference counter. */
5086 assert(vtep->ifindex);
5089 /* No decapsulation device exists, try to create the new one. */
5090 vtep = flow_tcf_vtep_create(tcf, port, error);
5092 LIST_INSERT_HEAD(&vtep_list_vxlan, vtep, next);
5097 * Acquire target interface index for VXLAN tunneling encapsulation.
5100 * Context object initialized by mlx5_flow_tcf_context_create().
5101 * @param[in] ifouter
5102 * Network interface index to attach VXLAN encap device to.
5103 * @param[in] dev_flow
5104 * Flow tcf object with tunnel structure pointer set.
5106 * Perform verbose error reporting if not NULL.
5108 * Interface descriptor pointer on success,
5109 * NULL otherwise and rte_errno is set.
5111 static struct tcf_vtep*
5112 flow_tcf_encap_vtep_acquire(struct mlx5_flow_tcf_context *tcf,
5113 unsigned int ifouter,
5114 struct mlx5_flow *dev_flow,
5115 struct rte_flow_error *error)
5117 static uint16_t port;
5118 struct tcf_vtep *vtep;
5119 struct tcf_irule *iface;
5123 /* Look whether the VTEP for specified port is created. */
5124 port = rte_be_to_cpu_16(dev_flow->tcf.vxlan_encap->udp.dst);
5125 LIST_FOREACH(vtep, &vtep_list_vxlan, next) {
5126 if (vtep->port == port)
5130 /* VTEP already exists, just increment the reference. */
5133 /* Not found, we should create the new VTEP. */
5134 vtep = flow_tcf_vtep_create(tcf, port, error);
5137 LIST_INSERT_HEAD(&vtep_list_vxlan, vtep, next);
5139 assert(vtep->ifindex);
5140 iface = flow_tcf_encap_irule_acquire(tcf, ifouter, error);
5142 if (--vtep->refcnt == 0)
5143 flow_tcf_vtep_delete(tcf, vtep);
5146 dev_flow->tcf.vxlan_encap->iface = iface;
5147 /* Create local ipaddr with peer to specify the outer IPs. */
5148 ret = flow_tcf_encap_local(tcf, iface, dev_flow, true, error);
5150 /* Create neigh rule to specify outer destination MAC. */
5151 ret = flow_tcf_encap_neigh(tcf, iface, dev_flow, true, error);
5153 flow_tcf_encap_local(tcf, iface,
5154 dev_flow, false, error);
5157 dev_flow->tcf.vxlan_encap->iface = NULL;
5158 flow_tcf_encap_irule_release(iface);
5159 if (--vtep->refcnt == 0)
5160 flow_tcf_vtep_delete(tcf, vtep);
5167 * Acquires target interface index for tunneling of any type.
5168 * Creates the new VTEP if needed.
5171 * Context object initialized by mlx5_flow_tcf_context_create().
5172 * @param[in] ifouter
5173 * Network interface index to create VXLAN encap rules on.
5174 * @param[in] dev_flow
5175 * Flow tcf object with tunnel structure pointer set.
5177 * Perform verbose error reporting if not NULL.
5179 * Interface descriptor pointer on success,
5180 * NULL otherwise and rte_errno is set.
5182 static struct tcf_vtep*
5183 flow_tcf_vtep_acquire(struct mlx5_flow_tcf_context *tcf,
5184 unsigned int ifouter,
5185 struct mlx5_flow *dev_flow,
5186 struct rte_flow_error *error)
5188 struct tcf_vtep *vtep = NULL;
5190 assert(dev_flow->tcf.tunnel);
5191 pthread_mutex_lock(&vtep_list_mutex);
5192 switch (dev_flow->tcf.tunnel->type) {
5193 case FLOW_TCF_TUNACT_VXLAN_ENCAP:
5194 vtep = flow_tcf_encap_vtep_acquire(tcf, ifouter,
5197 case FLOW_TCF_TUNACT_VXLAN_DECAP:
5198 vtep = flow_tcf_decap_vtep_acquire(tcf, dev_flow, error);
5201 rte_flow_error_set(error, ENOTSUP,
5202 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5203 "unsupported tunnel type");
5206 pthread_mutex_unlock(&vtep_list_mutex);
5211 * Release tunneling interface by ifindex. Decrements reference
5212 * counter and actually removes the device if counter is zero.
5215 * Context object initialized by mlx5_flow_tcf_context_create().
5217 * VTEP device descriptor structure.
5218 * @param[in] dev_flow
5219 * Flow tcf object with tunnel structure pointer set.
5222 flow_tcf_vtep_release(struct mlx5_flow_tcf_context *tcf,
5223 struct tcf_vtep *vtep,
5224 struct mlx5_flow *dev_flow)
5226 assert(dev_flow->tcf.tunnel);
5227 pthread_mutex_lock(&vtep_list_mutex);
5228 switch (dev_flow->tcf.tunnel->type) {
5229 case FLOW_TCF_TUNACT_VXLAN_DECAP:
5231 case FLOW_TCF_TUNACT_VXLAN_ENCAP: {
5232 struct tcf_irule *iface;
5234 /* Remove the encap ancillary rules first. */
5235 iface = dev_flow->tcf.vxlan_encap->iface;
5237 flow_tcf_encap_neigh(tcf, iface, dev_flow, false, NULL);
5238 flow_tcf_encap_local(tcf, iface, dev_flow, false, NULL);
5239 flow_tcf_encap_irule_release(iface);
5240 dev_flow->tcf.vxlan_encap->iface = NULL;
5245 DRV_LOG(WARNING, "Unsupported tunnel type");
5248 assert(vtep->refcnt);
5249 if (--vtep->refcnt == 0) {
5250 LIST_REMOVE(vtep, next);
5251 flow_tcf_vtep_delete(tcf, vtep);
5253 pthread_mutex_unlock(&vtep_list_mutex);
5256 struct tcf_nlcb_query {
5259 uint32_t flags_valid:1;
5263 * Collect queried rule attributes. This is callback routine called by
5264 * libmnl mnl_cb_run() in loop for every message in received packet.
5265 * Current implementation collects the flower flags only.
5268 * Pointer to reply header.
5269 * @param[in, out] arg
5270 * Context pointer for this callback.
5273 * A positive, nonzero value on success (required by libmnl
5274 * to continue messages processing).
5277 flow_tcf_collect_query_cb(const struct nlmsghdr *nlh, void *arg)
5279 struct tcf_nlcb_query *query = arg;
5280 struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
5281 struct nlattr *na, *na_opt;
5282 bool flower = false;
5284 if (nlh->nlmsg_type != RTM_NEWTFILTER ||
5285 tcm->tcm_handle != query->handle)
5287 mnl_attr_for_each(na, nlh, sizeof(*tcm)) {
5288 switch (mnl_attr_get_type(na)) {
5290 if (strcmp(mnl_attr_get_payload(na), "flower")) {
5291 /* Not flower filter, drop entire message. */
5298 /* Not flower options, drop entire message. */
5301 /* Check nested flower options. */
5302 mnl_attr_for_each_nested(na_opt, na) {
5303 switch (mnl_attr_get_type(na_opt)) {
5304 case TCA_FLOWER_FLAGS:
5305 query->flags_valid = 1;
5307 mnl_attr_get_u32(na_opt);
5318 * Query a TC flower rule flags via netlink.
5321 * Context object initialized by mlx5_flow_tcf_context_create().
5322 * @param[in] dev_flow
5323 * Pointer to the flow.
5324 * @param[out] pflags
5325 * pointer to the data retrieved by the query.
5328 * 0 on success, a negative errno value otherwise.
5331 flow_tcf_query_flags(struct mlx5_flow_tcf_context *tcf,
5332 struct mlx5_flow *dev_flow,
5335 struct nlmsghdr *nlh;
5337 struct tcf_nlcb_query query = {
5338 .handle = dev_flow->tcf.tcm->tcm_handle,
5341 nlh = mnl_nlmsg_put_header(tcf->buf);
5342 nlh->nlmsg_type = RTM_GETTFILTER;
5343 nlh->nlmsg_flags = NLM_F_REQUEST;
5344 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
5345 memcpy(tcm, dev_flow->tcf.tcm, sizeof(*tcm));
5347 * Ignore Netlink error for filter query operations.
5348 * The reply length is sent by kernel as errno.
5349 * Just check we got the flags option.
5351 flow_tcf_nl_ack(tcf, nlh, flow_tcf_collect_query_cb, &query);
5352 if (!query.flags_valid) {
5356 *pflags = query.tc_flags;
5361 * Query and check the in_hw set for specified rule.
5364 * Context object initialized by mlx5_flow_tcf_context_create().
5365 * @param[in] dev_flow
5366 * Pointer to the flow to check.
5369 * 0 on success, a negative errno value otherwise.
5372 flow_tcf_check_inhw(struct mlx5_flow_tcf_context *tcf,
5373 struct mlx5_flow *dev_flow)
5378 ret = flow_tcf_query_flags(tcf, dev_flow, &flags);
5381 return (flags & TCA_CLS_FLAGS_IN_HW) ? 0 : -ENOENT;
5385 * Remove flow from E-Switch by sending Netlink message.
5388 * Pointer to Ethernet device.
5389 * @param[in, out] flow
5390 * Pointer to the sub flow.
5393 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
5395 struct priv *priv = dev->data->dev_private;
5396 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
5397 struct mlx5_flow *dev_flow;
5398 struct nlmsghdr *nlh;
5402 dev_flow = LIST_FIRST(&flow->dev_flows);
5405 /* E-Switch flow can't be expanded. */
5406 assert(!LIST_NEXT(dev_flow, next));
5407 if (dev_flow->tcf.applied) {
5408 nlh = dev_flow->tcf.nlh;
5409 nlh->nlmsg_type = RTM_DELTFILTER;
5410 nlh->nlmsg_flags = NLM_F_REQUEST;
5411 flow_tcf_nl_ack(ctx, nlh, NULL, NULL);
5412 if (dev_flow->tcf.tunnel) {
5413 assert(dev_flow->tcf.tunnel->vtep);
5414 flow_tcf_vtep_release(ctx,
5415 dev_flow->tcf.tunnel->vtep,
5417 dev_flow->tcf.tunnel->vtep = NULL;
5419 dev_flow->tcf.applied = 0;
5424 * Apply flow to E-Switch by sending Netlink message.
5427 * Pointer to Ethernet device.
5428 * @param[in, out] flow
5429 * Pointer to the sub flow.
5431 * Pointer to the error structure.
5434 * 0 on success, a negative errno value otherwise and rte_errno is set.
5437 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
5438 struct rte_flow_error *error)
5440 struct priv *priv = dev->data->dev_private;
5441 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
5442 struct mlx5_flow *dev_flow;
5443 struct nlmsghdr *nlh;
5445 dev_flow = LIST_FIRST(&flow->dev_flows);
5446 /* E-Switch flow can't be expanded. */
5447 assert(!LIST_NEXT(dev_flow, next));
5448 if (dev_flow->tcf.applied)
5450 nlh = dev_flow->tcf.nlh;
5451 nlh->nlmsg_type = RTM_NEWTFILTER;
5452 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
5453 if (dev_flow->tcf.tunnel) {
5455 * Replace the interface index, target for
5456 * encapsulation, source for decapsulation.
5458 assert(!dev_flow->tcf.tunnel->vtep);
5459 assert(dev_flow->tcf.tunnel->ifindex_ptr);
5460 /* Acquire actual VTEP device when rule is being applied. */
5461 dev_flow->tcf.tunnel->vtep =
5462 flow_tcf_vtep_acquire(ctx,
5463 dev_flow->tcf.tunnel->ifindex_org,
5465 if (!dev_flow->tcf.tunnel->vtep)
5467 DRV_LOG(INFO, "Replace ifindex: %d->%d",
5468 dev_flow->tcf.tunnel->vtep->ifindex,
5469 dev_flow->tcf.tunnel->ifindex_org);
5470 *dev_flow->tcf.tunnel->ifindex_ptr =
5471 dev_flow->tcf.tunnel->vtep->ifindex;
5473 if (!flow_tcf_nl_ack(ctx, nlh, NULL, NULL)) {
5474 dev_flow->tcf.applied = 1;
5475 if (*dev_flow->tcf.ptc_flags & TCA_CLS_FLAGS_SKIP_SW)
5478 * Rule was applied without skip_sw flag set.
5479 * We should check whether the rule was actually
5480 * accepted by hardware (have a look at the in_hw flag).
5482 if (flow_tcf_check_inhw(ctx, dev_flow)) {
5483 flow_tcf_remove(dev, flow);
5484 return rte_flow_error_set
5486 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5487 "netlink: rule has no in_hw flag set");
5491 if (dev_flow->tcf.tunnel) {
5492 /* Rollback the VTEP configuration if rule apply failed. */
5493 assert(dev_flow->tcf.tunnel->vtep);
5494 flow_tcf_vtep_release(ctx, dev_flow->tcf.tunnel->vtep,
5496 dev_flow->tcf.tunnel->vtep = NULL;
5498 return rte_flow_error_set(error, rte_errno,
5499 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5500 "netlink: failed to create TC flow rule");
5504 * Remove flow from E-Switch and release resources of the device flow.
5507 * Pointer to Ethernet device.
5508 * @param[in, out] flow
5509 * Pointer to the sub flow.
5512 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
5514 struct mlx5_flow *dev_flow;
5518 flow_tcf_remove(dev, flow);
5519 if (flow->counter) {
5520 if (--flow->counter->ref_cnt == 0) {
5521 rte_free(flow->counter);
5522 flow->counter = NULL;
5525 dev_flow = LIST_FIRST(&flow->dev_flows);
5528 /* E-Switch flow can't be expanded. */
5529 assert(!LIST_NEXT(dev_flow, next));
5530 LIST_REMOVE(dev_flow, next);
5535 * Helper routine for figuring the space size required for a parse buffer.
5538 * array of values to use.
5540 * Current location in array.
5542 * Value to compare with.
5545 * The maximum between the given value and the array value on index.
5548 flow_tcf_arr_val_max(uint16_t array[], int idx, uint16_t value)
5550 return idx < 0 ? (value) : RTE_MAX((array)[idx], value);
5554 * Parse rtnetlink message attributes filling the attribute table with the info
5558 * Attribute table to be filled.
5560 * Maximum entry in the attribute table.
5562 * The attributes section in the message to be parsed.
5564 * The length of the attributes section in the message.
5567 flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
5568 struct rtattr *rta, int len)
5570 unsigned short type;
5571 memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
5572 while (RTA_OK(rta, len)) {
5573 type = rta->rta_type;
5574 if (type <= max && !tb[type])
5576 rta = RTA_NEXT(rta, len);
5581 * Extract flow counters from flower action.
5584 * flower action stats properties in the Netlink message received.
5586 * The backward sequence of rta_types, as written in the attribute table,
5587 * we need to traverse in order to get to the requested object.
5589 * Current location in rta_type table.
5591 * data holding the count statistics of the rte_flow retrieved from
5595 * 0 if data was found and retrieved, -1 otherwise.
5598 flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
5599 uint16_t rta_type[], int idx,
5600 struct gnet_stats_basic *data)
5602 int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
5604 struct rtattr *tbs[tca_stats_max + 1];
5606 if (rta == NULL || idx < 0)
5608 flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
5609 RTA_DATA(rta), RTA_PAYLOAD(rta));
5610 switch (rta_type[idx]) {
5611 case TCA_STATS_BASIC:
5612 if (tbs[TCA_STATS_BASIC]) {
5613 memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
5614 RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
5626 * Parse flower single action retrieving the requested action attribute,
5630 * flower action properties in the Netlink message received.
5632 * The backward sequence of rta_types, as written in the attribute table,
5633 * we need to traverse in order to get to the requested object.
5635 * Current location in rta_type table.
5637 * Count statistics retrieved from the message query.
5640 * 0 if data was found and retrieved, -1 otherwise.
5643 flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
5644 uint16_t rta_type[], int idx, void *data)
5646 int tca_act_max = flow_tcf_arr_val_max(rta_type, idx, TCA_ACT_STATS);
5647 struct rtattr *tb[tca_act_max + 1];
5649 if (arg == NULL || idx < 0)
5651 flow_tcf_nl_parse_rtattr(tb, tca_act_max,
5652 RTA_DATA(arg), RTA_PAYLOAD(arg));
5653 if (tb[TCA_ACT_KIND] == NULL)
5655 switch (rta_type[idx]) {
5657 if (tb[TCA_ACT_STATS])
5658 return flow_tcf_nl_action_stats_parse_and_get
5661 (struct gnet_stats_basic *)data);
5670 * Parse flower action section in the message retrieving the requested
5671 * attribute from the first action that provides it.
5674 * flower section in the Netlink message received.
5676 * The backward sequence of rta_types, as written in the attribute table,
5677 * we need to traverse in order to get to the requested object.
5679 * Current location in rta_type table.
5681 * data retrieved from the message query.
5684 * 0 if data was found and retrieved, -1 otherwise.
5687 flow_tcf_nl_action_parse_and_get(struct rtattr *arg,
5688 uint16_t rta_type[], int idx, void *data)
5690 struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
5693 if (arg == NULL || idx < 0)
5695 flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
5696 RTA_DATA(arg), RTA_PAYLOAD(arg));
5697 switch (rta_type[idx]) {
5699 * flow counters are stored in the actions defined by the flow
5700 * and not in the flow itself, therefore we need to traverse the
5701 * flower chain of actions in search for them.
5703 * Note that the index is not decremented here.
5706 for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
5708 !flow_tcf_nl_parse_one_action_and_get(tb[i],
5721 * Parse flower classifier options in the message, retrieving the requested
5722 * attribute if found.
5725 * flower section in the Netlink message received.
5727 * The backward sequence of rta_types, as written in the attribute table,
5728 * we need to traverse in order to get to the requested object.
5730 * Current location in rta_type table.
5732 * data retrieved from the message query.
5735 * 0 if data was found and retrieved, -1 otherwise.
5738 flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
5739 uint16_t rta_type[], int idx, void *data)
5741 int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
5743 struct rtattr *tb[tca_flower_max + 1];
5745 if (!opt || idx < 0)
5747 flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
5748 RTA_DATA(opt), RTA_PAYLOAD(opt));
5749 switch (rta_type[idx]) {
5750 case TCA_FLOWER_ACT:
5751 if (tb[TCA_FLOWER_ACT])
5752 return flow_tcf_nl_action_parse_and_get
5753 (tb[TCA_FLOWER_ACT],
5754 rta_type, --idx, data);
5763 * Parse Netlink reply on filter query, retrieving the flow counters.
5766 * Message received from Netlink.
5768 * The backward sequence of rta_types, as written in the attribute table,
5769 * we need to traverse in order to get to the requested object.
5771 * Current location in rta_type table.
5773 * data retrieved from the message query.
5776 * 0 if data was found and retrieved, -1 otherwise.
5779 flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
5780 uint16_t rta_type[], int idx, void *data)
5782 struct nlmsghdr *nlh = cnlh;
5783 struct tcmsg *t = NLMSG_DATA(nlh);
5784 int len = nlh->nlmsg_len;
5785 int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
5786 struct rtattr *tb[tca_max + 1];
5790 if (nlh->nlmsg_type != RTM_NEWTFILTER &&
5791 nlh->nlmsg_type != RTM_GETTFILTER &&
5792 nlh->nlmsg_type != RTM_DELTFILTER)
5794 len -= NLMSG_LENGTH(sizeof(*t));
5797 flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
5798 /* Not a TC flower flow - bail out */
5799 if (!tb[TCA_KIND] ||
5800 strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
5802 switch (rta_type[idx]) {
5804 if (tb[TCA_OPTIONS])
5805 return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
5816 * A callback to parse Netlink reply on TC flower query.
5819 * Message received from Netlink.
5821 * Pointer to data area to be filled by the parsing routine.
5822 * assumed to be a pointer to struct flow_tcf_stats_basic.
5828 flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
5831 * The backward sequence of rta_types to pass in order to get
5834 uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
5835 TCA_FLOWER_ACT, TCA_OPTIONS };
5836 struct flow_tcf_stats_basic *sb_data = data;
5838 const struct nlmsghdr *c;
5839 struct nlmsghdr *nc;
5840 } tnlh = { .c = nlh };
5842 if (!flow_tcf_nl_filter_parse_and_get(tnlh.nc, rta_type,
5843 RTE_DIM(rta_type) - 1,
5844 (void *)&sb_data->counters))
5845 sb_data->valid = true;
5850 * Query a TC flower rule for its statistics via netlink.
5853 * Pointer to Ethernet device.
5855 * Pointer to the sub flow.
5857 * data retrieved by the query.
5859 * Perform verbose error reporting if not NULL.
5862 * 0 on success, a negative errno value otherwise and rte_errno is set.
5865 flow_tcf_query_count(struct rte_eth_dev *dev,
5866 struct rte_flow *flow,
5868 struct rte_flow_error *error)
5870 struct flow_tcf_stats_basic sb_data;
5871 struct rte_flow_query_count *qc = data;
5872 struct priv *priv = dev->data->dev_private;
5873 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
5874 struct mnl_socket *nl = ctx->nl;
5875 struct mlx5_flow *dev_flow;
5876 struct nlmsghdr *nlh;
5877 uint32_t seq = priv->tcf_context->seq++;
5881 memset(&sb_data, 0, sizeof(sb_data));
5882 dev_flow = LIST_FIRST(&flow->dev_flows);
5883 /* E-Switch flow can't be expanded. */
5884 assert(!LIST_NEXT(dev_flow, next));
5885 if (!dev_flow->flow->counter)
5887 nlh = dev_flow->tcf.nlh;
5888 nlh->nlmsg_type = RTM_GETTFILTER;
5889 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
5890 nlh->nlmsg_seq = seq;
5891 if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
5894 ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
5897 ret = mnl_cb_run(ctx->buf, ret, seq,
5898 mnl_socket_get_portid(nl),
5899 flow_tcf_nl_message_get_stats_basic,
5902 /* Return the delta from last reset. */
5903 if (sb_data.valid) {
5904 /* Return the delta from last reset. */
5907 qc->hits = sb_data.counters.packets - flow->counter->hits;
5908 qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
5910 flow->counter->hits = sb_data.counters.packets;
5911 flow->counter->bytes = sb_data.counters.bytes;
5915 return rte_flow_error_set(error, EINVAL,
5916 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5918 "flow does not have counter");
5920 return rte_flow_error_set
5921 (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5922 NULL, "netlink: failed to read flow rule counters");
5924 return rte_flow_error_set
5925 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5926 NULL, "counters are not available.");
5932 * @see rte_flow_query()
5936 flow_tcf_query(struct rte_eth_dev *dev,
5937 struct rte_flow *flow,
5938 const struct rte_flow_action *actions,
5940 struct rte_flow_error *error)
5944 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5945 switch (actions->type) {
5946 case RTE_FLOW_ACTION_TYPE_VOID:
5948 case RTE_FLOW_ACTION_TYPE_COUNT:
5949 ret = flow_tcf_query_count(dev, flow, data, error);
5952 return rte_flow_error_set(error, ENOTSUP,
5953 RTE_FLOW_ERROR_TYPE_ACTION,
5955 "action not supported");
5961 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
5962 .validate = flow_tcf_validate,
5963 .prepare = flow_tcf_prepare,
5964 .translate = flow_tcf_translate,
5965 .apply = flow_tcf_apply,
5966 .remove = flow_tcf_remove,
5967 .destroy = flow_tcf_destroy,
5968 .query = flow_tcf_query,
5972 * Create and configure a libmnl socket for Netlink flow rules.
5975 * A valid libmnl socket object pointer on success, NULL otherwise and
5978 static struct mnl_socket *
5979 flow_tcf_mnl_socket_create(void)
5981 struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
5984 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
5986 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
5991 mnl_socket_close(nl);
5996 * Destroy a libmnl socket.
5999 * Libmnl socket of the @p NETLINK_ROUTE kind.
6002 flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
6005 mnl_socket_close(nl);
6009 * Initialize ingress qdisc of a given network interface.
6012 * Pointer to tc-flower context to use.
6014 * Index of network interface to initialize.
6016 * Perform verbose error reporting if not NULL.
6019 * 0 on success, a negative errno value otherwise and rte_errno is set.
6022 mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
6023 unsigned int ifindex, struct rte_flow_error *error)
6025 struct nlmsghdr *nlh;
6027 alignas(struct nlmsghdr)
6028 uint8_t buf[mnl_nlmsg_size(sizeof(*tcm)) +
6029 SZ_NLATTR_STRZ_OF("ingress") +
6030 MNL_BUF_EXTRA_SPACE];
6032 /* Destroy existing ingress qdisc and everything attached to it. */
6033 nlh = mnl_nlmsg_put_header(buf);
6034 nlh->nlmsg_type = RTM_DELQDISC;
6035 nlh->nlmsg_flags = NLM_F_REQUEST;
6036 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
6037 tcm->tcm_family = AF_UNSPEC;
6038 tcm->tcm_ifindex = ifindex;
6039 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
6040 tcm->tcm_parent = TC_H_INGRESS;
6041 assert(sizeof(buf) >= nlh->nlmsg_len);
6042 /* Ignore errors when qdisc is already absent. */
6043 if (flow_tcf_nl_ack(ctx, nlh, NULL, NULL) &&
6044 rte_errno != EINVAL && rte_errno != ENOENT)
6045 return rte_flow_error_set(error, rte_errno,
6046 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
6047 "netlink: failed to remove ingress"
6049 /* Create fresh ingress qdisc. */
6050 nlh = mnl_nlmsg_put_header(buf);
6051 nlh->nlmsg_type = RTM_NEWQDISC;
6052 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
6053 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
6054 tcm->tcm_family = AF_UNSPEC;
6055 tcm->tcm_ifindex = ifindex;
6056 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
6057 tcm->tcm_parent = TC_H_INGRESS;
6058 mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
6059 assert(sizeof(buf) >= nlh->nlmsg_len);
6060 if (flow_tcf_nl_ack(ctx, nlh, NULL, NULL))
6061 return rte_flow_error_set(error, rte_errno,
6062 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
6063 "netlink: failed to create ingress"
6069 * Create libmnl context for Netlink flow rules.
6072 * A valid libmnl context object pointer on success, NULL otherwise and
6075 struct mlx5_flow_tcf_context *
6076 mlx5_flow_tcf_context_create(void)
6078 struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
6083 ctx->nl = flow_tcf_mnl_socket_create();
6086 ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
6087 ctx->buf = rte_zmalloc(__func__,
6088 ctx->buf_size, sizeof(uint32_t));
6091 ctx->seq = random();
6094 mlx5_flow_tcf_context_destroy(ctx);
6099 * Destroy a libmnl context.
6102 * Libmnl context to destroy, including its Netlink socket.
6105 mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
6109 flow_tcf_mnl_socket_destroy(ctx->nl);