1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/gen_stats.h>
10 #include <linux/if_ether.h>
11 #include <linux/netlink.h>
12 #include <linux/pkt_cls.h>
13 #include <linux/pkt_sched.h>
14 #include <linux/rtnetlink.h>
15 #include <linux/tc_act/tc_gact.h>
16 #include <linux/tc_act/tc_mirred.h>
17 #include <netinet/in.h>
23 #include <sys/socket.h>
25 #include <rte_byteorder.h>
26 #include <rte_errno.h>
27 #include <rte_ether.h>
29 #include <rte_malloc.h>
30 #include <rte_common.h>
33 #include "mlx5_flow.h"
34 #include "mlx5_autoconf.h"
36 #ifdef HAVE_TC_ACT_VLAN
38 #include <linux/tc_act/tc_vlan.h>
40 #else /* HAVE_TC_ACT_VLAN */
42 #define TCA_VLAN_ACT_POP 1
43 #define TCA_VLAN_ACT_PUSH 2
44 #define TCA_VLAN_ACT_MODIFY 3
45 #define TCA_VLAN_PARMS 2
46 #define TCA_VLAN_PUSH_VLAN_ID 3
47 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
48 #define TCA_VLAN_PAD 5
49 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
56 #endif /* HAVE_TC_ACT_VLAN */
58 #ifdef HAVE_TC_ACT_PEDIT
60 #include <linux/tc_act/tc_pedit.h>
62 #else /* HAVE_TC_ACT_PEDIT */
76 TCA_PEDIT_KEY_EX_HTYPE = 1,
77 TCA_PEDIT_KEY_EX_CMD = 2,
78 __TCA_PEDIT_KEY_EX_MAX
81 enum pedit_header_type {
82 TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
83 TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
84 TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
85 TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
86 TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
87 TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
92 TCA_PEDIT_KEY_EX_CMD_SET = 0,
93 TCA_PEDIT_KEY_EX_CMD_ADD = 1,
100 __u32 off; /*offset */
107 struct tc_pedit_sel {
111 struct tc_pedit_key keys[0];
114 #endif /* HAVE_TC_ACT_PEDIT */
116 #ifdef HAVE_TC_ACT_TUNNEL_KEY
118 #include <linux/tc_act/tc_tunnel_key.h>
120 #ifndef HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT
121 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
124 #ifndef HAVE_TCA_TUNNEL_KEY_NO_CSUM
125 #define TCA_TUNNEL_KEY_NO_CSUM 10
128 #else /* HAVE_TC_ACT_TUNNEL_KEY */
130 #define TCA_ACT_TUNNEL_KEY 17
131 #define TCA_TUNNEL_KEY_ACT_SET 1
132 #define TCA_TUNNEL_KEY_ACT_RELEASE 2
133 #define TCA_TUNNEL_KEY_PARMS 2
134 #define TCA_TUNNEL_KEY_ENC_IPV4_SRC 3
135 #define TCA_TUNNEL_KEY_ENC_IPV4_DST 4
136 #define TCA_TUNNEL_KEY_ENC_IPV6_SRC 5
137 #define TCA_TUNNEL_KEY_ENC_IPV6_DST 6
138 #define TCA_TUNNEL_KEY_ENC_KEY_ID 7
139 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
140 #define TCA_TUNNEL_KEY_NO_CSUM 10
142 struct tc_tunnel_key {
147 #endif /* HAVE_TC_ACT_TUNNEL_KEY */
149 /* Normally found in linux/netlink.h. */
150 #ifndef NETLINK_CAP_ACK
151 #define NETLINK_CAP_ACK 10
154 /* Normally found in linux/pkt_sched.h. */
155 #ifndef TC_H_MIN_INGRESS
156 #define TC_H_MIN_INGRESS 0xfff2u
159 /* Normally found in linux/pkt_cls.h. */
160 #ifndef TCA_CLS_FLAGS_SKIP_SW
161 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
163 #ifndef HAVE_TCA_CHAIN
166 #ifndef HAVE_TCA_FLOWER_ACT
167 #define TCA_FLOWER_ACT 3
169 #ifndef HAVE_TCA_FLOWER_FLAGS
170 #define TCA_FLOWER_FLAGS 22
172 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
173 #define TCA_FLOWER_KEY_ETH_TYPE 8
175 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
176 #define TCA_FLOWER_KEY_ETH_DST 4
178 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
179 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
181 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
182 #define TCA_FLOWER_KEY_ETH_SRC 6
184 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
185 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
187 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
188 #define TCA_FLOWER_KEY_IP_PROTO 9
190 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
191 #define TCA_FLOWER_KEY_IPV4_SRC 10
193 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
194 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
196 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
197 #define TCA_FLOWER_KEY_IPV4_DST 12
199 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
200 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
202 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
203 #define TCA_FLOWER_KEY_IPV6_SRC 14
205 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
206 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
208 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
209 #define TCA_FLOWER_KEY_IPV6_DST 16
211 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
212 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
214 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
215 #define TCA_FLOWER_KEY_TCP_SRC 18
217 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
218 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
220 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
221 #define TCA_FLOWER_KEY_TCP_DST 19
223 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
224 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
226 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
227 #define TCA_FLOWER_KEY_UDP_SRC 20
229 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
230 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
232 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
233 #define TCA_FLOWER_KEY_UDP_DST 21
235 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
236 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
238 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
239 #define TCA_FLOWER_KEY_VLAN_ID 23
241 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
242 #define TCA_FLOWER_KEY_VLAN_PRIO 24
244 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
245 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
247 #ifndef HAVE_TCA_FLOWER_KEY_ENC_KEY_ID
248 #define TCA_FLOWER_KEY_ENC_KEY_ID 26
250 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC
251 #define TCA_FLOWER_KEY_ENC_IPV4_SRC 27
253 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK
254 #define TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK 28
256 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST
257 #define TCA_FLOWER_KEY_ENC_IPV4_DST 29
259 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK
260 #define TCA_FLOWER_KEY_ENC_IPV4_DST_MASK 30
262 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC
263 #define TCA_FLOWER_KEY_ENC_IPV6_SRC 31
265 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK
266 #define TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK 32
268 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST
269 #define TCA_FLOWER_KEY_ENC_IPV6_DST 33
271 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK
272 #define TCA_FLOWER_KEY_ENC_IPV6_DST_MASK 34
274 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT
275 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT 43
277 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK
278 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK 44
280 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT
281 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT 45
283 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK
284 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK 46
286 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
287 #define TCA_FLOWER_KEY_TCP_FLAGS 71
289 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
290 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
292 #ifndef HAVE_TC_ACT_GOTO_CHAIN
293 #define TC_ACT_GOTO_CHAIN 0x20000000
296 #ifndef IPV6_ADDR_LEN
297 #define IPV6_ADDR_LEN 16
300 #ifndef IPV4_ADDR_LEN
301 #define IPV4_ADDR_LEN 4
305 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
312 #ifndef TCA_ACT_MAX_PRIO
313 #define TCA_ACT_MAX_PRIO 32
316 /** UDP port range of VXLAN devices created by driver. */
317 #define MLX5_VXLAN_PORT_MIN 30000
318 #define MLX5_VXLAN_PORT_MAX 60000
319 #define MLX5_VXLAN_DEVICE_PFX "vmlx_"
321 /** Tunnel action type, used for @p type in header structure. */
322 enum flow_tcf_tunact_type {
323 FLOW_TCF_TUNACT_VXLAN_DECAP,
324 FLOW_TCF_TUNACT_VXLAN_ENCAP,
327 /** Flags used for @p mask in tunnel action encap descriptors. */
328 #define FLOW_TCF_ENCAP_ETH_SRC (1u << 0)
329 #define FLOW_TCF_ENCAP_ETH_DST (1u << 1)
330 #define FLOW_TCF_ENCAP_IPV4_SRC (1u << 2)
331 #define FLOW_TCF_ENCAP_IPV4_DST (1u << 3)
332 #define FLOW_TCF_ENCAP_IPV6_SRC (1u << 4)
333 #define FLOW_TCF_ENCAP_IPV6_DST (1u << 5)
334 #define FLOW_TCF_ENCAP_UDP_SRC (1u << 6)
335 #define FLOW_TCF_ENCAP_UDP_DST (1u << 7)
336 #define FLOW_TCF_ENCAP_VXLAN_VNI (1u << 8)
339 * Structure for holding netlink context.
340 * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
341 * Using this (8KB) buffer size ensures that netlink messages will never be
344 struct mlx5_flow_tcf_context {
345 struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
346 uint32_t seq; /* Message sequence number. */
347 uint32_t buf_size; /* Message buffer size. */
348 uint8_t *buf; /* Message buffer. */
352 * Neigh rule structure. The neigh rule is applied via Netlink to
353 * outer tunnel iface in order to provide destination MAC address
354 * for the VXLAN encapsultion. The neigh rule is implicitly related
355 * to the Flow itself and can be shared by multiple Flows.
357 struct tcf_neigh_rule {
358 LIST_ENTRY(tcf_neigh_rule) next;
360 struct ether_addr eth;
367 uint8_t dst[IPV6_ADDR_LEN];
373 * Local rule structure. The local rule is applied via Netlink to
374 * outer tunnel iface in order to provide local and peer IP addresses
375 * of the VXLAN tunnel for encapsulation. The local rule is implicitly
376 * related to the Flow itself and can be shared by multiple Flows.
378 struct tcf_local_rule {
379 LIST_ENTRY(tcf_local_rule) next;
388 uint8_t dst[IPV6_ADDR_LEN];
389 uint8_t src[IPV6_ADDR_LEN];
394 /** VXLAN virtual netdev. */
396 LIST_ENTRY(tcf_vtep) next;
397 LIST_HEAD(, tcf_neigh_rule) neigh;
398 LIST_HEAD(, tcf_local_rule) local;
400 unsigned int ifindex; /**< Own interface index. */
401 unsigned int ifouter; /**< Index of device attached to. */
406 /** Tunnel descriptor header, common for all tunnel types. */
407 struct flow_tcf_tunnel_hdr {
408 uint32_t type; /**< Tunnel action type. */
409 struct tcf_vtep *vtep; /**< Virtual tunnel endpoint device. */
410 unsigned int ifindex_org; /**< Original dst/src interface */
411 unsigned int *ifindex_ptr; /**< Interface ptr in message. */
414 struct flow_tcf_vxlan_decap {
415 struct flow_tcf_tunnel_hdr hdr;
419 struct flow_tcf_vxlan_encap {
420 struct flow_tcf_tunnel_hdr hdr;
423 struct ether_addr dst;
424 struct ether_addr src;
432 uint8_t dst[IPV6_ADDR_LEN];
433 uint8_t src[IPV6_ADDR_LEN];
445 /** Structure used when extracting the values of a flow counters
446 * from a netlink message.
448 struct flow_tcf_stats_basic {
450 struct gnet_stats_basic counters;
453 /** Empty masks for known item types. */
455 struct rte_flow_item_port_id port_id;
456 struct rte_flow_item_eth eth;
457 struct rte_flow_item_vlan vlan;
458 struct rte_flow_item_ipv4 ipv4;
459 struct rte_flow_item_ipv6 ipv6;
460 struct rte_flow_item_tcp tcp;
461 struct rte_flow_item_udp udp;
462 struct rte_flow_item_vxlan vxlan;
463 } flow_tcf_mask_empty;
465 /** Supported masks for known item types. */
466 static const struct {
467 struct rte_flow_item_port_id port_id;
468 struct rte_flow_item_eth eth;
469 struct rte_flow_item_vlan vlan;
470 struct rte_flow_item_ipv4 ipv4;
471 struct rte_flow_item_ipv6 ipv6;
472 struct rte_flow_item_tcp tcp;
473 struct rte_flow_item_udp udp;
474 struct rte_flow_item_vxlan vxlan;
475 } flow_tcf_mask_supported = {
480 .type = RTE_BE16(0xffff),
481 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
482 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
485 /* PCP and VID only, no DEI. */
486 .tci = RTE_BE16(0xefff),
487 .inner_type = RTE_BE16(0xffff),
490 .next_proto_id = 0xff,
491 .src_addr = RTE_BE32(0xffffffff),
492 .dst_addr = RTE_BE32(0xffffffff),
497 "\xff\xff\xff\xff\xff\xff\xff\xff"
498 "\xff\xff\xff\xff\xff\xff\xff\xff",
500 "\xff\xff\xff\xff\xff\xff\xff\xff"
501 "\xff\xff\xff\xff\xff\xff\xff\xff",
504 .src_port = RTE_BE16(0xffff),
505 .dst_port = RTE_BE16(0xffff),
509 .src_port = RTE_BE16(0xffff),
510 .dst_port = RTE_BE16(0xffff),
513 .vni = "\xff\xff\xff",
517 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
518 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
519 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
520 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
521 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
523 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
525 /** DPDK port to network interface index (ifindex) conversion. */
526 struct flow_tcf_ptoi {
527 uint16_t port_id; /**< DPDK port ID. */
528 unsigned int ifindex; /**< Network interface index. */
531 /* Due to a limitation on driver/FW. */
532 #define MLX5_TCF_GROUP_ID_MAX 3
533 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
535 #define MLX5_TCF_FATE_ACTIONS \
536 (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
537 MLX5_FLOW_ACTION_JUMP)
539 #define MLX5_TCF_VLAN_ACTIONS \
540 (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
541 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
543 #define MLX5_TCF_VXLAN_ACTIONS \
544 (MLX5_FLOW_ACTION_VXLAN_ENCAP | MLX5_FLOW_ACTION_VXLAN_DECAP)
546 #define MLX5_TCF_PEDIT_ACTIONS \
547 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
548 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
549 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
550 MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
551 MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
553 #define MLX5_TCF_CONFIG_ACTIONS \
554 (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
555 MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
556 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
557 (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
559 #define MAX_PEDIT_KEYS 128
560 #define SZ_PEDIT_KEY_VAL 4
562 #define NUM_OF_PEDIT_KEYS(sz) \
563 (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
565 struct pedit_key_ex {
566 enum pedit_header_type htype;
570 struct pedit_parser {
571 struct tc_pedit_sel sel;
572 struct tc_pedit_key keys[MAX_PEDIT_KEYS];
573 struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
577 * Create space for using the implicitly created TC flow counter.
580 * Pointer to the Ethernet device structure.
583 * A pointer to the counter data structure, NULL otherwise and
586 static struct mlx5_flow_counter *
587 flow_tcf_counter_new(void)
589 struct mlx5_flow_counter *cnt;
592 * eswitch counter cannot be shared and its id is unknown.
593 * currently returning all with id 0.
594 * in the future maybe better to switch to unique numbers.
596 struct mlx5_flow_counter tmpl = {
599 cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
605 /* Implicit counter, do not add to list. */
610 * Set pedit key of MAC address
613 * pointer to action specification
614 * @param[in,out] p_parser
615 * pointer to pedit_parser
618 flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
619 struct pedit_parser *p_parser)
621 int idx = p_parser->sel.nkeys;
622 uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
623 offsetof(struct ether_hdr, s_addr) :
624 offsetof(struct ether_hdr, d_addr);
625 const struct rte_flow_action_set_mac *conf =
626 (const struct rte_flow_action_set_mac *)actions->conf;
628 p_parser->keys[idx].off = off;
629 p_parser->keys[idx].mask = ~UINT32_MAX;
630 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
631 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
632 memcpy(&p_parser->keys[idx].val,
633 conf->mac_addr, SZ_PEDIT_KEY_VAL);
635 p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
636 p_parser->keys[idx].mask = 0xFFFF0000;
637 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
638 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
639 memcpy(&p_parser->keys[idx].val,
640 conf->mac_addr + SZ_PEDIT_KEY_VAL,
641 ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
642 p_parser->sel.nkeys = (++idx);
646 * Set pedit key of decrease/set ttl
649 * pointer to action specification
650 * @param[in,out] p_parser
651 * pointer to pedit_parser
652 * @param[in] item_flags
653 * flags of all items presented
656 flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
657 struct pedit_parser *p_parser,
660 int idx = p_parser->sel.nkeys;
662 p_parser->keys[idx].mask = 0xFFFFFF00;
663 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
664 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
665 p_parser->keys[idx].off =
666 offsetof(struct ipv4_hdr, time_to_live);
668 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
669 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
670 p_parser->keys[idx].off =
671 offsetof(struct ipv6_hdr, hop_limits);
673 if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
674 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
675 p_parser->keys[idx].val = 0x000000FF;
677 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
678 p_parser->keys[idx].val =
679 (__u32)((const struct rte_flow_action_set_ttl *)
680 actions->conf)->ttl_value;
682 p_parser->sel.nkeys = (++idx);
686 * Set pedit key of transport (TCP/UDP) port value
689 * pointer to action specification
690 * @param[in,out] p_parser
691 * pointer to pedit_parser
692 * @param[in] item_flags
693 * flags of all items presented
696 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
697 struct pedit_parser *p_parser,
700 int idx = p_parser->sel.nkeys;
702 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
703 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
704 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
705 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
706 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
707 /* offset of src/dst port is same for TCP and UDP */
708 p_parser->keys[idx].off =
709 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
710 offsetof(struct tcp_hdr, src_port) :
711 offsetof(struct tcp_hdr, dst_port);
712 p_parser->keys[idx].mask = 0xFFFF0000;
713 p_parser->keys[idx].val =
714 (__u32)((const struct rte_flow_action_set_tp *)
715 actions->conf)->port;
716 p_parser->sel.nkeys = (++idx);
720 * Set pedit key of ipv6 address
723 * pointer to action specification
724 * @param[in,out] p_parser
725 * pointer to pedit_parser
728 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
729 struct pedit_parser *p_parser)
731 int idx = p_parser->sel.nkeys;
732 int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
734 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
735 offsetof(struct ipv6_hdr, src_addr) :
736 offsetof(struct ipv6_hdr, dst_addr);
737 const struct rte_flow_action_set_ipv6 *conf =
738 (const struct rte_flow_action_set_ipv6 *)actions->conf;
740 for (int i = 0; i < keys; i++, idx++) {
741 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
742 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
743 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
744 p_parser->keys[idx].mask = ~UINT32_MAX;
745 memcpy(&p_parser->keys[idx].val,
746 conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
749 p_parser->sel.nkeys += keys;
753 * Set pedit key of ipv4 address
756 * pointer to action specification
757 * @param[in,out] p_parser
758 * pointer to pedit_parser
761 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
762 struct pedit_parser *p_parser)
764 int idx = p_parser->sel.nkeys;
766 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
767 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
768 p_parser->keys[idx].off =
769 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
770 offsetof(struct ipv4_hdr, src_addr) :
771 offsetof(struct ipv4_hdr, dst_addr);
772 p_parser->keys[idx].mask = ~UINT32_MAX;
773 p_parser->keys[idx].val =
774 ((const struct rte_flow_action_set_ipv4 *)
775 actions->conf)->ipv4_addr;
776 p_parser->sel.nkeys = (++idx);
780 * Create the pedit's na attribute in netlink message
781 * on pre-allocate message buffer
784 * pointer to pre-allocated netlink message buffer
785 * @param[in,out] actions
786 * pointer to pointer of actions specification.
787 * @param[in,out] action_flags
788 * pointer to actions flags
789 * @param[in] item_flags
790 * flags of all item presented
793 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
794 const struct rte_flow_action **actions,
797 struct pedit_parser p_parser;
798 struct nlattr *na_act_options;
799 struct nlattr *na_pedit_keys;
801 memset(&p_parser, 0, sizeof(p_parser));
802 mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
803 na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
804 /* all modify header actions should be in one tc-pedit action */
805 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
806 switch ((*actions)->type) {
807 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
808 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
809 flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
811 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
812 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
813 flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
815 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
816 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
817 flow_tcf_pedit_key_set_tp_port(*actions,
818 &p_parser, item_flags);
820 case RTE_FLOW_ACTION_TYPE_SET_TTL:
821 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
822 flow_tcf_pedit_key_set_dec_ttl(*actions,
823 &p_parser, item_flags);
825 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
826 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
827 flow_tcf_pedit_key_set_mac(*actions, &p_parser);
830 goto pedit_mnl_msg_done;
834 p_parser.sel.action = TC_ACT_PIPE;
835 mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
836 sizeof(p_parser.sel) +
837 p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
840 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
841 for (int i = 0; i < p_parser.sel.nkeys; i++) {
842 struct nlattr *na_pedit_key =
843 mnl_attr_nest_start(nl,
844 TCA_PEDIT_KEY_EX | NLA_F_NESTED);
845 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
846 p_parser.keys_ex[i].htype);
847 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
848 p_parser.keys_ex[i].cmd);
849 mnl_attr_nest_end(nl, na_pedit_key);
851 mnl_attr_nest_end(nl, na_pedit_keys);
852 mnl_attr_nest_end(nl, na_act_options);
857 * Calculate max memory size of one TC-pedit actions.
858 * One TC-pedit action can contain set of keys each defining
859 * a rewrite element (rte_flow action)
861 * @param[in,out] actions
862 * actions specification.
863 * @param[in,out] action_flags
865 * @param[in,out] size
868 * Max memory size of one TC-pedit action
871 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
872 uint64_t *action_flags)
878 pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
879 SZ_NLATTR_STRZ_OF("pedit") +
880 SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
881 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
882 switch ((*actions)->type) {
883 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
884 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
885 flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
887 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
888 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
889 flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
891 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
892 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
893 flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
895 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
896 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
897 flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
899 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
900 /* TCP is as same as UDP */
901 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
902 flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
904 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
905 /* TCP is as same as UDP */
906 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
907 flags |= MLX5_FLOW_ACTION_SET_TP_DST;
909 case RTE_FLOW_ACTION_TYPE_SET_TTL:
910 keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
911 flags |= MLX5_FLOW_ACTION_SET_TTL;
913 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
914 keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
915 flags |= MLX5_FLOW_ACTION_DEC_TTL;
917 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
918 keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
919 flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
921 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
922 keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
923 flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
926 goto get_pedit_action_size_done;
929 get_pedit_action_size_done:
930 /* TCA_PEDIT_PARAMS_EX */
932 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
933 keys * sizeof(struct tc_pedit_key));
934 pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
936 /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
937 (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
938 SZ_NLATTR_DATA_OF(2));
939 (*action_flags) |= flags;
945 * Retrieve mask for pattern item.
947 * This function does basic sanity checks on a pattern item in order to
948 * return the most appropriate mask for it.
951 * Item specification.
952 * @param[in] mask_default
953 * Default mask for pattern item as specified by the flow API.
954 * @param[in] mask_supported
955 * Mask fields supported by the implementation.
956 * @param[in] mask_empty
957 * Empty mask to return when there is no specification.
959 * Perform verbose error reporting if not NULL.
962 * Either @p item->mask or one of the mask parameters on success, NULL
963 * otherwise and rte_errno is set.
966 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
967 const void *mask_supported, const void *mask_empty,
968 size_t mask_size, struct rte_flow_error *error)
973 /* item->last and item->mask cannot exist without item->spec. */
974 if (!item->spec && (item->mask || item->last)) {
975 rte_flow_error_set(error, EINVAL,
976 RTE_FLOW_ERROR_TYPE_ITEM, item,
977 "\"mask\" or \"last\" field provided without"
978 " a corresponding \"spec\"");
981 /* No spec, no mask, no problem. */
984 mask = item->mask ? item->mask : mask_default;
987 * Single-pass check to make sure that:
988 * - Mask is supported, no bits are set outside mask_supported.
989 * - Both item->spec and item->last are included in mask.
991 for (i = 0; i != mask_size; ++i) {
994 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
995 ((const uint8_t *)mask_supported)[i]) {
996 rte_flow_error_set(error, ENOTSUP,
997 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
998 "unsupported field found"
1003 (((const uint8_t *)item->spec)[i] & mask[i]) !=
1004 (((const uint8_t *)item->last)[i] & mask[i])) {
1005 rte_flow_error_set(error, EINVAL,
1006 RTE_FLOW_ERROR_TYPE_ITEM_LAST,
1008 "range between \"spec\" and \"last\""
1009 " not comprised in \"mask\"");
1017 * Build a conversion table between port ID and ifindex.
1020 * Pointer to Ethernet device.
1022 * Pointer to ptoi table.
1024 * Size of ptoi table provided.
1027 * Size of ptoi table filled.
1030 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
1033 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
1034 uint16_t port_id[n + 1];
1036 unsigned int own = 0;
1038 /* At least one port is needed when no switch domain is present. */
1041 port_id[0] = dev->data->port_id;
1043 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
1047 for (i = 0; i != n; ++i) {
1048 struct rte_eth_dev_info dev_info;
1050 rte_eth_dev_info_get(port_id[i], &dev_info);
1051 if (port_id[i] == dev->data->port_id)
1053 ptoi[i].port_id = port_id[i];
1054 ptoi[i].ifindex = dev_info.if_index;
1056 /* Ensure first entry of ptoi[] is the current device. */
1059 ptoi[0] = ptoi[own];
1060 ptoi[own] = ptoi[n];
1062 /* An entry with zero ifindex terminates ptoi[]. */
1063 ptoi[n].port_id = 0;
1064 ptoi[n].ifindex = 0;
1069 * Verify the @p attr will be correctly understood by the E-switch.
1072 * Pointer to flow attributes
1074 * Pointer to error structure.
1077 * 0 on success, a negative errno value otherwise and rte_errno is set.
1080 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
1081 struct rte_flow_error *error)
1084 * Supported attributes: groups, some priorities and ingress only.
1085 * group is supported only if kernel supports chain. Don't care about
1086 * transfer as it is the caller's problem.
1088 if (attr->group > MLX5_TCF_GROUP_ID_MAX)
1089 return rte_flow_error_set(error, ENOTSUP,
1090 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
1091 "group ID larger than "
1092 RTE_STR(MLX5_TCF_GROUP_ID_MAX)
1093 " isn't supported");
1094 else if (attr->group > 0 &&
1095 attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
1096 return rte_flow_error_set(error, ENOTSUP,
1097 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1099 "lowest priority level is "
1100 RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
1101 " when group is configured");
1102 else if (attr->priority > 0xfffe)
1103 return rte_flow_error_set(error, ENOTSUP,
1104 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1106 "lowest priority level is 0xfffe");
1108 return rte_flow_error_set(error, EINVAL,
1109 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1110 attr, "only ingress is supported");
1112 return rte_flow_error_set(error, ENOTSUP,
1113 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1114 attr, "egress is not supported");
1119 * Validate flow for E-Switch.
1122 * Pointer to the priv structure.
1124 * Pointer to the flow attributes.
1126 * Pointer to the list of items.
1127 * @param[in] actions
1128 * Pointer to the list of actions.
1130 * Pointer to the error structure.
1133 * 0 on success, a negative errno value otherwise and rte_ernno is set.
1136 flow_tcf_validate(struct rte_eth_dev *dev,
1137 const struct rte_flow_attr *attr,
1138 const struct rte_flow_item items[],
1139 const struct rte_flow_action actions[],
1140 struct rte_flow_error *error)
1143 const struct rte_flow_item_port_id *port_id;
1144 const struct rte_flow_item_eth *eth;
1145 const struct rte_flow_item_vlan *vlan;
1146 const struct rte_flow_item_ipv4 *ipv4;
1147 const struct rte_flow_item_ipv6 *ipv6;
1148 const struct rte_flow_item_tcp *tcp;
1149 const struct rte_flow_item_udp *udp;
1152 const struct rte_flow_action_port_id *port_id;
1153 const struct rte_flow_action_jump *jump;
1154 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1155 const struct rte_flow_action_of_set_vlan_vid *
1157 const struct rte_flow_action_of_set_vlan_pcp *
1159 const struct rte_flow_action_set_ipv4 *set_ipv4;
1160 const struct rte_flow_action_set_ipv6 *set_ipv6;
1162 uint64_t item_flags = 0;
1163 uint64_t action_flags = 0;
1164 uint8_t next_protocol = -1;
1165 unsigned int tcm_ifindex = 0;
1166 uint8_t pedit_validated = 0;
1167 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1168 struct rte_eth_dev *port_id_dev = NULL;
1169 bool in_port_id_set;
1172 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1173 PTOI_TABLE_SZ_MAX(dev)));
1174 ret = flow_tcf_validate_attributes(attr, error);
1177 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1180 switch (items->type) {
1181 case RTE_FLOW_ITEM_TYPE_VOID:
1183 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1184 mask.port_id = flow_tcf_item_mask
1185 (items, &rte_flow_item_port_id_mask,
1186 &flow_tcf_mask_supported.port_id,
1187 &flow_tcf_mask_empty.port_id,
1188 sizeof(flow_tcf_mask_supported.port_id),
1192 if (mask.port_id == &flow_tcf_mask_empty.port_id) {
1196 spec.port_id = items->spec;
1197 if (mask.port_id->id && mask.port_id->id != 0xffffffff)
1198 return rte_flow_error_set
1200 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1202 "no support for partial mask on"
1204 if (!mask.port_id->id)
1207 for (i = 0; ptoi[i].ifindex; ++i)
1208 if (ptoi[i].port_id == spec.port_id->id)
1210 if (!ptoi[i].ifindex)
1211 return rte_flow_error_set
1213 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1215 "missing data to convert port ID to"
1217 if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
1218 return rte_flow_error_set
1220 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1222 "cannot match traffic for"
1223 " several port IDs through"
1224 " a single flow rule");
1225 tcm_ifindex = ptoi[i].ifindex;
1228 case RTE_FLOW_ITEM_TYPE_ETH:
1229 ret = mlx5_flow_validate_item_eth(items, item_flags,
1233 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1235 * Redundant check due to different supported mask.
1236 * Same for the rest of items.
1238 mask.eth = flow_tcf_item_mask
1239 (items, &rte_flow_item_eth_mask,
1240 &flow_tcf_mask_supported.eth,
1241 &flow_tcf_mask_empty.eth,
1242 sizeof(flow_tcf_mask_supported.eth),
1246 if (mask.eth->type && mask.eth->type !=
1248 return rte_flow_error_set
1250 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1252 "no support for partial mask on"
1255 case RTE_FLOW_ITEM_TYPE_VLAN:
1256 ret = mlx5_flow_validate_item_vlan(items, item_flags,
1260 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1261 mask.vlan = flow_tcf_item_mask
1262 (items, &rte_flow_item_vlan_mask,
1263 &flow_tcf_mask_supported.vlan,
1264 &flow_tcf_mask_empty.vlan,
1265 sizeof(flow_tcf_mask_supported.vlan),
1269 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
1270 (mask.vlan->tci & RTE_BE16(0xe000)) !=
1271 RTE_BE16(0xe000)) ||
1272 (mask.vlan->tci & RTE_BE16(0x0fff) &&
1273 (mask.vlan->tci & RTE_BE16(0x0fff)) !=
1274 RTE_BE16(0x0fff)) ||
1275 (mask.vlan->inner_type &&
1276 mask.vlan->inner_type != RTE_BE16(0xffff)))
1277 return rte_flow_error_set
1279 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1281 "no support for partial masks on"
1282 " \"tci\" (PCP and VID parts) and"
1283 " \"inner_type\" fields");
1285 case RTE_FLOW_ITEM_TYPE_IPV4:
1286 ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1290 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1291 mask.ipv4 = flow_tcf_item_mask
1292 (items, &rte_flow_item_ipv4_mask,
1293 &flow_tcf_mask_supported.ipv4,
1294 &flow_tcf_mask_empty.ipv4,
1295 sizeof(flow_tcf_mask_supported.ipv4),
1299 if (mask.ipv4->hdr.next_proto_id &&
1300 mask.ipv4->hdr.next_proto_id != 0xff)
1301 return rte_flow_error_set
1303 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1305 "no support for partial mask on"
1306 " \"hdr.next_proto_id\" field");
1307 else if (mask.ipv4->hdr.next_proto_id)
1309 ((const struct rte_flow_item_ipv4 *)
1310 (items->spec))->hdr.next_proto_id;
1312 case RTE_FLOW_ITEM_TYPE_IPV6:
1313 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1317 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1318 mask.ipv6 = flow_tcf_item_mask
1319 (items, &rte_flow_item_ipv6_mask,
1320 &flow_tcf_mask_supported.ipv6,
1321 &flow_tcf_mask_empty.ipv6,
1322 sizeof(flow_tcf_mask_supported.ipv6),
1326 if (mask.ipv6->hdr.proto &&
1327 mask.ipv6->hdr.proto != 0xff)
1328 return rte_flow_error_set
1330 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1332 "no support for partial mask on"
1333 " \"hdr.proto\" field");
1334 else if (mask.ipv6->hdr.proto)
1336 ((const struct rte_flow_item_ipv6 *)
1337 (items->spec))->hdr.proto;
1339 case RTE_FLOW_ITEM_TYPE_UDP:
1340 ret = mlx5_flow_validate_item_udp(items, item_flags,
1341 next_protocol, error);
1344 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1345 mask.udp = flow_tcf_item_mask
1346 (items, &rte_flow_item_udp_mask,
1347 &flow_tcf_mask_supported.udp,
1348 &flow_tcf_mask_empty.udp,
1349 sizeof(flow_tcf_mask_supported.udp),
1354 case RTE_FLOW_ITEM_TYPE_TCP:
1355 ret = mlx5_flow_validate_item_tcp
1358 &flow_tcf_mask_supported.tcp,
1362 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1363 mask.tcp = flow_tcf_item_mask
1364 (items, &rte_flow_item_tcp_mask,
1365 &flow_tcf_mask_supported.tcp,
1366 &flow_tcf_mask_empty.tcp,
1367 sizeof(flow_tcf_mask_supported.tcp),
1373 return rte_flow_error_set(error, ENOTSUP,
1374 RTE_FLOW_ERROR_TYPE_ITEM,
1375 NULL, "item not supported");
1378 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1380 uint64_t current_action_flag = 0;
1382 switch (actions->type) {
1383 case RTE_FLOW_ACTION_TYPE_VOID:
1385 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1386 current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1389 conf.port_id = actions->conf;
1390 if (conf.port_id->original)
1393 for (i = 0; ptoi[i].ifindex; ++i)
1394 if (ptoi[i].port_id == conf.port_id->id)
1396 if (!ptoi[i].ifindex)
1397 return rte_flow_error_set
1399 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1401 "missing data to convert port ID to"
1403 port_id_dev = &rte_eth_devices[conf.port_id->id];
1405 case RTE_FLOW_ACTION_TYPE_JUMP:
1406 current_action_flag = MLX5_FLOW_ACTION_JUMP;
1409 conf.jump = actions->conf;
1410 if (attr->group >= conf.jump->group)
1411 return rte_flow_error_set
1413 RTE_FLOW_ERROR_TYPE_ACTION,
1415 "can jump only to a group forward");
1417 case RTE_FLOW_ACTION_TYPE_DROP:
1418 current_action_flag = MLX5_FLOW_ACTION_DROP;
1420 case RTE_FLOW_ACTION_TYPE_COUNT:
1422 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1423 current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1425 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1426 current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1428 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1429 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1430 return rte_flow_error_set
1432 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1433 "vlan modify is not supported,"
1434 " set action must follow push action");
1435 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1437 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1438 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1439 return rte_flow_error_set
1441 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1442 "vlan modify is not supported,"
1443 " set action must follow push action");
1444 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1446 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1447 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1449 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1450 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1452 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1453 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1455 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1456 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1458 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1459 current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1461 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1462 current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1464 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1465 current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1467 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1468 current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1470 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1471 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
1473 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1474 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
1477 return rte_flow_error_set(error, ENOTSUP,
1478 RTE_FLOW_ERROR_TYPE_ACTION,
1480 "action not supported");
1482 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1484 return rte_flow_error_set(error, EINVAL,
1485 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1487 "action configuration not set");
1489 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1491 return rte_flow_error_set(error, ENOTSUP,
1492 RTE_FLOW_ERROR_TYPE_ACTION,
1494 "set actions should be "
1495 "listed successively");
1496 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1497 (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1498 pedit_validated = 1;
1499 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1500 (action_flags & MLX5_TCF_FATE_ACTIONS))
1501 return rte_flow_error_set(error, EINVAL,
1502 RTE_FLOW_ERROR_TYPE_ACTION,
1504 "can't have multiple fate"
1506 action_flags |= current_action_flag;
1508 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1509 (action_flags & MLX5_FLOW_ACTION_DROP))
1510 return rte_flow_error_set(error, ENOTSUP,
1511 RTE_FLOW_ERROR_TYPE_ACTION,
1513 "set action is not compatible with "
1515 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1516 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1517 return rte_flow_error_set(error, ENOTSUP,
1518 RTE_FLOW_ERROR_TYPE_ACTION,
1520 "set action must be followed by "
1523 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1524 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1525 return rte_flow_error_set(error, EINVAL,
1526 RTE_FLOW_ERROR_TYPE_ACTION,
1528 "no ipv4 item found in"
1532 (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1533 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1534 return rte_flow_error_set(error, EINVAL,
1535 RTE_FLOW_ERROR_TYPE_ACTION,
1537 "no ipv6 item found in"
1541 (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1543 (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1544 MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1545 return rte_flow_error_set(error, EINVAL,
1546 RTE_FLOW_ERROR_TYPE_ACTION,
1548 "no TCP/UDP item found in"
1552 * FW syndrome (0xA9C090):
1553 * set_flow_table_entry: push vlan action fte in fdb can ONLY be
1554 * forward to the uplink.
1556 if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1557 (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1558 ((struct priv *)port_id_dev->data->dev_private)->representor)
1559 return rte_flow_error_set(error, ENOTSUP,
1560 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1561 "vlan push can only be applied"
1562 " when forwarding to uplink port");
1564 * FW syndrome (0x294609):
1565 * set_flow_table_entry: modify/pop/push actions in fdb flow table
1566 * are supported only while forwarding to vport.
1568 if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1569 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1570 return rte_flow_error_set(error, ENOTSUP,
1571 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1572 "vlan actions are supported"
1573 " only with port_id action");
1574 if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1575 return rte_flow_error_set(error, EINVAL,
1576 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1577 "no fate action is found");
1579 (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
1581 (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
1582 MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
1583 return rte_flow_error_set(error, EINVAL,
1584 RTE_FLOW_ERROR_TYPE_ACTION,
1586 "no IP found in pattern");
1589 (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
1590 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
1591 return rte_flow_error_set(error, ENOTSUP,
1592 RTE_FLOW_ERROR_TYPE_ACTION,
1594 "no ethernet found in"
1601 * Calculate maximum size of memory for flow items of Linux TC flower and
1602 * extract specified items.
1605 * Pointer to the list of items.
1606 * @param[out] item_flags
1607 * Pointer to the detected items.
1610 * Maximum size of memory for items.
/*
 * Compute an upper bound on the Netlink buffer size needed to encode the
 * given flow items as TC flower attributes, and record the detected layers
 * in *item_flags.
 * NOTE(review): this excerpt appears to have lost lines during extraction
 * (no braces, break/return statements not visible) -- verify against the
 * complete file before relying on control flow described here.
 */
1613 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1614 const struct rte_flow_item items[],
1615 uint64_t *item_flags)
/* Fixed overhead: filter kind string, TCA_OPTIONS nest, classifier flags. */
1620 size += SZ_NLATTR_STRZ_OF("flower") +
1621 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1622 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
/* A non-zero group is encoded as a TC chain attribute. */
1623 if (attr->group > 0)
1624 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
/* Accumulate per-item attribute sizes; each case also sets a layer flag. */
1625 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1626 switch (items->type) {
1627 case RTE_FLOW_ITEM_TYPE_VOID:
1629 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1631 case RTE_FLOW_ITEM_TYPE_ETH:
1632 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1633 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1634 /* dst/src MAC addr and mask. */
1635 flags |= MLX5_FLOW_LAYER_OUTER_L2;
1637 case RTE_FLOW_ITEM_TYPE_VLAN:
1638 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1639 SZ_NLATTR_TYPE_OF(uint16_t) +
1640 /* VLAN Ether type. */
1641 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1642 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1643 flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1645 case RTE_FLOW_ITEM_TYPE_IPV4:
1646 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1647 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1648 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1649 /* dst/src IP addr and mask. */
1650 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1652 case RTE_FLOW_ITEM_TYPE_IPV6:
1653 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1654 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1655 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1656 /* dst/src IP addr and mask. */
1657 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1659 case RTE_FLOW_ITEM_TYPE_UDP:
1660 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1661 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1662 /* dst/src port and mask. */
1663 flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1665 case RTE_FLOW_ITEM_TYPE_TCP:
1666 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1667 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1668 /* dst/src port and mask. */
1669 flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
/* Unsupported items are warnings only: validation already rejected them. */
1673 "unsupported item %p type %d,"
1674 " items must be validated before flow creation",
1675 (const void *)items, items->type);
1679 *item_flags = flags;
1684 * Calculate maximum size of memory for flow actions of Linux TC flower and
1685 * extract specified actions.
1687 * @param[in] actions
1688 * Pointer to the list of actions.
1689 * @param[out] action_flags
1690 * Pointer to the detected actions.
1693 * Maximum size of memory for actions.
/*
 * Compute an upper bound on the Netlink buffer size needed to encode the
 * given flow actions as TC action attributes, and record the detected
 * actions in *action_flags.
 * NOTE(review): lines appear to be missing from this excerpt (braces,
 * break statements, the shared "action_of_vlan" label line) -- confirm
 * against the complete file.
 */
1696 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1697 uint64_t *action_flags)
1702 size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1703 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1704 switch (actions->type) {
1705 case RTE_FLOW_ACTION_TYPE_VOID:
/* PORT_ID maps to a TC "mirred" redirect action. */
1707 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1708 size += SZ_NLATTR_NEST + /* na_act_index. */
1709 SZ_NLATTR_STRZ_OF("mirred") +
1710 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1711 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1712 flags |= MLX5_FLOW_ACTION_PORT_ID;
/* JUMP and DROP both map to a TC "gact" action. */
1714 case RTE_FLOW_ACTION_TYPE_JUMP:
1715 size += SZ_NLATTR_NEST + /* na_act_index. */
1716 SZ_NLATTR_STRZ_OF("gact") +
1717 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1718 SZ_NLATTR_TYPE_OF(struct tc_gact);
1719 flags |= MLX5_FLOW_ACTION_JUMP;
1721 case RTE_FLOW_ACTION_TYPE_DROP:
1722 size += SZ_NLATTR_NEST + /* na_act_index. */
1723 SZ_NLATTR_STRZ_OF("gact") +
1724 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1725 SZ_NLATTR_TYPE_OF(struct tc_gact);
1726 flags |= MLX5_FLOW_ACTION_DROP;
1728 case RTE_FLOW_ACTION_TYPE_COUNT:
/* All four VLAN action variants share the "vlan" action sizing below. */
1730 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1731 flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1732 goto action_of_vlan;
1733 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1734 flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1735 goto action_of_vlan;
1736 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1737 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1738 goto action_of_vlan;
1739 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1740 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1741 goto action_of_vlan;
1743 size += SZ_NLATTR_NEST + /* na_act_index. */
1744 SZ_NLATTR_STRZ_OF("vlan") +
1745 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1746 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1747 SZ_NLATTR_TYPE_OF(uint16_t) +
1748 /* VLAN protocol. */
1749 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1750 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
/* All header-rewrite actions are sized by the pedit helper, which also
 * advances the actions cursor past consecutive set-actions. */
1752 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1753 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1754 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1755 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1756 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1757 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1758 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1759 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1760 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1761 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1762 size += flow_tcf_get_pedit_actions_size(&actions,
/* Unsupported actions are warnings only: validation already rejected them. */
1767 "unsupported action %p type %d,"
1768 " items must be validated before flow creation",
1769 (const void *)actions, actions->type);
1773 *action_flags = flags;
1778 * Brand rtnetlink buffer with unique handle.
1780 * This handle should be unique for a given network interface to avoid
1784 * Pointer to Netlink message.
1786 * Unique 32-bit handle to use.
/*
 * Stamp the TC message payload of the rtnetlink message with the given
 * 32-bit handle so the resulting kernel filter can later be identified.
 * NOTE(review): function braces are not visible in this excerpt.
 */
1789 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1791 struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1793 tcm->tcm_handle = handle;
1794 DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1795 (void *)nlh, handle);
1799 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1800 * memory required, allocates the memory, initializes Netlink message headers
1801 * and set unique TC message handle.
1804 * Pointer to the flow attributes.
1806 * Pointer to the list of items.
1807 * @param[in] actions
1808 * Pointer to the list of actions.
1809 * @param[out] item_flags
1810 * Pointer to bit mask of all items detected.
1811 * @param[out] action_flags
1812 * Pointer to bit mask of all actions detected.
1814 * Pointer to the error structure.
1817 * Pointer to mlx5_flow object on success,
1818 * otherwise NULL and rte_errno is set.
/*
 * Allocate and initialize an mlx5_flow object sized to hold the Netlink
 * message for the given items/actions, then brand the message with a
 * pseudo-unique handle derived from the allocation address.
 * NOTE(review): several lines (braces, NULL-return after the allocation
 * failure, the tcm declaration, designated-initializer tail, final return)
 * are missing from this excerpt.
 */
1820 static struct mlx5_flow *
1821 flow_tcf_prepare(const struct rte_flow_attr *attr,
1822 const struct rte_flow_item items[],
1823 const struct rte_flow_action actions[],
1824 uint64_t *item_flags, uint64_t *action_flags,
1825 struct rte_flow_error *error)
/* Base size: flow object + aligned Netlink and TC message headers. */
1827 size_t size = sizeof(struct mlx5_flow) +
1828 MNL_ALIGN(sizeof(struct nlmsghdr)) +
1829 MNL_ALIGN(sizeof(struct tcmsg));
1830 struct mlx5_flow *dev_flow;
1831 struct nlmsghdr *nlh;
/* Items/actions size helpers also report the detected flags upward. */
1834 size += flow_tcf_get_items_and_size(attr, items, item_flags);
1835 size += flow_tcf_get_actions_and_size(actions, action_flags);
1836 dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1838 rte_flow_error_set(error, ENOMEM,
1839 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1840 "not enough memory to create E-Switch flow");
/* Netlink message storage starts immediately after the flow object. */
1843 nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1844 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1845 *dev_flow = (struct mlx5_flow){
1846 .tcf = (struct mlx5_flow_tcf){
1852 * Generate a reasonably unique handle based on the address of the
1855 * This is straightforward on 32-bit systems where the flow pointer can
1856 * be used directly. Otherwise, its least significant part is taken
1857 * after shifting it by the previous power of two of the pointed buffer
1860 if (sizeof(dev_flow) <= 4)
1861 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1863 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1864 rte_log2_u32(rte_align32prevpow2(size)));
1869 * Make adjustments for supporting count actions.
1872 * Pointer to the Ethernet device structure.
1873 * @param[in] dev_flow
1874 * Pointer to mlx5_flow.
1876 * Pointer to error structure.
1879 * 0 On success else a negative errno value is returned and rte_errno is set.
/*
 * Attach a counter to the flow if it does not have one yet, reporting an
 * error when the counter cannot be allocated.
 * NOTE(review): braces, the NULL check on the new counter and the final
 * return are not visible in this excerpt.
 */
1882 flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
1883 struct mlx5_flow *dev_flow,
1884 struct rte_flow_error *error)
1886 struct rte_flow *flow = dev_flow->flow;
/* Allocate lazily: only the first COUNT action creates the counter. */
1888 if (!flow->counter) {
1889 flow->counter = flow_tcf_counter_new();
1891 return rte_flow_error_set(error, rte_errno,
1892 RTE_FLOW_ERROR_TYPE_ACTION,
1894 "cannot get counter"
1901 * Translate flow for Linux TC flower and construct Netlink message.
1904 * Pointer to the priv structure.
1905 * @param[in, out] flow
1906 * Pointer to the sub flow.
1908 * Pointer to the flow attributes.
1910 * Pointer to the list of items.
1911 * @param[in] actions
1912 * Pointer to the list of actions.
1914 * Pointer to the error structure.
1917 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Translate an rte_flow rule into a TC flower Netlink message inside the
 * buffer prepared by flow_tcf_prepare(): fill the tcmsg header, encode
 * every flow item as flower match attributes, then encode every action
 * inside a TCA_FLOWER_ACT nest.
 * NOTE(review): this excerpt is missing many lines (union declarations for
 * spec/mask/conf, break statements, closing braces, the final return) --
 * the comments below describe only the visible logic.
 */
1920 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1921 const struct rte_flow_attr *attr,
1922 const struct rte_flow_item items[],
1923 const struct rte_flow_action actions[],
1924 struct rte_flow_error *error)
/* Members of the spec/mask unions used while decoding items. */
1927 const struct rte_flow_item_port_id *port_id;
1928 const struct rte_flow_item_eth *eth;
1929 const struct rte_flow_item_vlan *vlan;
1930 const struct rte_flow_item_ipv4 *ipv4;
1931 const struct rte_flow_item_ipv6 *ipv6;
1932 const struct rte_flow_item_tcp *tcp;
1933 const struct rte_flow_item_udp *udp;
/* Members of the conf union used while decoding actions. */
1936 const struct rte_flow_action_port_id *port_id;
1937 const struct rte_flow_action_jump *jump;
1938 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1939 const struct rte_flow_action_of_set_vlan_vid *
1941 const struct rte_flow_action_of_set_vlan_pcp *
1944 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1945 struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1946 struct tcmsg *tcm = dev_flow->tcf.tcm;
1947 uint32_t na_act_index_cur;
1948 bool eth_type_set = 0;
1949 bool vlan_present = 0;
1950 bool vlan_eth_type_set = 0;
1951 bool ip_proto_set = 0;
1952 struct nlattr *na_flower;
1953 struct nlattr *na_flower_act;
/* Remembered attribute positions so later VID/PCP set-actions can patch
 * the placeholder TCA_VLAN_PAD attributes emitted earlier. */
1954 struct nlattr *na_vlan_id = NULL;
1955 struct nlattr *na_vlan_priority = NULL;
1956 uint64_t item_flags = 0;
/* Build the DPDK-port-to-ifindex table; ptoi[0] is this device. */
1959 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1960 PTOI_TABLE_SZ_MAX(dev)));
1961 nlh = dev_flow->tcf.nlh;
1962 tcm = dev_flow->tcf.tcm;
1963 /* Prepare API must have been called beforehand. */
1964 assert(nlh != NULL && tcm != NULL);
1965 tcm->tcm_family = AF_UNSPEC;
1966 tcm->tcm_ifindex = ptoi[0].ifindex;
1967 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1969 * Priority cannot be zero to prevent the kernel from picking one
1972 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1973 RTE_BE16(ETH_P_ALL));
1974 if (attr->group > 0)
1975 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1976 mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1977 na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1978 mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
/* First pass: encode flow items as flower match keys. */
1979 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1982 switch (items->type) {
1983 case RTE_FLOW_ITEM_TYPE_VOID:
1985 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1986 mask.port_id = flow_tcf_item_mask
1987 (items, &rte_flow_item_port_id_mask,
1988 &flow_tcf_mask_supported.port_id,
1989 &flow_tcf_mask_empty.port_id,
1990 sizeof(flow_tcf_mask_supported.port_id),
1992 assert(mask.port_id);
1993 if (mask.port_id == &flow_tcf_mask_empty.port_id)
1995 spec.port_id = items->spec;
1996 if (!mask.port_id->id)
/* Resolve the DPDK port to its kernel ifindex; validation
 * guarantees a match exists. */
1999 for (i = 0; ptoi[i].ifindex; ++i)
2000 if (ptoi[i].port_id == spec.port_id->id)
2002 assert(ptoi[i].ifindex);
2003 tcm->tcm_ifindex = ptoi[i].ifindex;
2005 case RTE_FLOW_ITEM_TYPE_ETH:
2006 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
2007 mask.eth = flow_tcf_item_mask
2008 (items, &rte_flow_item_eth_mask,
2009 &flow_tcf_mask_supported.eth,
2010 &flow_tcf_mask_empty.eth,
2011 sizeof(flow_tcf_mask_supported.eth),
2014 if (mask.eth == &flow_tcf_mask_empty.eth)
2016 spec.eth = items->spec;
2017 if (mask.eth->type) {
2018 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
2022 if (!is_zero_ether_addr(&mask.eth->dst)) {
2023 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
2025 spec.eth->dst.addr_bytes);
2026 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
2028 mask.eth->dst.addr_bytes);
2030 if (!is_zero_ether_addr(&mask.eth->src)) {
2031 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
2033 spec.eth->src.addr_bytes);
2034 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
2036 mask.eth->src.addr_bytes);
2039 case RTE_FLOW_ITEM_TYPE_VLAN:
2040 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
2041 mask.vlan = flow_tcf_item_mask
2042 (items, &rte_flow_item_vlan_mask,
2043 &flow_tcf_mask_supported.vlan,
2044 &flow_tcf_mask_empty.vlan,
2045 sizeof(flow_tcf_mask_supported.vlan),
/* Flower requires ETH_TYPE = 802.1Q when matching a VLAN tag. */
2049 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
2050 RTE_BE16(ETH_P_8021Q));
2053 if (mask.vlan == &flow_tcf_mask_empty.vlan)
2055 spec.vlan = items->spec;
2056 if (mask.vlan->inner_type) {
2057 mnl_attr_put_u16(nlh,
2058 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
2059 spec.vlan->inner_type);
2060 vlan_eth_type_set = 1;
/* TCI splits into PCP (top 3 bits) and VID (low 12 bits). */
2062 if (mask.vlan->tci & RTE_BE16(0xe000))
2063 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
2065 (spec.vlan->tci) >> 13) & 0x7);
2066 if (mask.vlan->tci & RTE_BE16(0x0fff))
2067 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
2072 case RTE_FLOW_ITEM_TYPE_IPV4:
2073 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2074 mask.ipv4 = flow_tcf_item_mask
2075 (items, &rte_flow_item_ipv4_mask,
2076 &flow_tcf_mask_supported.ipv4,
2077 &flow_tcf_mask_empty.ipv4,
2078 sizeof(flow_tcf_mask_supported.ipv4),
/* Emit the L3 ether type unless an earlier item already did;
 * the VLAN variant is used when a VLAN tag is present. */
2081 if (!eth_type_set || !vlan_eth_type_set)
2082 mnl_attr_put_u16(nlh,
2084 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
2085 TCA_FLOWER_KEY_ETH_TYPE,
2086 RTE_BE16(ETH_P_IP));
2088 vlan_eth_type_set = 1;
2089 if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
2091 spec.ipv4 = items->spec;
2092 if (mask.ipv4->hdr.next_proto_id) {
2093 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2094 spec.ipv4->hdr.next_proto_id);
2097 if (mask.ipv4->hdr.src_addr) {
2098 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
2099 spec.ipv4->hdr.src_addr);
2100 mnl_attr_put_u32(nlh,
2101 TCA_FLOWER_KEY_IPV4_SRC_MASK,
2102 mask.ipv4->hdr.src_addr);
2104 if (mask.ipv4->hdr.dst_addr) {
2105 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
2106 spec.ipv4->hdr.dst_addr);
2107 mnl_attr_put_u32(nlh,
2108 TCA_FLOWER_KEY_IPV4_DST_MASK,
2109 mask.ipv4->hdr.dst_addr);
2112 case RTE_FLOW_ITEM_TYPE_IPV6:
2113 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2114 mask.ipv6 = flow_tcf_item_mask
2115 (items, &rte_flow_item_ipv6_mask,
2116 &flow_tcf_mask_supported.ipv6,
2117 &flow_tcf_mask_empty.ipv6,
2118 sizeof(flow_tcf_mask_supported.ipv6),
2121 if (!eth_type_set || !vlan_eth_type_set)
2122 mnl_attr_put_u16(nlh,
2124 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
2125 TCA_FLOWER_KEY_ETH_TYPE,
2126 RTE_BE16(ETH_P_IPV6));
2128 vlan_eth_type_set = 1;
2129 if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
2131 spec.ipv6 = items->spec;
2132 if (mask.ipv6->hdr.proto) {
2133 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2134 spec.ipv6->hdr.proto);
2137 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
2138 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
2139 sizeof(spec.ipv6->hdr.src_addr),
2140 spec.ipv6->hdr.src_addr);
2141 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
2142 sizeof(mask.ipv6->hdr.src_addr),
2143 mask.ipv6->hdr.src_addr);
2145 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
2146 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
2147 sizeof(spec.ipv6->hdr.dst_addr),
2148 spec.ipv6->hdr.dst_addr);
2149 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
2150 sizeof(mask.ipv6->hdr.dst_addr),
2151 mask.ipv6->hdr.dst_addr);
2154 case RTE_FLOW_ITEM_TYPE_UDP:
2155 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
2156 mask.udp = flow_tcf_item_mask
2157 (items, &rte_flow_item_udp_mask,
2158 &flow_tcf_mask_supported.udp,
2159 &flow_tcf_mask_empty.udp,
2160 sizeof(flow_tcf_mask_supported.udp),
2164 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2166 if (mask.udp == &flow_tcf_mask_empty.udp)
2168 spec.udp = items->spec;
2169 if (mask.udp->hdr.src_port) {
2170 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
2171 spec.udp->hdr.src_port);
2172 mnl_attr_put_u16(nlh,
2173 TCA_FLOWER_KEY_UDP_SRC_MASK,
2174 mask.udp->hdr.src_port);
2176 if (mask.udp->hdr.dst_port) {
2177 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
2178 spec.udp->hdr.dst_port);
2179 mnl_attr_put_u16(nlh,
2180 TCA_FLOWER_KEY_UDP_DST_MASK,
2181 mask.udp->hdr.dst_port);
2184 case RTE_FLOW_ITEM_TYPE_TCP:
2185 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
2186 mask.tcp = flow_tcf_item_mask
2187 (items, &rte_flow_item_tcp_mask,
2188 &flow_tcf_mask_supported.tcp,
2189 &flow_tcf_mask_empty.tcp,
2190 sizeof(flow_tcf_mask_supported.tcp),
2194 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2196 if (mask.tcp == &flow_tcf_mask_empty.tcp)
2198 spec.tcp = items->spec;
2199 if (mask.tcp->hdr.src_port) {
2200 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
2201 spec.tcp->hdr.src_port);
2202 mnl_attr_put_u16(nlh,
2203 TCA_FLOWER_KEY_TCP_SRC_MASK,
2204 mask.tcp->hdr.src_port);
2206 if (mask.tcp->hdr.dst_port) {
2207 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
2208 spec.tcp->hdr.dst_port);
2209 mnl_attr_put_u16(nlh,
2210 TCA_FLOWER_KEY_TCP_DST_MASK,
2211 mask.tcp->hdr.dst_port);
2213 if (mask.tcp->hdr.tcp_flags) {
2216 TCA_FLOWER_KEY_TCP_FLAGS,
2218 (spec.tcp->hdr.tcp_flags));
2221 TCA_FLOWER_KEY_TCP_FLAGS_MASK,
2223 (mask.tcp->hdr.tcp_flags));
2227 return rte_flow_error_set(error, ENOTSUP,
2228 RTE_FLOW_ERROR_TYPE_ITEM,
2229 NULL, "item not supported");
/* Second pass: encode actions; each gets its own na_act_index nest. */
2232 na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
2233 na_act_index_cur = 1;
2234 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2235 struct nlattr *na_act_index;
2236 struct nlattr *na_act;
2237 unsigned int vlan_act;
2240 switch (actions->type) {
2241 case RTE_FLOW_ACTION_TYPE_VOID:
2243 case RTE_FLOW_ACTION_TYPE_PORT_ID:
2244 conf.port_id = actions->conf;
2245 if (conf.port_id->original)
2248 for (i = 0; ptoi[i].ifindex; ++i)
2249 if (ptoi[i].port_id == conf.port_id->id)
2251 assert(ptoi[i].ifindex);
2253 mnl_attr_nest_start(nlh, na_act_index_cur++);
2254 assert(na_act_index);
2255 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
2256 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
/* mirred redirect: steal the packet and send it out ifindex. */
2258 mnl_attr_put(nlh, TCA_MIRRED_PARMS,
2259 sizeof(struct tc_mirred),
2260 &(struct tc_mirred){
2261 .action = TC_ACT_STOLEN,
2262 .eaction = TCA_EGRESS_REDIR,
2263 .ifindex = ptoi[i].ifindex,
2265 mnl_attr_nest_end(nlh, na_act);
2266 mnl_attr_nest_end(nlh, na_act_index);
2268 case RTE_FLOW_ACTION_TYPE_JUMP:
2269 conf.jump = actions->conf;
2271 mnl_attr_nest_start(nlh, na_act_index_cur++);
2272 assert(na_act_index);
2273 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2274 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
/* JUMP maps to gact goto-chain (group number = chain). */
2276 mnl_attr_put(nlh, TCA_GACT_PARMS,
2277 sizeof(struct tc_gact),
2279 .action = TC_ACT_GOTO_CHAIN |
2282 mnl_attr_nest_end(nlh, na_act);
2283 mnl_attr_nest_end(nlh, na_act_index);
2285 case RTE_FLOW_ACTION_TYPE_DROP:
2287 mnl_attr_nest_start(nlh, na_act_index_cur++);
2288 assert(na_act_index);
2289 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2290 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2292 mnl_attr_put(nlh, TCA_GACT_PARMS,
2293 sizeof(struct tc_gact),
2295 .action = TC_ACT_SHOT,
2297 mnl_attr_nest_end(nlh, na_act);
2298 mnl_attr_nest_end(nlh, na_act_index);
2300 case RTE_FLOW_ACTION_TYPE_COUNT:
2302 * Driver adds the count action implicitly for
2303 * each rule it creates.
2305 ret = flow_tcf_translate_action_count(dev,
2310 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
2311 conf.of_push_vlan = NULL;
2312 vlan_act = TCA_VLAN_ACT_POP;
2313 goto action_of_vlan;
2314 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
2315 conf.of_push_vlan = actions->conf;
2316 vlan_act = TCA_VLAN_ACT_PUSH;
2317 goto action_of_vlan;
/* If a vlan action was already emitted, patch its placeholder
 * attribute in place instead of adding a new action. */
2318 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
2319 conf.of_set_vlan_vid = actions->conf;
2321 goto override_na_vlan_id;
2322 vlan_act = TCA_VLAN_ACT_MODIFY;
2323 goto action_of_vlan;
2324 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
2325 conf.of_set_vlan_pcp = actions->conf;
2326 if (na_vlan_priority)
2327 goto override_na_vlan_priority;
2328 vlan_act = TCA_VLAN_ACT_MODIFY;
2329 goto action_of_vlan;
2332 mnl_attr_nest_start(nlh, na_act_index_cur++);
2333 assert(na_act_index);
2334 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
2335 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2337 mnl_attr_put(nlh, TCA_VLAN_PARMS,
2338 sizeof(struct tc_vlan),
2340 .action = TC_ACT_PIPE,
2341 .v_action = vlan_act,
2343 if (vlan_act == TCA_VLAN_ACT_POP) {
2344 mnl_attr_nest_end(nlh, na_act);
2345 mnl_attr_nest_end(nlh, na_act_index);
2348 if (vlan_act == TCA_VLAN_ACT_PUSH)
2349 mnl_attr_put_u16(nlh,
2350 TCA_VLAN_PUSH_VLAN_PROTOCOL,
2351 conf.of_push_vlan->ethertype);
/* Reserve VID/priority slots as TCA_VLAN_PAD placeholders so a
 * later SET_VLAN_VID/PCP can retype and fill them in place. */
2352 na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
2353 mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
2354 na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
2355 mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
2356 mnl_attr_nest_end(nlh, na_act);
2357 mnl_attr_nest_end(nlh, na_act_index);
2358 if (actions->type ==
2359 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
2360 override_na_vlan_id:
2361 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
2362 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
2364 (conf.of_set_vlan_vid->vlan_vid);
2365 } else if (actions->type ==
2366 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
2367 override_na_vlan_priority:
2368 na_vlan_priority->nla_type =
2369 TCA_VLAN_PUSH_VLAN_PRIORITY;
2370 *(uint8_t *)mnl_attr_get_payload
2371 (na_vlan_priority) =
2372 conf.of_set_vlan_pcp->vlan_pcp;
/* All header-rewrite actions are encoded together as one pedit
 * action; the helper advances the actions cursor itself. */
2375 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
2376 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
2377 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
2378 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
2379 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
2380 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
2381 case RTE_FLOW_ACTION_TYPE_SET_TTL:
2382 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
2383 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
2384 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
2386 mnl_attr_nest_start(nlh, na_act_index_cur++);
2387 flow_tcf_create_pedit_mnl_msg(nlh,
2388 &actions, item_flags);
2389 mnl_attr_nest_end(nlh, na_act_index);
2392 return rte_flow_error_set(error, ENOTSUP,
2393 RTE_FLOW_ERROR_TYPE_ACTION,
2395 "action not supported");
2399 assert(na_flower_act);
2400 mnl_attr_nest_end(nlh, na_flower_act);
2401 mnl_attr_nest_end(nlh, na_flower);
2406 * Send Netlink message with acknowledgment.
2409 * Flow context to use.
2411 * Message to send. This function always raises the NLM_F_ACK flag before
2415 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Send a Netlink message with NLM_F_ACK set and wait for the kernel's
 * acknowledgment, validating it against the sequence number used.
 * NOTE(review): braces, the error-return paths and the final return are
 * not visible in this excerpt.
 */
2418 flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
/* Answer buffer sized for an nlmsgerr plus the echoed request payload. */
2420 alignas(struct nlmsghdr)
2421 uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
2422 nlh->nlmsg_len - sizeof(*nlh)];
/* Per-context sequence number ties the ack back to this request. */
2423 uint32_t seq = ctx->seq++;
2424 struct mnl_socket *nl = ctx->nl;
2427 nlh->nlmsg_flags |= NLM_F_ACK;
2428 nlh->nlmsg_seq = seq;
2429 ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
2431 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
2434 (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
2442 * Apply flow to E-Switch by sending Netlink message.
2445 * Pointer to Ethernet device.
2446 * @param[in, out] flow
2447 * Pointer to the sub flow.
2449 * Pointer to the error structure.
2452 * 0 on success, a negative errno value otherwise and rte_ernno is set.
2455 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2456 struct rte_flow_error *error)
2458 struct priv *priv = dev->data->dev_private;
2459 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2460 struct mlx5_flow *dev_flow;
2461 struct nlmsghdr *nlh;
2463 dev_flow = LIST_FIRST(&flow->dev_flows);
2464 /* E-Switch flow can't be expanded. */
2465 assert(!LIST_NEXT(dev_flow, next));
2466 nlh = dev_flow->tcf.nlh;
2467 nlh->nlmsg_type = RTM_NEWTFILTER;
2468 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2469 if (!flow_tcf_nl_ack(ctx, nlh))
2471 return rte_flow_error_set(error, rte_errno,
2472 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2473 "netlink: failed to create TC flow rule");
2477 * Remove flow from E-Switch by sending Netlink message.
2480 * Pointer to Ethernet device.
2481 * @param[in, out] flow
2482 * Pointer to the sub flow.
2485 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2487 struct priv *priv = dev->data->dev_private;
2488 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2489 struct mlx5_flow *dev_flow;
2490 struct nlmsghdr *nlh;
2494 if (flow->counter) {
2495 if (--flow->counter->ref_cnt == 0) {
2496 rte_free(flow->counter);
2497 flow->counter = NULL;
2500 dev_flow = LIST_FIRST(&flow->dev_flows);
2503 /* E-Switch flow can't be expanded. */
2504 assert(!LIST_NEXT(dev_flow, next));
2505 nlh = dev_flow->tcf.nlh;
2506 nlh->nlmsg_type = RTM_DELTFILTER;
2507 nlh->nlmsg_flags = NLM_F_REQUEST;
2508 flow_tcf_nl_ack(ctx, nlh);
2512 * Remove flow from E-Switch and release resources of the device flow.
2515 * Pointer to Ethernet device.
2516 * @param[in, out] flow
2517 * Pointer to the sub flow.
2520 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2522 struct mlx5_flow *dev_flow;
2526 flow_tcf_remove(dev, flow);
2527 dev_flow = LIST_FIRST(&flow->dev_flows);
2530 /* E-Switch flow can't be expanded. */
2531 assert(!LIST_NEXT(dev_flow, next));
2532 LIST_REMOVE(dev_flow, next);
/**
 * Helper routine for figuring the space size required for a parse buffer.
 *
 * @param array
 *   Array of values to use.
 * @param idx
 *   Current location in array.
 * @param value
 *   Value to compare with.
 *
 * @return
 *   The maximum between the given value and the array value on index,
 *   or @p value alone when the index is negative.
 */
static uint16_t
flow_tcf_arr_val_max(uint16_t array[], int idx, uint16_t value)
{
	if (idx < 0)
		return value;
	return array[idx] > value ? array[idx] : value;
}
/**
 * Parse rtnetlink message attributes filling the attribute table with the
 * info retrieved. Only the first attribute of each type is recorded;
 * duplicates and out-of-range types are ignored.
 *
 * @param tb
 *   Attribute table to be filled.
 * @param max
 *   Maximum entry in the attribute table.
 * @param rta
 *   The attributes section in the message to be parsed.
 * @param len
 *   The length of the attributes section in the message.
 */
static void
flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
			 struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short type = rta->rta_type;

		if (type <= max && !tb[type])
			tb[type] = rta;
	}
}
/**
 * Extract flow counters from flower action.
 *
 * @param rta
 *   Flower action stats properties in the Netlink message received.
 * @param rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   we need to traverse in order to get to the requested object.
 * @param idx
 *   Current location in rta_type table.
 * @param[out] data
 *   Data holding the count statistics of the rte_flow retrieved from
 *   the message query.
 *
 * @return
 *   0 if data was found and retrieved, -1 otherwise.
 */
static int
flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
				       uint16_t rta_type[], int idx,
				       struct gnet_stats_basic *data)
{
	int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
						 TCA_STATS_BASIC);
	struct rtattr *tbs[tca_stats_max + 1];

	if (rta == NULL || idx < 0)
		return -1;
	flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
				 RTA_DATA(rta), RTA_PAYLOAD(rta));
	/* Only the basic-stats leaf is of interest at this nesting level. */
	if (rta_type[idx] == TCA_STATS_BASIC && tbs[TCA_STATS_BASIC] != NULL) {
		/* Bound the copy by both payload and destination size. */
		memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
		       RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
			       sizeof(*data)));
		return 0;
	}
	return -1;
}
2628 * Parse flower single action retrieving the requested action attribute,
2632 * flower action properties in the Netlink message received.
2634 * The backward sequence of rta_types, as written in the attribute table,
2635 * we need to traverse in order to get to the requested object.
2637 * Current location in rta_type table.
2639 * Count statistics retrieved from the message query.
2642 * 0 if data was found and retrieved, -1 otherwise.
2645 flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
2646 uint16_t rta_type[], int idx, void *data)
2648 int tca_act_max = flow_tcf_arr_val_max(rta_type, idx, TCA_ACT_STATS);
2649 struct rtattr *tb[tca_act_max + 1];
2651 if (arg == NULL || idx < 0)
2653 flow_tcf_nl_parse_rtattr(tb, tca_act_max,
2654 RTA_DATA(arg), RTA_PAYLOAD(arg));
2655 if (tb[TCA_ACT_KIND] == NULL)
2657 switch (rta_type[idx]) {
2659 if (tb[TCA_ACT_STATS])
2660 return flow_tcf_nl_action_stats_parse_and_get
2663 (struct gnet_stats_basic *)data);
2672 * Parse flower action section in the message retrieving the requested
2673 * attribute from the first action that provides it.
2676 * flower section in the Netlink message received.
2678 * The backward sequence of rta_types, as written in the attribute table,
2679 * we need to traverse in order to get to the requested object.
2681 * Current location in rta_type table.
2683 * data retrieved from the message query.
2686 * 0 if data was found and retrieved, -1 otherwise.
2689 flow_tcf_nl_action_parse_and_get(struct rtattr *arg,
2690 uint16_t rta_type[], int idx, void *data)
2692 struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
2695 if (arg == NULL || idx < 0)
2697 flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
2698 RTA_DATA(arg), RTA_PAYLOAD(arg));
2699 switch (rta_type[idx]) {
2701 * flow counters are stored in the actions defined by the flow
2702 * and not in the flow itself, therefore we need to traverse the
2703 * flower chain of actions in search for them.
2705 * Note that the index is not decremented here.
2708 for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
2710 !flow_tcf_nl_parse_one_action_and_get(tb[i],
2723 * Parse flower classifier options in the message, retrieving the requested
2724 * attribute if found.
2727 * flower section in the Netlink message received.
2729 * The backward sequence of rta_types, as written in the attribute table,
2730 * we need to traverse in order to get to the requested object.
2732 * Current location in rta_type table.
2734 * data retrieved from the message query.
2737 * 0 if data was found and retrieved, -1 otherwise.
2740 flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
2741 uint16_t rta_type[], int idx, void *data)
2743 int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
2745 struct rtattr *tb[tca_flower_max + 1];
2747 if (!opt || idx < 0)
2749 flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
2750 RTA_DATA(opt), RTA_PAYLOAD(opt));
2751 switch (rta_type[idx]) {
2752 case TCA_FLOWER_ACT:
2753 if (tb[TCA_FLOWER_ACT])
2754 return flow_tcf_nl_action_parse_and_get
2755 (tb[TCA_FLOWER_ACT],
2756 rta_type, --idx, data);
/**
 * Parse Netlink reply on filter query, retrieving the flow counters.
 *
 * @param cnlh
 *   Message received from Netlink.
 * @param rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   we need to traverse in order to get to the requested object.
 * @param idx
 *   Current location in rta_type table.
 * @param[out] data
 *   Data retrieved from the message query.
 *
 * @return
 *   0 if data was found and retrieved, -1 otherwise.
 */
static int
flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
				 uint16_t rta_type[], int idx, void *data)
{
	struct nlmsghdr *nlh = cnlh;
	struct tcmsg *t = NLMSG_DATA(nlh);
	int len = nlh->nlmsg_len;
	int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
	struct rtattr *tb[tca_max + 1];

	if (idx < 0)
		return -1;
	/* Only TC filter replies are of interest. */
	if (nlh->nlmsg_type != RTM_NEWTFILTER &&
	    nlh->nlmsg_type != RTM_GETTFILTER &&
	    nlh->nlmsg_type != RTM_DELTFILTER)
		return -1;
	len -= NLMSG_LENGTH(sizeof(*t));
	if (len < 0)
		return -1;
	flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
	/* Not a TC flower flow - bail out */
	if (tb[TCA_KIND] == NULL ||
	    strcmp(RTA_DATA(tb[TCA_KIND]), "flower") != 0)
		return -1;
	/* Descend into the classifier options with the next rta_type entry. */
	if (rta_type[idx] == TCA_OPTIONS && tb[TCA_OPTIONS] != NULL)
		return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
						      rta_type, idx - 1,
						      data);
	return -1;
}
2818 * A callback to parse Netlink reply on TC flower query.
2821 * Message received from Netlink.
2823 * Pointer to data area to be filled by the parsing routine.
2824 * assumed to be a pinter to struct flow_tcf_stats_basic.
2830 flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
2833 * The backward sequence of rta_types to pass in order to get
2836 uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
2837 TCA_FLOWER_ACT, TCA_OPTIONS };
2838 struct flow_tcf_stats_basic *sb_data = data;
2840 const struct nlmsghdr *c;
2841 struct nlmsghdr *nc;
2842 } tnlh = { .c = nlh };
2844 if (!flow_tcf_nl_filter_parse_and_get(tnlh.nc, rta_type,
2845 RTE_DIM(rta_type) - 1,
2846 (void *)&sb_data->counters))
2847 sb_data->valid = true;
2852 * Query a TC flower rule for its statistics via netlink.
2855 * Pointer to Ethernet device.
2857 * Pointer to the sub flow.
2859 * data retrieved by the query.
2861 * Perform verbose error reporting if not NULL.
2864 * 0 on success, a negative errno value otherwise and rte_errno is set.
2867 flow_tcf_query_count(struct rte_eth_dev *dev,
2868 struct rte_flow *flow,
2870 struct rte_flow_error *error)
2872 struct flow_tcf_stats_basic sb_data = { 0 };
2873 struct rte_flow_query_count *qc = data;
2874 struct priv *priv = dev->data->dev_private;
2875 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2876 struct mnl_socket *nl = ctx->nl;
2877 struct mlx5_flow *dev_flow;
2878 struct nlmsghdr *nlh;
2879 uint32_t seq = priv->tcf_context->seq++;
2883 dev_flow = LIST_FIRST(&flow->dev_flows);
2884 /* E-Switch flow can't be expanded. */
2885 assert(!LIST_NEXT(dev_flow, next));
2886 if (!dev_flow->flow->counter)
2888 nlh = dev_flow->tcf.nlh;
2889 nlh->nlmsg_type = RTM_GETTFILTER;
2890 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
2891 nlh->nlmsg_seq = seq;
2892 if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
2895 ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
2898 ret = mnl_cb_run(ctx->buf, ret, seq,
2899 mnl_socket_get_portid(nl),
2900 flow_tcf_nl_message_get_stats_basic,
2903 /* Return the delta from last reset. */
2904 if (sb_data.valid) {
2905 /* Return the delta from last reset. */
2908 qc->hits = sb_data.counters.packets - flow->counter->hits;
2909 qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
2911 flow->counter->hits = sb_data.counters.packets;
2912 flow->counter->bytes = sb_data.counters.bytes;
2916 return rte_flow_error_set(error, EINVAL,
2917 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2919 "flow does not have counter");
2921 return rte_flow_error_set
2922 (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2923 NULL, "netlink: failed to read flow rule counters");
2925 return rte_flow_error_set
2926 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2927 NULL, "counters are not available.");
2933 * @see rte_flow_query()
2937 flow_tcf_query(struct rte_eth_dev *dev,
2938 struct rte_flow *flow,
2939 const struct rte_flow_action *actions,
2941 struct rte_flow_error *error)
2945 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2946 switch (actions->type) {
2947 case RTE_FLOW_ACTION_TYPE_VOID:
2949 case RTE_FLOW_ACTION_TYPE_COUNT:
2950 ret = flow_tcf_query_count(dev, flow, data, error);
2953 return rte_flow_error_set(error, ENOTSUP,
2954 RTE_FLOW_ERROR_TYPE_ACTION,
2956 "action not supported");
/* E-Switch (TC flower) flow driver callbacks registered with mlx5_flow. */
const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
	.validate = flow_tcf_validate,
	.prepare = flow_tcf_prepare,
	.translate = flow_tcf_translate,
	.apply = flow_tcf_apply,
	.remove = flow_tcf_remove,
	.destroy = flow_tcf_destroy,
	.query = flow_tcf_query,
};
2973 * Create and configure a libmnl socket for Netlink flow rules.
2976 * A valid libmnl socket object pointer on success, NULL otherwise and
2979 static struct mnl_socket *
2980 flow_tcf_mnl_socket_create(void)
2982 struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
2985 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2987 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2992 mnl_socket_close(nl);
2997 * Destroy a libmnl socket.
3000 * Libmnl socket of the @p NETLINK_ROUTE kind.
3003 flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
3006 mnl_socket_close(nl);
3010 * Initialize ingress qdisc of a given network interface.
3013 * Pointer to tc-flower context to use.
3015 * Index of network interface to initialize.
3017 * Perform verbose error reporting if not NULL.
3020 * 0 on success, a negative errno value otherwise and rte_errno is set.
3023 mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
3024 unsigned int ifindex, struct rte_flow_error *error)
3026 struct nlmsghdr *nlh;
3028 alignas(struct nlmsghdr)
3029 uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
3031 /* Destroy existing ingress qdisc and everything attached to it. */
3032 nlh = mnl_nlmsg_put_header(buf);
3033 nlh->nlmsg_type = RTM_DELQDISC;
3034 nlh->nlmsg_flags = NLM_F_REQUEST;
3035 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
3036 tcm->tcm_family = AF_UNSPEC;
3037 tcm->tcm_ifindex = ifindex;
3038 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
3039 tcm->tcm_parent = TC_H_INGRESS;
3040 /* Ignore errors when qdisc is already absent. */
3041 if (flow_tcf_nl_ack(ctx, nlh) &&
3042 rte_errno != EINVAL && rte_errno != ENOENT)
3043 return rte_flow_error_set(error, rte_errno,
3044 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
3045 "netlink: failed to remove ingress"
3047 /* Create fresh ingress qdisc. */
3048 nlh = mnl_nlmsg_put_header(buf);
3049 nlh->nlmsg_type = RTM_NEWQDISC;
3050 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
3051 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
3052 tcm->tcm_family = AF_UNSPEC;
3053 tcm->tcm_ifindex = ifindex;
3054 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
3055 tcm->tcm_parent = TC_H_INGRESS;
3056 mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
3057 if (flow_tcf_nl_ack(ctx, nlh))
3058 return rte_flow_error_set(error, rte_errno,
3059 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
3060 "netlink: failed to create ingress"
3066 * Create libmnl context for Netlink flow rules.
3069 * A valid libmnl socket object pointer on success, NULL otherwise and
3072 struct mlx5_flow_tcf_context *
3073 mlx5_flow_tcf_context_create(void)
3075 struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
3080 ctx->nl = flow_tcf_mnl_socket_create();
3083 ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
3084 ctx->buf = rte_zmalloc(__func__,
3085 ctx->buf_size, sizeof(uint32_t));
3088 ctx->seq = random();
3091 mlx5_flow_tcf_context_destroy(ctx);
3096 * Destroy a libmnl context.
3099 * Libmnl socket of the @p NETLINK_ROUTE kind.
3102 mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
3106 flow_tcf_mnl_socket_destroy(ctx->nl);