1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/gen_stats.h>
10 #include <linux/if_ether.h>
11 #include <linux/netlink.h>
12 #include <linux/pkt_cls.h>
13 #include <linux/pkt_sched.h>
14 #include <linux/rtnetlink.h>
15 #include <linux/tc_act/tc_gact.h>
16 #include <linux/tc_act/tc_mirred.h>
17 #include <netinet/in.h>
23 #include <sys/socket.h>
25 #include <rte_byteorder.h>
26 #include <rte_errno.h>
27 #include <rte_ether.h>
29 #include <rte_malloc.h>
30 #include <rte_common.h>
33 #include "mlx5_flow.h"
34 #include "mlx5_autoconf.h"
36 #ifdef HAVE_TC_ACT_VLAN
38 #include <linux/tc_act/tc_vlan.h>
40 #else /* HAVE_TC_ACT_VLAN */
42 #define TCA_VLAN_ACT_POP 1
43 #define TCA_VLAN_ACT_PUSH 2
44 #define TCA_VLAN_ACT_MODIFY 3
45 #define TCA_VLAN_PARMS 2
46 #define TCA_VLAN_PUSH_VLAN_ID 3
47 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
48 #define TCA_VLAN_PAD 5
49 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
56 #endif /* HAVE_TC_ACT_VLAN */
58 #ifdef HAVE_TC_ACT_PEDIT
60 #include <linux/tc_act/tc_pedit.h>
62 #else /* HAVE_TC_ACT_VLAN */
76 TCA_PEDIT_KEY_EX_HTYPE = 1,
77 TCA_PEDIT_KEY_EX_CMD = 2,
78 __TCA_PEDIT_KEY_EX_MAX
81 enum pedit_header_type {
82 TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
83 TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
84 TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
85 TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
86 TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
87 TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
92 TCA_PEDIT_KEY_EX_CMD_SET = 0,
93 TCA_PEDIT_KEY_EX_CMD_ADD = 1,
100 __u32 off; /*offset */
107 struct tc_pedit_sel {
111 struct tc_pedit_key keys[0];
114 #endif /* HAVE_TC_ACT_VLAN */
116 #ifdef HAVE_TC_ACT_TUNNEL_KEY
118 #include <linux/tc_act/tc_tunnel_key.h>
120 #ifndef HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT
121 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
124 #ifndef HAVE_TCA_TUNNEL_KEY_NO_CSUM
125 #define TCA_TUNNEL_KEY_NO_CSUM 10
128 #else /* HAVE_TC_ACT_TUNNEL_KEY */
130 #define TCA_ACT_TUNNEL_KEY 17
131 #define TCA_TUNNEL_KEY_ACT_SET 1
132 #define TCA_TUNNEL_KEY_ACT_RELEASE 2
133 #define TCA_TUNNEL_KEY_PARMS 2
134 #define TCA_TUNNEL_KEY_ENC_IPV4_SRC 3
135 #define TCA_TUNNEL_KEY_ENC_IPV4_DST 4
136 #define TCA_TUNNEL_KEY_ENC_IPV6_SRC 5
137 #define TCA_TUNNEL_KEY_ENC_IPV6_DST 6
138 #define TCA_TUNNEL_KEY_ENC_KEY_ID 7
139 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
140 #define TCA_TUNNEL_KEY_NO_CSUM 10
142 struct tc_tunnel_key {
147 #endif /* HAVE_TC_ACT_TUNNEL_KEY */
149 /* Normally found in linux/netlink.h. */
150 #ifndef NETLINK_CAP_ACK
151 #define NETLINK_CAP_ACK 10
154 /* Normally found in linux/pkt_sched.h. */
155 #ifndef TC_H_MIN_INGRESS
156 #define TC_H_MIN_INGRESS 0xfff2u
159 /* Normally found in linux/pkt_cls.h. */
160 #ifndef TCA_CLS_FLAGS_SKIP_SW
161 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
163 #ifndef HAVE_TCA_CHAIN
166 #ifndef HAVE_TCA_FLOWER_ACT
167 #define TCA_FLOWER_ACT 3
169 #ifndef HAVE_TCA_FLOWER_FLAGS
170 #define TCA_FLOWER_FLAGS 22
172 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
173 #define TCA_FLOWER_KEY_ETH_TYPE 8
175 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
176 #define TCA_FLOWER_KEY_ETH_DST 4
178 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
179 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
181 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
182 #define TCA_FLOWER_KEY_ETH_SRC 6
184 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
185 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
187 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
188 #define TCA_FLOWER_KEY_IP_PROTO 9
190 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
191 #define TCA_FLOWER_KEY_IPV4_SRC 10
193 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
194 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
196 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
197 #define TCA_FLOWER_KEY_IPV4_DST 12
199 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
200 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
202 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
203 #define TCA_FLOWER_KEY_IPV6_SRC 14
205 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
206 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
208 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
209 #define TCA_FLOWER_KEY_IPV6_DST 16
211 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
212 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
214 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
215 #define TCA_FLOWER_KEY_TCP_SRC 18
217 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
218 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
220 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
221 #define TCA_FLOWER_KEY_TCP_DST 19
223 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
224 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
226 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
227 #define TCA_FLOWER_KEY_UDP_SRC 20
229 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
230 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
232 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
233 #define TCA_FLOWER_KEY_UDP_DST 21
235 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
236 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
238 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
239 #define TCA_FLOWER_KEY_VLAN_ID 23
241 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
242 #define TCA_FLOWER_KEY_VLAN_PRIO 24
244 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
245 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
247 #ifndef HAVE_TCA_FLOWER_KEY_ENC_KEY_ID
248 #define TCA_FLOWER_KEY_ENC_KEY_ID 26
250 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC
251 #define TCA_FLOWER_KEY_ENC_IPV4_SRC 27
253 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK
254 #define TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK 28
256 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST
257 #define TCA_FLOWER_KEY_ENC_IPV4_DST 29
259 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK
260 #define TCA_FLOWER_KEY_ENC_IPV4_DST_MASK 30
262 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC
263 #define TCA_FLOWER_KEY_ENC_IPV6_SRC 31
265 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK
266 #define TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK 32
268 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST
269 #define TCA_FLOWER_KEY_ENC_IPV6_DST 33
271 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK
272 #define TCA_FLOWER_KEY_ENC_IPV6_DST_MASK 34
274 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT
275 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT 43
277 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK
278 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK 44
280 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT
281 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT 45
283 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK
284 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK 46
286 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
287 #define TCA_FLOWER_KEY_TCP_FLAGS 71
289 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
290 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
292 #ifndef HAVE_TC_ACT_GOTO_CHAIN
293 #define TC_ACT_GOTO_CHAIN 0x20000000
296 #ifndef IPV6_ADDR_LEN
297 #define IPV6_ADDR_LEN 16
300 #ifndef IPV4_ADDR_LEN
301 #define IPV4_ADDR_LEN 4
305 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
312 #ifndef TCA_ACT_MAX_PRIO
313 #define TCA_ACT_MAX_PRIO 32
316 /** UDP port range of VXLAN devices created by driver. */
317 #define MLX5_VXLAN_PORT_MIN 30000
318 #define MLX5_VXLAN_PORT_MAX 60000
319 #define MLX5_VXLAN_DEVICE_PFX "vmlx_"
321 /** Tunnel action type, used for @p type in header structure. */
/** Tunnel action type, used for @p type in header structure. */
enum flow_tcf_tunact_type {
	FLOW_TCF_TUNACT_VXLAN_DECAP, /* Strip outer VXLAN encapsulation. */
	FLOW_TCF_TUNACT_VXLAN_ENCAP, /* Add outer VXLAN encapsulation. */
};
327 /** Flags used for @p mask in tunnel action encap descriptors. */
328 #define FLOW_TCF_ENCAP_ETH_SRC (1u << 0)
329 #define FLOW_TCF_ENCAP_ETH_DST (1u << 1)
330 #define FLOW_TCF_ENCAP_IPV4_SRC (1u << 2)
331 #define FLOW_TCF_ENCAP_IPV4_DST (1u << 3)
332 #define FLOW_TCF_ENCAP_IPV6_SRC (1u << 4)
333 #define FLOW_TCF_ENCAP_IPV6_DST (1u << 5)
334 #define FLOW_TCF_ENCAP_UDP_SRC (1u << 6)
335 #define FLOW_TCF_ENCAP_UDP_DST (1u << 7)
336 #define FLOW_TCF_ENCAP_VXLAN_VNI (1u << 8)
339 * Structure for holding netlink context.
340 * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
341 * Using this (8KB) buffer size ensures that netlink messages will never be
/*
 * Structure for holding netlink context.
 * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
 * Using this (8KB) buffer size ensures that netlink messages will never be
 * truncated.
 */
struct mlx5_flow_tcf_context {
	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
	uint32_t seq; /* Message sequence number. */
	uint32_t buf_size; /* Message buffer size. */
	uint8_t *buf; /* Message buffer. */
};
351 /** Structure used when extracting the values of a flow counters
352 * from a netlink message.
354 struct flow_tcf_stats_basic {
356 struct gnet_stats_basic counters;
359 /** Empty masks for known item types. */
361 struct rte_flow_item_port_id port_id;
362 struct rte_flow_item_eth eth;
363 struct rte_flow_item_vlan vlan;
364 struct rte_flow_item_ipv4 ipv4;
365 struct rte_flow_item_ipv6 ipv6;
366 struct rte_flow_item_tcp tcp;
367 struct rte_flow_item_udp udp;
368 } flow_tcf_mask_empty;
370 /** Supported masks for known item types. */
371 static const struct {
372 struct rte_flow_item_port_id port_id;
373 struct rte_flow_item_eth eth;
374 struct rte_flow_item_vlan vlan;
375 struct rte_flow_item_ipv4 ipv4;
376 struct rte_flow_item_ipv6 ipv6;
377 struct rte_flow_item_tcp tcp;
378 struct rte_flow_item_udp udp;
379 } flow_tcf_mask_supported = {
384 .type = RTE_BE16(0xffff),
385 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
386 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
389 /* PCP and VID only, no DEI. */
390 .tci = RTE_BE16(0xefff),
391 .inner_type = RTE_BE16(0xffff),
394 .next_proto_id = 0xff,
395 .src_addr = RTE_BE32(0xffffffff),
396 .dst_addr = RTE_BE32(0xffffffff),
401 "\xff\xff\xff\xff\xff\xff\xff\xff"
402 "\xff\xff\xff\xff\xff\xff\xff\xff",
404 "\xff\xff\xff\xff\xff\xff\xff\xff"
405 "\xff\xff\xff\xff\xff\xff\xff\xff",
408 .src_port = RTE_BE16(0xffff),
409 .dst_port = RTE_BE16(0xffff),
413 .src_port = RTE_BE16(0xffff),
414 .dst_port = RTE_BE16(0xffff),
418 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
419 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
420 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
421 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
422 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
424 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
/** DPDK port to network interface index (ifindex) conversion. */
struct flow_tcf_ptoi {
	uint16_t port_id; /**< DPDK port ID. */
	unsigned int ifindex; /**< Network interface index. */
};
432 /* Due to a limitation on driver/FW. */
433 #define MLX5_TCF_GROUP_ID_MAX 3
434 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
436 #define MLX5_TCF_FATE_ACTIONS \
437 (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
438 MLX5_FLOW_ACTION_JUMP)
440 #define MLX5_TCF_VLAN_ACTIONS \
441 (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
442 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
444 #define MLX5_TCF_VXLAN_ACTIONS \
445 (MLX5_FLOW_ACTION_VXLAN_ENCAP | MLX5_FLOW_ACTION_VXLAN_DECAP)
447 #define MLX5_TCF_PEDIT_ACTIONS \
448 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
449 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
450 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
451 MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
452 MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
454 #define MLX5_TCF_CONFIG_ACTIONS \
455 (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
456 MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
457 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
458 (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
460 #define MAX_PEDIT_KEYS 128
461 #define SZ_PEDIT_KEY_VAL 4
463 #define NUM_OF_PEDIT_KEYS(sz) \
464 (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
466 struct pedit_key_ex {
467 enum pedit_header_type htype;
471 struct pedit_parser {
472 struct tc_pedit_sel sel;
473 struct tc_pedit_key keys[MAX_PEDIT_KEYS];
474 struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
478 * Create space for using the implicitly created TC flow counter.
481 * Pointer to the Ethernet device structure.
484 * A pointer to the counter data structure, NULL otherwise and
487 static struct mlx5_flow_counter *
488 flow_tcf_counter_new(void)
490 struct mlx5_flow_counter *cnt;
493 * eswitch counter cannot be shared and its id is unknown.
494 * currently returning all with id 0.
495 * in the future maybe better to switch to unique numbers.
497 struct mlx5_flow_counter tmpl = {
500 cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
506 /* Implicit counter, do not add to list. */
511 * Set pedit key of MAC address
514 * pointer to action specification
515 * @param[in,out] p_parser
516 * pointer to pedit_parser
519 flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
520 struct pedit_parser *p_parser)
522 int idx = p_parser->sel.nkeys;
523 uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
524 offsetof(struct ether_hdr, s_addr) :
525 offsetof(struct ether_hdr, d_addr);
526 const struct rte_flow_action_set_mac *conf =
527 (const struct rte_flow_action_set_mac *)actions->conf;
529 p_parser->keys[idx].off = off;
530 p_parser->keys[idx].mask = ~UINT32_MAX;
531 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
532 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
533 memcpy(&p_parser->keys[idx].val,
534 conf->mac_addr, SZ_PEDIT_KEY_VAL);
536 p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
537 p_parser->keys[idx].mask = 0xFFFF0000;
538 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
539 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
540 memcpy(&p_parser->keys[idx].val,
541 conf->mac_addr + SZ_PEDIT_KEY_VAL,
542 ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
543 p_parser->sel.nkeys = (++idx);
547 * Set pedit key of decrease/set ttl
550 * pointer to action specification
551 * @param[in,out] p_parser
552 * pointer to pedit_parser
553 * @param[in] item_flags
554 * flags of all items presented
557 flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
558 struct pedit_parser *p_parser,
561 int idx = p_parser->sel.nkeys;
563 p_parser->keys[idx].mask = 0xFFFFFF00;
564 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
565 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
566 p_parser->keys[idx].off =
567 offsetof(struct ipv4_hdr, time_to_live);
569 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
570 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
571 p_parser->keys[idx].off =
572 offsetof(struct ipv6_hdr, hop_limits);
574 if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
575 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
576 p_parser->keys[idx].val = 0x000000FF;
578 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
579 p_parser->keys[idx].val =
580 (__u32)((const struct rte_flow_action_set_ttl *)
581 actions->conf)->ttl_value;
583 p_parser->sel.nkeys = (++idx);
587 * Set pedit key of transport (TCP/UDP) port value
590 * pointer to action specification
591 * @param[in,out] p_parser
592 * pointer to pedit_parser
593 * @param[in] item_flags
594 * flags of all items presented
597 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
598 struct pedit_parser *p_parser,
601 int idx = p_parser->sel.nkeys;
603 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
604 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
605 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
606 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
607 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
608 /* offset of src/dst port is same for TCP and UDP */
609 p_parser->keys[idx].off =
610 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
611 offsetof(struct tcp_hdr, src_port) :
612 offsetof(struct tcp_hdr, dst_port);
613 p_parser->keys[idx].mask = 0xFFFF0000;
614 p_parser->keys[idx].val =
615 (__u32)((const struct rte_flow_action_set_tp *)
616 actions->conf)->port;
617 p_parser->sel.nkeys = (++idx);
621 * Set pedit key of ipv6 address
624 * pointer to action specification
625 * @param[in,out] p_parser
626 * pointer to pedit_parser
629 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
630 struct pedit_parser *p_parser)
632 int idx = p_parser->sel.nkeys;
633 int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
635 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
636 offsetof(struct ipv6_hdr, src_addr) :
637 offsetof(struct ipv6_hdr, dst_addr);
638 const struct rte_flow_action_set_ipv6 *conf =
639 (const struct rte_flow_action_set_ipv6 *)actions->conf;
641 for (int i = 0; i < keys; i++, idx++) {
642 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
643 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
644 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
645 p_parser->keys[idx].mask = ~UINT32_MAX;
646 memcpy(&p_parser->keys[idx].val,
647 conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
650 p_parser->sel.nkeys += keys;
654 * Set pedit key of ipv4 address
657 * pointer to action specification
658 * @param[in,out] p_parser
659 * pointer to pedit_parser
662 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
663 struct pedit_parser *p_parser)
665 int idx = p_parser->sel.nkeys;
667 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
668 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
669 p_parser->keys[idx].off =
670 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
671 offsetof(struct ipv4_hdr, src_addr) :
672 offsetof(struct ipv4_hdr, dst_addr);
673 p_parser->keys[idx].mask = ~UINT32_MAX;
674 p_parser->keys[idx].val =
675 ((const struct rte_flow_action_set_ipv4 *)
676 actions->conf)->ipv4_addr;
677 p_parser->sel.nkeys = (++idx);
681 * Create the pedit's na attribute in netlink message
682 * on pre-allocate message buffer
685 * pointer to pre-allocated netlink message buffer
686 * @param[in,out] actions
687 * pointer to pointer of actions specification.
688 * @param[in,out] action_flags
689 * pointer to actions flags
690 * @param[in] item_flags
691 * flags of all item presented
694 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
695 const struct rte_flow_action **actions,
698 struct pedit_parser p_parser;
699 struct nlattr *na_act_options;
700 struct nlattr *na_pedit_keys;
702 memset(&p_parser, 0, sizeof(p_parser));
703 mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
704 na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
705 /* all modify header actions should be in one tc-pedit action */
706 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
707 switch ((*actions)->type) {
708 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
709 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
710 flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
712 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
713 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
714 flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
716 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
717 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
718 flow_tcf_pedit_key_set_tp_port(*actions,
719 &p_parser, item_flags);
721 case RTE_FLOW_ACTION_TYPE_SET_TTL:
722 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
723 flow_tcf_pedit_key_set_dec_ttl(*actions,
724 &p_parser, item_flags);
726 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
727 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
728 flow_tcf_pedit_key_set_mac(*actions, &p_parser);
731 goto pedit_mnl_msg_done;
735 p_parser.sel.action = TC_ACT_PIPE;
736 mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
737 sizeof(p_parser.sel) +
738 p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
741 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
742 for (int i = 0; i < p_parser.sel.nkeys; i++) {
743 struct nlattr *na_pedit_key =
744 mnl_attr_nest_start(nl,
745 TCA_PEDIT_KEY_EX | NLA_F_NESTED);
746 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
747 p_parser.keys_ex[i].htype);
748 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
749 p_parser.keys_ex[i].cmd);
750 mnl_attr_nest_end(nl, na_pedit_key);
752 mnl_attr_nest_end(nl, na_pedit_keys);
753 mnl_attr_nest_end(nl, na_act_options);
758 * Calculate max memory size of one TC-pedit actions.
759 * One TC-pedit action can contain set of keys each defining
760 * a rewrite element (rte_flow action)
762 * @param[in,out] actions
763 * actions specification.
764 * @param[in,out] action_flags
766 * @param[in,out] size
769 * Max memory size of one TC-pedit action
772 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
773 uint64_t *action_flags)
779 pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
780 SZ_NLATTR_STRZ_OF("pedit") +
781 SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
782 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
783 switch ((*actions)->type) {
784 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
785 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
786 flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
788 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
789 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
790 flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
792 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
793 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
794 flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
796 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
797 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
798 flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
800 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
801 /* TCP is as same as UDP */
802 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
803 flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
805 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
806 /* TCP is as same as UDP */
807 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
808 flags |= MLX5_FLOW_ACTION_SET_TP_DST;
810 case RTE_FLOW_ACTION_TYPE_SET_TTL:
811 keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
812 flags |= MLX5_FLOW_ACTION_SET_TTL;
814 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
815 keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
816 flags |= MLX5_FLOW_ACTION_DEC_TTL;
818 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
819 keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
820 flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
822 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
823 keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
824 flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
827 goto get_pedit_action_size_done;
830 get_pedit_action_size_done:
831 /* TCA_PEDIT_PARAMS_EX */
833 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
834 keys * sizeof(struct tc_pedit_key));
835 pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
837 /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
838 (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
839 SZ_NLATTR_DATA_OF(2));
840 (*action_flags) |= flags;
846 * Retrieve mask for pattern item.
848 * This function does basic sanity checks on a pattern item in order to
849 * return the most appropriate mask for it.
852 * Item specification.
853 * @param[in] mask_default
854 * Default mask for pattern item as specified by the flow API.
855 * @param[in] mask_supported
856 * Mask fields supported by the implementation.
857 * @param[in] mask_empty
858 * Empty mask to return when there is no specification.
860 * Perform verbose error reporting if not NULL.
863 * Either @p item->mask or one of the mask parameters on success, NULL
864 * otherwise and rte_errno is set.
867 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
868 const void *mask_supported, const void *mask_empty,
869 size_t mask_size, struct rte_flow_error *error)
874 /* item->last and item->mask cannot exist without item->spec. */
875 if (!item->spec && (item->mask || item->last)) {
876 rte_flow_error_set(error, EINVAL,
877 RTE_FLOW_ERROR_TYPE_ITEM, item,
878 "\"mask\" or \"last\" field provided without"
879 " a corresponding \"spec\"");
882 /* No spec, no mask, no problem. */
885 mask = item->mask ? item->mask : mask_default;
888 * Single-pass check to make sure that:
889 * - Mask is supported, no bits are set outside mask_supported.
890 * - Both item->spec and item->last are included in mask.
892 for (i = 0; i != mask_size; ++i) {
895 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
896 ((const uint8_t *)mask_supported)[i]) {
897 rte_flow_error_set(error, ENOTSUP,
898 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
899 "unsupported field found"
904 (((const uint8_t *)item->spec)[i] & mask[i]) !=
905 (((const uint8_t *)item->last)[i] & mask[i])) {
906 rte_flow_error_set(error, EINVAL,
907 RTE_FLOW_ERROR_TYPE_ITEM_LAST,
909 "range between \"spec\" and \"last\""
910 " not comprised in \"mask\"");
918 * Build a conversion table between port ID and ifindex.
921 * Pointer to Ethernet device.
923 * Pointer to ptoi table.
925 * Size of ptoi table provided.
928 * Size of ptoi table filled.
931 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
934 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
935 uint16_t port_id[n + 1];
937 unsigned int own = 0;
939 /* At least one port is needed when no switch domain is present. */
942 port_id[0] = dev->data->port_id;
944 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
948 for (i = 0; i != n; ++i) {
949 struct rte_eth_dev_info dev_info;
951 rte_eth_dev_info_get(port_id[i], &dev_info);
952 if (port_id[i] == dev->data->port_id)
954 ptoi[i].port_id = port_id[i];
955 ptoi[i].ifindex = dev_info.if_index;
957 /* Ensure first entry of ptoi[] is the current device. */
963 /* An entry with zero ifindex terminates ptoi[]. */
970 * Verify the @p attr will be correctly understood by the E-switch.
973 * Pointer to flow attributes
975 * Pointer to error structure.
978 * 0 on success, a negative errno value otherwise and rte_errno is set.
981 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
982 struct rte_flow_error *error)
985 * Supported attributes: groups, some priorities and ingress only.
986 * group is supported only if kernel supports chain. Don't care about
987 * transfer as it is the caller's problem.
989 if (attr->group > MLX5_TCF_GROUP_ID_MAX)
990 return rte_flow_error_set(error, ENOTSUP,
991 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
992 "group ID larger than "
993 RTE_STR(MLX5_TCF_GROUP_ID_MAX)
995 else if (attr->group > 0 &&
996 attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
997 return rte_flow_error_set(error, ENOTSUP,
998 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1000 "lowest priority level is "
1001 RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
1002 " when group is configured");
1003 else if (attr->priority > 0xfffe)
1004 return rte_flow_error_set(error, ENOTSUP,
1005 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1007 "lowest priority level is 0xfffe");
1009 return rte_flow_error_set(error, EINVAL,
1010 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1011 attr, "only ingress is supported");
1013 return rte_flow_error_set(error, ENOTSUP,
1014 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1015 attr, "egress is not supported");
1020 * Validate flow for E-Switch.
1023 * Pointer to the priv structure.
1025 * Pointer to the flow attributes.
1027 * Pointer to the list of items.
1028 * @param[in] actions
1029 * Pointer to the list of actions.
1031 * Pointer to the error structure.
1034 * 0 on success, a negative errno value otherwise and rte_ernno is set.
1037 flow_tcf_validate(struct rte_eth_dev *dev,
1038 const struct rte_flow_attr *attr,
1039 const struct rte_flow_item items[],
1040 const struct rte_flow_action actions[],
1041 struct rte_flow_error *error)
1044 const struct rte_flow_item_port_id *port_id;
1045 const struct rte_flow_item_eth *eth;
1046 const struct rte_flow_item_vlan *vlan;
1047 const struct rte_flow_item_ipv4 *ipv4;
1048 const struct rte_flow_item_ipv6 *ipv6;
1049 const struct rte_flow_item_tcp *tcp;
1050 const struct rte_flow_item_udp *udp;
1053 const struct rte_flow_action_port_id *port_id;
1054 const struct rte_flow_action_jump *jump;
1055 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1056 const struct rte_flow_action_of_set_vlan_vid *
1058 const struct rte_flow_action_of_set_vlan_pcp *
1060 const struct rte_flow_action_set_ipv4 *set_ipv4;
1061 const struct rte_flow_action_set_ipv6 *set_ipv6;
1063 uint64_t item_flags = 0;
1064 uint64_t action_flags = 0;
1065 uint8_t next_protocol = -1;
1066 unsigned int tcm_ifindex = 0;
1067 uint8_t pedit_validated = 0;
1068 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1069 struct rte_eth_dev *port_id_dev = NULL;
1070 bool in_port_id_set;
1073 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1074 PTOI_TABLE_SZ_MAX(dev)));
1075 ret = flow_tcf_validate_attributes(attr, error);
1078 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1081 switch (items->type) {
1082 case RTE_FLOW_ITEM_TYPE_VOID:
1084 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1085 mask.port_id = flow_tcf_item_mask
1086 (items, &rte_flow_item_port_id_mask,
1087 &flow_tcf_mask_supported.port_id,
1088 &flow_tcf_mask_empty.port_id,
1089 sizeof(flow_tcf_mask_supported.port_id),
1093 if (mask.port_id == &flow_tcf_mask_empty.port_id) {
1097 spec.port_id = items->spec;
1098 if (mask.port_id->id && mask.port_id->id != 0xffffffff)
1099 return rte_flow_error_set
1101 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1103 "no support for partial mask on"
1105 if (!mask.port_id->id)
1108 for (i = 0; ptoi[i].ifindex; ++i)
1109 if (ptoi[i].port_id == spec.port_id->id)
1111 if (!ptoi[i].ifindex)
1112 return rte_flow_error_set
1114 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1116 "missing data to convert port ID to"
1118 if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
1119 return rte_flow_error_set
1121 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1123 "cannot match traffic for"
1124 " several port IDs through"
1125 " a single flow rule");
1126 tcm_ifindex = ptoi[i].ifindex;
1129 case RTE_FLOW_ITEM_TYPE_ETH:
1130 ret = mlx5_flow_validate_item_eth(items, item_flags,
1134 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1136 * Redundant check due to different supported mask.
1137 * Same for the rest of items.
1139 mask.eth = flow_tcf_item_mask
1140 (items, &rte_flow_item_eth_mask,
1141 &flow_tcf_mask_supported.eth,
1142 &flow_tcf_mask_empty.eth,
1143 sizeof(flow_tcf_mask_supported.eth),
1147 if (mask.eth->type && mask.eth->type !=
1149 return rte_flow_error_set
1151 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1153 "no support for partial mask on"
1156 case RTE_FLOW_ITEM_TYPE_VLAN:
1157 ret = mlx5_flow_validate_item_vlan(items, item_flags,
1161 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1162 mask.vlan = flow_tcf_item_mask
1163 (items, &rte_flow_item_vlan_mask,
1164 &flow_tcf_mask_supported.vlan,
1165 &flow_tcf_mask_empty.vlan,
1166 sizeof(flow_tcf_mask_supported.vlan),
1170 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
1171 (mask.vlan->tci & RTE_BE16(0xe000)) !=
1172 RTE_BE16(0xe000)) ||
1173 (mask.vlan->tci & RTE_BE16(0x0fff) &&
1174 (mask.vlan->tci & RTE_BE16(0x0fff)) !=
1175 RTE_BE16(0x0fff)) ||
1176 (mask.vlan->inner_type &&
1177 mask.vlan->inner_type != RTE_BE16(0xffff)))
1178 return rte_flow_error_set
1180 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1182 "no support for partial masks on"
1183 " \"tci\" (PCP and VID parts) and"
1184 " \"inner_type\" fields");
1186 case RTE_FLOW_ITEM_TYPE_IPV4:
1187 ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1191 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1192 mask.ipv4 = flow_tcf_item_mask
1193 (items, &rte_flow_item_ipv4_mask,
1194 &flow_tcf_mask_supported.ipv4,
1195 &flow_tcf_mask_empty.ipv4,
1196 sizeof(flow_tcf_mask_supported.ipv4),
1200 if (mask.ipv4->hdr.next_proto_id &&
1201 mask.ipv4->hdr.next_proto_id != 0xff)
1202 return rte_flow_error_set
1204 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1206 "no support for partial mask on"
1207 " \"hdr.next_proto_id\" field");
1208 else if (mask.ipv4->hdr.next_proto_id)
1210 ((const struct rte_flow_item_ipv4 *)
1211 (items->spec))->hdr.next_proto_id;
1213 case RTE_FLOW_ITEM_TYPE_IPV6:
1214 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1218 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1219 mask.ipv6 = flow_tcf_item_mask
1220 (items, &rte_flow_item_ipv6_mask,
1221 &flow_tcf_mask_supported.ipv6,
1222 &flow_tcf_mask_empty.ipv6,
1223 sizeof(flow_tcf_mask_supported.ipv6),
1227 if (mask.ipv6->hdr.proto &&
1228 mask.ipv6->hdr.proto != 0xff)
1229 return rte_flow_error_set
1231 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1233 "no support for partial mask on"
1234 " \"hdr.proto\" field");
1235 else if (mask.ipv6->hdr.proto)
1237 ((const struct rte_flow_item_ipv6 *)
1238 (items->spec))->hdr.proto;
1240 case RTE_FLOW_ITEM_TYPE_UDP:
1241 ret = mlx5_flow_validate_item_udp(items, item_flags,
1242 next_protocol, error);
1245 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1246 mask.udp = flow_tcf_item_mask
1247 (items, &rte_flow_item_udp_mask,
1248 &flow_tcf_mask_supported.udp,
1249 &flow_tcf_mask_empty.udp,
1250 sizeof(flow_tcf_mask_supported.udp),
1255 case RTE_FLOW_ITEM_TYPE_TCP:
1256 ret = mlx5_flow_validate_item_tcp
1259 &flow_tcf_mask_supported.tcp,
1263 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1264 mask.tcp = flow_tcf_item_mask
1265 (items, &rte_flow_item_tcp_mask,
1266 &flow_tcf_mask_supported.tcp,
1267 &flow_tcf_mask_empty.tcp,
1268 sizeof(flow_tcf_mask_supported.tcp),
1274 return rte_flow_error_set(error, ENOTSUP,
1275 RTE_FLOW_ERROR_TYPE_ITEM,
1276 NULL, "item not supported");
1279 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1281 uint64_t current_action_flag = 0;
1283 switch (actions->type) {
1284 case RTE_FLOW_ACTION_TYPE_VOID:
1286 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1287 current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1290 conf.port_id = actions->conf;
1291 if (conf.port_id->original)
1294 for (i = 0; ptoi[i].ifindex; ++i)
1295 if (ptoi[i].port_id == conf.port_id->id)
1297 if (!ptoi[i].ifindex)
1298 return rte_flow_error_set
1300 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1302 "missing data to convert port ID to"
1304 port_id_dev = &rte_eth_devices[conf.port_id->id];
1306 case RTE_FLOW_ACTION_TYPE_JUMP:
1307 current_action_flag = MLX5_FLOW_ACTION_JUMP;
1310 conf.jump = actions->conf;
1311 if (attr->group >= conf.jump->group)
1312 return rte_flow_error_set
1314 RTE_FLOW_ERROR_TYPE_ACTION,
1316 "can jump only to a group forward");
1318 case RTE_FLOW_ACTION_TYPE_DROP:
1319 current_action_flag = MLX5_FLOW_ACTION_DROP;
1321 case RTE_FLOW_ACTION_TYPE_COUNT:
1323 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1324 current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1326 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1327 current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1329 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1330 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1331 return rte_flow_error_set
1333 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1334 "vlan modify is not supported,"
1335 " set action must follow push action");
1336 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1338 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1339 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1340 return rte_flow_error_set
1342 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1343 "vlan modify is not supported,"
1344 " set action must follow push action");
1345 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1347 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1348 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1350 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1351 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1353 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1354 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1356 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1357 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1359 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1360 current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1362 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1363 current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1365 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1366 current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1368 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1369 current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1371 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1372 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
1374 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1375 current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
1378 return rte_flow_error_set(error, ENOTSUP,
1379 RTE_FLOW_ERROR_TYPE_ACTION,
1381 "action not supported");
1383 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1385 return rte_flow_error_set(error, EINVAL,
1386 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1388 "action configuration not set");
1390 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1392 return rte_flow_error_set(error, ENOTSUP,
1393 RTE_FLOW_ERROR_TYPE_ACTION,
1395 "set actions should be "
1396 "listed successively");
1397 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1398 (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1399 pedit_validated = 1;
1400 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1401 (action_flags & MLX5_TCF_FATE_ACTIONS))
1402 return rte_flow_error_set(error, EINVAL,
1403 RTE_FLOW_ERROR_TYPE_ACTION,
1405 "can't have multiple fate"
1407 action_flags |= current_action_flag;
1409 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1410 (action_flags & MLX5_FLOW_ACTION_DROP))
1411 return rte_flow_error_set(error, ENOTSUP,
1412 RTE_FLOW_ERROR_TYPE_ACTION,
1414 "set action is not compatible with "
1416 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1417 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1418 return rte_flow_error_set(error, ENOTSUP,
1419 RTE_FLOW_ERROR_TYPE_ACTION,
1421 "set action must be followed by "
1424 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1425 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1426 return rte_flow_error_set(error, EINVAL,
1427 RTE_FLOW_ERROR_TYPE_ACTION,
1429 "no ipv4 item found in"
1433 (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1434 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1435 return rte_flow_error_set(error, EINVAL,
1436 RTE_FLOW_ERROR_TYPE_ACTION,
1438 "no ipv6 item found in"
1442 (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1444 (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1445 MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1446 return rte_flow_error_set(error, EINVAL,
1447 RTE_FLOW_ERROR_TYPE_ACTION,
1449 "no TCP/UDP item found in"
1453 * FW syndrome (0xA9C090):
1454 * set_flow_table_entry: push vlan action fte in fdb can ONLY be
1455 * forward to the uplink.
1457 if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1458 (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1459 ((struct priv *)port_id_dev->data->dev_private)->representor)
1460 return rte_flow_error_set(error, ENOTSUP,
1461 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1462 "vlan push can only be applied"
1463 " when forwarding to uplink port");
1465 * FW syndrome (0x294609):
1466 * set_flow_table_entry: modify/pop/push actions in fdb flow table
1467 * are supported only while forwarding to vport.
1469 if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1470 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1471 return rte_flow_error_set(error, ENOTSUP,
1472 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1473 "vlan actions are supported"
1474 " only with port_id action");
1475 if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1476 return rte_flow_error_set(error, EINVAL,
1477 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1478 "no fate action is found");
1480 (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
1482 (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
1483 MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
1484 return rte_flow_error_set(error, EINVAL,
1485 RTE_FLOW_ERROR_TYPE_ACTION,
1487 "no IP found in pattern");
1490 (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
1491 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
1492 return rte_flow_error_set(error, ENOTSUP,
1493 RTE_FLOW_ERROR_TYPE_ACTION,
1495 "no ethernet found in"
/* NOTE(review): listing is elided (embedded line numbers jump) — interior
 * lines (breaks, closing braces, return) are not shown; verify against the
 * complete file. */
1502 * Calculate maximum size of memory for flow items of Linux TC flower and
1503 * extract specified items.
1506 * Pointer to the list of items.
1507 * @param[out] item_flags
1508 * Pointer to the detected items.
1511 * Maximum size of memory for items.
1514 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1515 const struct rte_flow_item items[],
1516 uint64_t *item_flags)
/* Base size: "flower" classifier name, TCA_OPTIONS nest, and skip-sw flag. */
1521 size += SZ_NLATTR_STRZ_OF("flower") +
1522 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1523 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
/* Non-zero group maps to a TC chain attribute. */
1524 if (attr->group > 0)
1525 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
/* Accumulate the worst-case attribute size per supported item type and
 * record the matched layers in 'flags'. */
1526 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1527 switch (items->type) {
1528 case RTE_FLOW_ITEM_TYPE_VOID:
1530 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1532 case RTE_FLOW_ITEM_TYPE_ETH:
1533 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1534 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1535 /* dst/src MAC addr and mask. */
1536 flags |= MLX5_FLOW_LAYER_OUTER_L2;
1538 case RTE_FLOW_ITEM_TYPE_VLAN:
1539 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1540 SZ_NLATTR_TYPE_OF(uint16_t) +
1541 /* VLAN Ether type. */
1542 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1543 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1544 flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1546 case RTE_FLOW_ITEM_TYPE_IPV4:
1547 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1548 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1549 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1550 /* dst/src IP addr and mask. */
1551 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1553 case RTE_FLOW_ITEM_TYPE_IPV6:
1554 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1555 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1556 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1557 /* dst/src IP addr and mask. */
1558 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1560 case RTE_FLOW_ITEM_TYPE_UDP:
1561 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1562 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1563 /* dst/src port and mask. */
1564 flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1566 case RTE_FLOW_ITEM_TYPE_TCP:
1567 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1568 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1569 /* dst/src port and mask. */
1570 flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
/* Unsupported items indicate a validation gap upstream, not an error here. */
1574 "unsupported item %p type %d,"
1575 " items must be validated before flow creation",
1576 (const void *)items, items->type);
1580 *item_flags = flags;
/* NOTE(review): listing is elided (embedded line numbers jump) — interior
 * lines are not shown; verify against the complete file. */
1585 * Calculate maximum size of memory for flow actions of Linux TC flower and
1586 * extract specified actions.
1588 * @param[in] actions
1589 * Pointer to the list of actions.
1590 * @param[out] action_flags
1591 * Pointer to the detected actions.
1594 * Maximum size of memory for actions.
1597 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1598 uint64_t *action_flags)
1603 size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
/* Accumulate worst-case attribute size per action and record action bits. */
1604 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1605 switch (actions->type) {
1606 case RTE_FLOW_ACTION_TYPE_VOID:
1608 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1609 size += SZ_NLATTR_NEST + /* na_act_index. */
1610 SZ_NLATTR_STRZ_OF("mirred") +
1611 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1612 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1613 flags |= MLX5_FLOW_ACTION_PORT_ID;
1615 case RTE_FLOW_ACTION_TYPE_JUMP:
1616 size += SZ_NLATTR_NEST + /* na_act_index. */
1617 SZ_NLATTR_STRZ_OF("gact") +
1618 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1619 SZ_NLATTR_TYPE_OF(struct tc_gact);
1620 flags |= MLX5_FLOW_ACTION_JUMP;
1622 case RTE_FLOW_ACTION_TYPE_DROP:
1623 size += SZ_NLATTR_NEST + /* na_act_index. */
1624 SZ_NLATTR_STRZ_OF("gact") +
1625 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1626 SZ_NLATTR_TYPE_OF(struct tc_gact);
1627 flags |= MLX5_FLOW_ACTION_DROP;
1629 case RTE_FLOW_ACTION_TYPE_COUNT:
/* All four OF VLAN actions share the "vlan" TC action sizing below. */
1631 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1632 flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1633 goto action_of_vlan;
1634 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1635 flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1636 goto action_of_vlan;
1637 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1638 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1639 goto action_of_vlan;
1640 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1641 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1642 goto action_of_vlan;
1644 size += SZ_NLATTR_NEST + /* na_act_index. */
1645 SZ_NLATTR_STRZ_OF("vlan") +
1646 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1647 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1648 SZ_NLATTR_TYPE_OF(uint16_t) +
1649 /* VLAN protocol. */
1650 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1651 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
/* Header-rewrite actions are sized by the pedit helper, which may consume
 * several consecutive actions (hence the &actions double pointer). */
1653 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1654 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1655 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1656 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1657 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1658 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1659 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1660 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1661 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1662 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1663 size += flow_tcf_get_pedit_actions_size(&actions,
1668 "unsupported action %p type %d,"
1669 " items must be validated before flow creation",
1670 (const void *)actions, actions->type);
1674 *action_flags = flags;
/* NOTE(review): listing is elided (embedded line numbers jump); verify
 * against the complete file. */
1679 * Brand rtnetlink buffer with unique handle.
1681 * This handle should be unique for a given network interface to avoid
1685 * Pointer to Netlink message.
1687 * Unique 32-bit handle to use.
1690 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
/* The tcmsg payload immediately follows the Netlink header. */
1692 struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1694 tcm->tcm_handle = handle;
1695 DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1696 (void *)nlh, handle);
/* NOTE(review): listing is elided (embedded line numbers jump); verify
 * against the complete file. */
1700 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1701 * memory required, allocates the memory, initializes Netlink message headers
1702 * and set unique TC message handle.
1705 * Pointer to the flow attributes.
1707 * Pointer to the list of items.
1708 * @param[in] actions
1709 * Pointer to the list of actions.
1710 * @param[out] item_flags
1711 * Pointer to bit mask of all items detected.
1712 * @param[out] action_flags
1713 * Pointer to bit mask of all actions detected.
1715 * Pointer to the error structure.
1718 * Pointer to mlx5_flow object on success,
1719 * otherwise NULL and rte_errno is set.
1721 static struct mlx5_flow *
1722 flow_tcf_prepare(const struct rte_flow_attr *attr,
1723 const struct rte_flow_item items[],
1724 const struct rte_flow_action actions[],
1725 uint64_t *item_flags, uint64_t *action_flags,
1726 struct rte_flow_error *error)
/* One allocation holds the mlx5_flow object plus the Netlink message
 * (nlmsghdr + tcmsg + attributes) laid out right after it. */
1728 size_t size = sizeof(struct mlx5_flow) +
1729 MNL_ALIGN(sizeof(struct nlmsghdr)) +
1730 MNL_ALIGN(sizeof(struct tcmsg));
1731 struct mlx5_flow *dev_flow;
1732 struct nlmsghdr *nlh;
1735 size += flow_tcf_get_items_and_size(attr, items, item_flags);
1736 size += flow_tcf_get_actions_and_size(actions, action_flags);
1737 dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1739 rte_flow_error_set(error, ENOMEM,
1740 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1741 "not enough memory to create E-Switch flow");
/* Netlink message is built in the space following the flow object. */
1744 nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1745 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1746 *dev_flow = (struct mlx5_flow){
1747 .tcf = (struct mlx5_flow_tcf){
1753 * Generate a reasonably unique handle based on the address of the
1756 * This is straightforward on 32-bit systems where the flow pointer can
1757 * be used directly. Otherwise, its least significant part is taken
1758 * after shifting it by the previous power of two of the pointed buffer
1761 if (sizeof(dev_flow) <= 4)
1762 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1764 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1765 rte_log2_u32(rte_align32prevpow2(size)));
/* NOTE(review): listing is elided (embedded line numbers jump); verify
 * against the complete file. */
1770 * Make adjustments for supporting count actions.
1773 * Pointer to the Ethernet device structure.
1774 * @param[in] dev_flow
1775 * Pointer to mlx5_flow.
1777 * Pointer to error structure.
1780 * 0 On success else a negative errno value is returned and rte_errno is set.
1783 flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
1784 struct mlx5_flow *dev_flow,
1785 struct rte_flow_error *error)
1787 struct rte_flow *flow = dev_flow->flow;
/* Allocate a counter lazily on first COUNT action for this flow. */
1789 if (!flow->counter) {
1790 flow->counter = flow_tcf_counter_new();
1792 return rte_flow_error_set(error, rte_errno,
1793 RTE_FLOW_ERROR_TYPE_ACTION,
1795 "cannot get counter"
/* NOTE(review): listing is elided (embedded line numbers jump) — closing
 * braces, breaks and several argument lines are not shown; verify against
 * the complete file before relying on this excerpt. */
1802 * Translate flow for Linux TC flower and construct Netlink message.
1805 * Pointer to the priv structure.
1806 * @param[in, out] flow
1807 * Pointer to the sub flow.
1809 * Pointer to the flow attributes.
1811 * Pointer to the list of items.
1812 * @param[in] actions
1813 * Pointer to the list of actions.
1815 * Pointer to the error structure.
1818 * 0 on success, a negative errno value otherwise and rte_errno is set.
1821 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1822 const struct rte_flow_attr *attr,
1823 const struct rte_flow_item items[],
1824 const struct rte_flow_action actions[],
1825 struct rte_flow_error *error)
/* Union of per-item spec/mask pointer views (anonymous unions, openers
 * elided in this listing). */
1828 const struct rte_flow_item_port_id *port_id;
1829 const struct rte_flow_item_eth *eth;
1830 const struct rte_flow_item_vlan *vlan;
1831 const struct rte_flow_item_ipv4 *ipv4;
1832 const struct rte_flow_item_ipv6 *ipv6;
1833 const struct rte_flow_item_tcp *tcp;
1834 const struct rte_flow_item_udp *udp;
1837 const struct rte_flow_action_port_id *port_id;
1838 const struct rte_flow_action_jump *jump;
1839 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1840 const struct rte_flow_action_of_set_vlan_vid *
1842 const struct rte_flow_action_of_set_vlan_pcp *
1845 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1846 struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1847 struct tcmsg *tcm = dev_flow->tcf.tcm;
1848 uint32_t na_act_index_cur;
1849 bool eth_type_set = 0;
1850 bool vlan_present = 0;
1851 bool vlan_eth_type_set = 0;
1852 bool ip_proto_set = 0;
1853 struct nlattr *na_flower;
1854 struct nlattr *na_flower_act;
1855 struct nlattr *na_vlan_id = NULL;
1856 struct nlattr *na_vlan_priority = NULL;
1857 uint64_t item_flags = 0;
/* Build port-id -> ifindex translation table; entry 0 is this device. */
1860 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1861 PTOI_TABLE_SZ_MAX(dev)));
1862 nlh = dev_flow->tcf.nlh;
1863 tcm = dev_flow->tcf.tcm;
1864 /* Prepare API must have been called beforehand. */
1865 assert(nlh != NULL && tcm != NULL);
1866 tcm->tcm_family = AF_UNSPEC;
1867 tcm->tcm_ifindex = ptoi[0].ifindex;
1868 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1870 * Priority cannot be zero to prevent the kernel from picking one
1873 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1874 RTE_BE16(ETH_P_ALL));
1875 if (attr->group > 0)
1876 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1877 mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1878 na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1879 mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
/* First pass: translate pattern items into flower match attributes. */
1880 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1883 switch (items->type) {
1884 case RTE_FLOW_ITEM_TYPE_VOID:
1886 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1887 mask.port_id = flow_tcf_item_mask
1888 (items, &rte_flow_item_port_id_mask,
1889 &flow_tcf_mask_supported.port_id,
1890 &flow_tcf_mask_empty.port_id,
1891 sizeof(flow_tcf_mask_supported.port_id),
1893 assert(mask.port_id);
1894 if (mask.port_id == &flow_tcf_mask_empty.port_id)
1896 spec.port_id = items->spec;
1897 if (!mask.port_id->id)
/* Redirect the rule to the ifindex matching the given port ID. */
1900 for (i = 0; ptoi[i].ifindex; ++i)
1901 if (ptoi[i].port_id == spec.port_id->id)
1903 assert(ptoi[i].ifindex);
1904 tcm->tcm_ifindex = ptoi[i].ifindex;
1906 case RTE_FLOW_ITEM_TYPE_ETH:
1907 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1908 mask.eth = flow_tcf_item_mask
1909 (items, &rte_flow_item_eth_mask,
1910 &flow_tcf_mask_supported.eth,
1911 &flow_tcf_mask_empty.eth,
1912 sizeof(flow_tcf_mask_supported.eth),
1915 if (mask.eth == &flow_tcf_mask_empty.eth)
1917 spec.eth = items->spec;
1918 if (mask.eth->type) {
1919 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1923 if (!is_zero_ether_addr(&mask.eth->dst)) {
1924 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1926 spec.eth->dst.addr_bytes);
1927 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1929 mask.eth->dst.addr_bytes);
1931 if (!is_zero_ether_addr(&mask.eth->src)) {
1932 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1934 spec.eth->src.addr_bytes);
1935 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1937 mask.eth->src.addr_bytes);
1940 case RTE_FLOW_ITEM_TYPE_VLAN:
1941 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1942 mask.vlan = flow_tcf_item_mask
1943 (items, &rte_flow_item_vlan_mask,
1944 &flow_tcf_mask_supported.vlan,
1945 &flow_tcf_mask_empty.vlan,
1946 sizeof(flow_tcf_mask_supported.vlan),
/* VLAN presence forces the outer ether type to 802.1Q. */
1950 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1951 RTE_BE16(ETH_P_8021Q));
1954 if (mask.vlan == &flow_tcf_mask_empty.vlan)
1956 spec.vlan = items->spec;
1957 if (mask.vlan->inner_type) {
1958 mnl_attr_put_u16(nlh,
1959 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1960 spec.vlan->inner_type);
1961 vlan_eth_type_set = 1;
/* TCI: top 3 bits are PCP, low 12 bits are the VLAN ID. */
1963 if (mask.vlan->tci & RTE_BE16(0xe000))
1964 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1966 (spec.vlan->tci) >> 13) & 0x7);
1967 if (mask.vlan->tci & RTE_BE16(0x0fff))
1968 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1973 case RTE_FLOW_ITEM_TYPE_IPV4:
1974 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1975 mask.ipv4 = flow_tcf_item_mask
1976 (items, &rte_flow_item_ipv4_mask,
1977 &flow_tcf_mask_supported.ipv4,
1978 &flow_tcf_mask_empty.ipv4,
1979 sizeof(flow_tcf_mask_supported.ipv4),
1982 if (!eth_type_set || !vlan_eth_type_set)
1983 mnl_attr_put_u16(nlh,
1985 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1986 TCA_FLOWER_KEY_ETH_TYPE,
1987 RTE_BE16(ETH_P_IP));
1989 vlan_eth_type_set = 1;
1990 if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1992 spec.ipv4 = items->spec;
1993 if (mask.ipv4->hdr.next_proto_id) {
1994 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1995 spec.ipv4->hdr.next_proto_id);
1998 if (mask.ipv4->hdr.src_addr) {
1999 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
2000 spec.ipv4->hdr.src_addr);
2001 mnl_attr_put_u32(nlh,
2002 TCA_FLOWER_KEY_IPV4_SRC_MASK,
2003 mask.ipv4->hdr.src_addr);
2005 if (mask.ipv4->hdr.dst_addr) {
2006 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
2007 spec.ipv4->hdr.dst_addr);
2008 mnl_attr_put_u32(nlh,
2009 TCA_FLOWER_KEY_IPV4_DST_MASK,
2010 mask.ipv4->hdr.dst_addr);
2013 case RTE_FLOW_ITEM_TYPE_IPV6:
2014 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2015 mask.ipv6 = flow_tcf_item_mask
2016 (items, &rte_flow_item_ipv6_mask,
2017 &flow_tcf_mask_supported.ipv6,
2018 &flow_tcf_mask_empty.ipv6,
2019 sizeof(flow_tcf_mask_supported.ipv6),
2022 if (!eth_type_set || !vlan_eth_type_set)
2023 mnl_attr_put_u16(nlh,
2025 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
2026 TCA_FLOWER_KEY_ETH_TYPE,
2027 RTE_BE16(ETH_P_IPV6));
2029 vlan_eth_type_set = 1;
2030 if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
2032 spec.ipv6 = items->spec;
2033 if (mask.ipv6->hdr.proto) {
2034 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2035 spec.ipv6->hdr.proto);
2038 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
2039 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
2040 sizeof(spec.ipv6->hdr.src_addr),
2041 spec.ipv6->hdr.src_addr);
2042 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
2043 sizeof(mask.ipv6->hdr.src_addr),
2044 mask.ipv6->hdr.src_addr);
2046 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
2047 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
2048 sizeof(spec.ipv6->hdr.dst_addr),
2049 spec.ipv6->hdr.dst_addr);
2050 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
2051 sizeof(mask.ipv6->hdr.dst_addr),
2052 mask.ipv6->hdr.dst_addr);
2055 case RTE_FLOW_ITEM_TYPE_UDP:
2056 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
2057 mask.udp = flow_tcf_item_mask
2058 (items, &rte_flow_item_udp_mask,
2059 &flow_tcf_mask_supported.udp,
2060 &flow_tcf_mask_empty.udp,
2061 sizeof(flow_tcf_mask_supported.udp),
2065 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2067 if (mask.udp == &flow_tcf_mask_empty.udp)
2069 spec.udp = items->spec;
2070 if (mask.udp->hdr.src_port) {
2071 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
2072 spec.udp->hdr.src_port);
2073 mnl_attr_put_u16(nlh,
2074 TCA_FLOWER_KEY_UDP_SRC_MASK,
2075 mask.udp->hdr.src_port);
2077 if (mask.udp->hdr.dst_port) {
2078 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
2079 spec.udp->hdr.dst_port);
2080 mnl_attr_put_u16(nlh,
2081 TCA_FLOWER_KEY_UDP_DST_MASK,
2082 mask.udp->hdr.dst_port);
2085 case RTE_FLOW_ITEM_TYPE_TCP:
2086 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
2087 mask.tcp = flow_tcf_item_mask
2088 (items, &rte_flow_item_tcp_mask,
2089 &flow_tcf_mask_supported.tcp,
2090 &flow_tcf_mask_empty.tcp,
2091 sizeof(flow_tcf_mask_supported.tcp),
2095 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2097 if (mask.tcp == &flow_tcf_mask_empty.tcp)
2099 spec.tcp = items->spec;
2100 if (mask.tcp->hdr.src_port) {
2101 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
2102 spec.tcp->hdr.src_port);
2103 mnl_attr_put_u16(nlh,
2104 TCA_FLOWER_KEY_TCP_SRC_MASK,
2105 mask.tcp->hdr.src_port);
2107 if (mask.tcp->hdr.dst_port) {
2108 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
2109 spec.tcp->hdr.dst_port);
2110 mnl_attr_put_u16(nlh,
2111 TCA_FLOWER_KEY_TCP_DST_MASK,
2112 mask.tcp->hdr.dst_port);
2114 if (mask.tcp->hdr.tcp_flags) {
2117 TCA_FLOWER_KEY_TCP_FLAGS,
2119 (spec.tcp->hdr.tcp_flags));
2122 TCA_FLOWER_KEY_TCP_FLAGS_MASK,
2124 (mask.tcp->hdr.tcp_flags));
2128 return rte_flow_error_set(error, ENOTSUP,
2129 RTE_FLOW_ERROR_TYPE_ITEM,
2130 NULL, "item not supported");
/* Second pass: translate actions into the TCA_FLOWER_ACT nest; each action
 * gets its own 1-based index sub-nest. */
2133 na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
2134 na_act_index_cur = 1;
2135 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2136 struct nlattr *na_act_index;
2137 struct nlattr *na_act;
2138 unsigned int vlan_act;
2141 switch (actions->type) {
2142 case RTE_FLOW_ACTION_TYPE_VOID:
2144 case RTE_FLOW_ACTION_TYPE_PORT_ID:
2145 conf.port_id = actions->conf;
2146 if (conf.port_id->original)
2149 for (i = 0; ptoi[i].ifindex; ++i)
2150 if (ptoi[i].port_id == conf.port_id->id)
2152 assert(ptoi[i].ifindex);
2154 mnl_attr_nest_start(nlh, na_act_index_cur++);
2155 assert(na_act_index);
2156 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
2157 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2159 mnl_attr_put(nlh, TCA_MIRRED_PARMS,
2160 sizeof(struct tc_mirred),
2161 &(struct tc_mirred){
2162 .action = TC_ACT_STOLEN,
2163 .eaction = TCA_EGRESS_REDIR,
2164 .ifindex = ptoi[i].ifindex,
2166 mnl_attr_nest_end(nlh, na_act);
2167 mnl_attr_nest_end(nlh, na_act_index);
2169 case RTE_FLOW_ACTION_TYPE_JUMP:
2170 conf.jump = actions->conf;
2172 mnl_attr_nest_start(nlh, na_act_index_cur++);
2173 assert(na_act_index);
2174 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2175 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2177 mnl_attr_put(nlh, TCA_GACT_PARMS,
2178 sizeof(struct tc_gact),
2180 .action = TC_ACT_GOTO_CHAIN |
2183 mnl_attr_nest_end(nlh, na_act);
2184 mnl_attr_nest_end(nlh, na_act_index);
2186 case RTE_FLOW_ACTION_TYPE_DROP:
2188 mnl_attr_nest_start(nlh, na_act_index_cur++);
2189 assert(na_act_index);
2190 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2191 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2193 mnl_attr_put(nlh, TCA_GACT_PARMS,
2194 sizeof(struct tc_gact),
2196 .action = TC_ACT_SHOT,
2198 mnl_attr_nest_end(nlh, na_act);
2199 mnl_attr_nest_end(nlh, na_act_index);
2201 case RTE_FLOW_ACTION_TYPE_COUNT:
2203 * Driver adds the count action implicitly for
2204 * each rule it creates.
2206 ret = flow_tcf_translate_action_count(dev,
2211 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
2212 conf.of_push_vlan = NULL;
2213 vlan_act = TCA_VLAN_ACT_POP;
2214 goto action_of_vlan;
2215 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
2216 conf.of_push_vlan = actions->conf;
2217 vlan_act = TCA_VLAN_ACT_PUSH;
2218 goto action_of_vlan;
2219 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
2220 conf.of_set_vlan_vid = actions->conf;
2222 goto override_na_vlan_id;
2223 vlan_act = TCA_VLAN_ACT_MODIFY;
2224 goto action_of_vlan;
2225 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
2226 conf.of_set_vlan_pcp = actions->conf;
2227 if (na_vlan_priority)
2228 goto override_na_vlan_priority;
2229 vlan_act = TCA_VLAN_ACT_MODIFY;
2230 goto action_of_vlan;
2233 mnl_attr_nest_start(nlh, na_act_index_cur++);
2234 assert(na_act_index);
2235 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
2236 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2238 mnl_attr_put(nlh, TCA_VLAN_PARMS,
2239 sizeof(struct tc_vlan),
2241 .action = TC_ACT_PIPE,
2242 .v_action = vlan_act,
2244 if (vlan_act == TCA_VLAN_ACT_POP) {
2245 mnl_attr_nest_end(nlh, na_act);
2246 mnl_attr_nest_end(nlh, na_act_index);
2249 if (vlan_act == TCA_VLAN_ACT_PUSH)
2250 mnl_attr_put_u16(nlh,
2251 TCA_VLAN_PUSH_VLAN_PROTOCOL,
2252 conf.of_push_vlan->ethertype);
/* Reserve VLAN ID/priority slots as TCA_VLAN_PAD placeholders so a later
 * SET_VLAN_VID/PCP action can patch them in place. */
2253 na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
2254 mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
2255 na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
2256 mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
2257 mnl_attr_nest_end(nlh, na_act);
2258 mnl_attr_nest_end(nlh, na_act_index);
2259 if (actions->type ==
2260 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
2261 override_na_vlan_id:
2262 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
2263 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
2265 (conf.of_set_vlan_vid->vlan_vid);
2266 } else if (actions->type ==
2267 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
2268 override_na_vlan_priority:
2269 na_vlan_priority->nla_type =
2270 TCA_VLAN_PUSH_VLAN_PRIORITY;
2271 *(uint8_t *)mnl_attr_get_payload
2272 (na_vlan_priority) =
2273 conf.of_set_vlan_pcp->vlan_pcp;
/* All header-rewrite actions funnel into a single pedit TC action; the
 * helper may consume several consecutive actions via &actions. */
2276 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
2277 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
2278 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
2279 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
2280 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
2281 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
2282 case RTE_FLOW_ACTION_TYPE_SET_TTL:
2283 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
2284 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
2285 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
2287 mnl_attr_nest_start(nlh, na_act_index_cur++);
2288 flow_tcf_create_pedit_mnl_msg(nlh,
2289 &actions, item_flags);
2290 mnl_attr_nest_end(nlh, na_act_index);
2293 return rte_flow_error_set(error, ENOTSUP,
2294 RTE_FLOW_ERROR_TYPE_ACTION,
2296 "action not supported");
2300 assert(na_flower_act);
2301 mnl_attr_nest_end(nlh, na_flower_act);
2302 mnl_attr_nest_end(nlh, na_flower);
/* NOTE(review): listing is elided (embedded line numbers jump); verify
 * against the complete file. */
2307 * Send Netlink message with acknowledgment.
2310 * Flow context to use.
2312 * Message to send. This function always raises the NLM_F_ACK flag before
2316 * 0 on success, a negative errno value otherwise and rte_errno is set.
2319 flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
/* Answer buffer sized for an nlmsgerr that echoes the request payload. */
2321 alignas(struct nlmsghdr)
2322 uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
2323 nlh->nlmsg_len - sizeof(*nlh)];
/* Per-context sequence number ties the reply back to this request. */
2324 uint32_t seq = ctx->seq++;
2325 struct mnl_socket *nl = ctx->nl;
2328 nlh->nlmsg_flags |= NLM_F_ACK;
2329 nlh->nlmsg_seq = seq;
2330 ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
2332 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
2335 (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
/* NOTE(review): listing is elided (embedded line numbers jump); verify
 * against the complete file. */
2343 * Apply flow to E-Switch by sending Netlink message.
2346 * Pointer to Ethernet device.
2347 * @param[in, out] flow
2348 * Pointer to the sub flow.
2350 * Pointer to the error structure.
2353 * 0 on success, a negative errno value otherwise and rte_errno is set.
2356 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2357 struct rte_flow_error *error)
2359 struct priv *priv = dev->data->dev_private;
2360 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2361 struct mlx5_flow *dev_flow;
2362 struct nlmsghdr *nlh;
2364 dev_flow = LIST_FIRST(&flow->dev_flows);
2365 /* E-Switch flow can't be expanded. */
2366 assert(!LIST_NEXT(dev_flow, next));
/* Reuse the prepared/translated message; only type/flags are set here. */
2367 nlh = dev_flow->tcf.nlh;
2368 nlh->nlmsg_type = RTM_NEWTFILTER;
2369 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2370 if (!flow_tcf_nl_ack(ctx, nlh))
2372 return rte_flow_error_set(error, rte_errno,
2373 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2374 "netlink: failed to create TC flow rule");
/* NOTE(review): listing is elided (embedded line numbers jump); verify
 * against the complete file. */
2378 * Remove flow from E-Switch by sending Netlink message.
2381 * Pointer to Ethernet device.
2382 * @param[in, out] flow
2383 * Pointer to the sub flow.
2386 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2388 struct priv *priv = dev->data->dev_private;
2389 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2390 struct mlx5_flow *dev_flow;
2391 struct nlmsghdr *nlh;
/* Release the implicit flow counter when its refcount drops to zero. */
2395 if (flow->counter) {
2396 if (--flow->counter->ref_cnt == 0) {
2397 rte_free(flow->counter);
2398 flow->counter = NULL;
2401 dev_flow = LIST_FIRST(&flow->dev_flows);
2404 /* E-Switch flow can't be expanded. */
2405 assert(!LIST_NEXT(dev_flow, next));
2406 nlh = dev_flow->tcf.nlh;
2407 nlh->nlmsg_type = RTM_DELTFILTER;
2408 nlh->nlmsg_flags = NLM_F_REQUEST;
/* Best-effort deletion: the ACK result is deliberately ignored. */
2409 flow_tcf_nl_ack(ctx, nlh);
/* NOTE(review): listing is elided (embedded line numbers jump); verify
 * against the complete file. */
2413 * Remove flow from E-Switch and release resources of the device flow.
2416 * Pointer to Ethernet device.
2417 * @param[in, out] flow
2418 * Pointer to the sub flow.
2421 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2423 struct mlx5_flow *dev_flow;
/* Remove the kernel TC rule first, then free the device-flow object. */
2427 flow_tcf_remove(dev, flow);
2428 dev_flow = LIST_FIRST(&flow->dev_flows);
2431 /* E-Switch flow can't be expanded. */
2432 assert(!LIST_NEXT(dev_flow, next));
2433 LIST_REMOVE(dev_flow, next);
2438 * Helper routine for figuring the space size required for a parse buffer.
2441 * array of values to use.
2443 * Current location in array.
2445 * Value to compare with.
2448 * The maximum between the given value and the array value on index.
2451 flow_tcf_arr_val_max(uint16_t array[], int idx, uint16_t value)
2453 return idx < 0 ? (value) : RTE_MAX((array)[idx], value);
2457 * Parse rtnetlink message attributes filling the attribute table with the info
2461 * Attribute table to be filled.
2463 * Maximum entry in the attribute table.
2465 * The attributes section in the message to be parsed.
2467 * The length of the attributes section in the message.
2470 flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
2471 struct rtattr *rta, int len)
2473 unsigned short type;
2474 memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
2475 while (RTA_OK(rta, len)) {
2476 type = rta->rta_type;
2477 if (type <= max && !tb[type])
2479 rta = RTA_NEXT(rta, len);
2484 * Extract flow counters from flower action.
2487 * flower action stats properties in the Netlink message received.
2489 * The backward sequence of rta_types, as written in the attribute table,
2490 * we need to traverse in order to get to the requested object.
2492 * Current location in rta_type table.
2494 * data holding the count statistics of the rte_flow retrieved from
2498 * 0 if data was found and retrieved, -1 otherwise.
2501 flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
2502 uint16_t rta_type[], int idx,
2503 struct gnet_stats_basic *data)
2505 int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
2507 struct rtattr *tbs[tca_stats_max + 1];
2509 if (rta == NULL || idx < 0)
2511 flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
2512 RTA_DATA(rta), RTA_PAYLOAD(rta));
2513 switch (rta_type[idx]) {
2514 case TCA_STATS_BASIC:
2515 if (tbs[TCA_STATS_BASIC]) {
2516 memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
2517 RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
2529 * Parse flower single action retrieving the requested action attribute,
2533 * flower action properties in the Netlink message received.
2535 * The backward sequence of rta_types, as written in the attribute table,
2536 * we need to traverse in order to get to the requested object.
2538 * Current location in rta_type table.
2540 * Count statistics retrieved from the message query.
2543 * 0 if data was found and retrieved, -1 otherwise.
2546 flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
2547 uint16_t rta_type[], int idx, void *data)
2549 int tca_act_max = flow_tcf_arr_val_max(rta_type, idx, TCA_ACT_STATS);
2550 struct rtattr *tb[tca_act_max + 1];
2552 if (arg == NULL || idx < 0)
2554 flow_tcf_nl_parse_rtattr(tb, tca_act_max,
2555 RTA_DATA(arg), RTA_PAYLOAD(arg));
2556 if (tb[TCA_ACT_KIND] == NULL)
2558 switch (rta_type[idx]) {
2560 if (tb[TCA_ACT_STATS])
2561 return flow_tcf_nl_action_stats_parse_and_get
2564 (struct gnet_stats_basic *)data);
2573 * Parse flower action section in the message retrieving the requested
2574 * attribute from the first action that provides it.
2577 * flower section in the Netlink message received.
2579 * The backward sequence of rta_types, as written in the attribute table,
2580 * we need to traverse in order to get to the requested object.
2582 * Current location in rta_type table.
2584 * data retrieved from the message query.
2587 * 0 if data was found and retrieved, -1 otherwise.
2590 flow_tcf_nl_action_parse_and_get(struct rtattr *arg,
2591 uint16_t rta_type[], int idx, void *data)
2593 struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
2596 if (arg == NULL || idx < 0)
2598 flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
2599 RTA_DATA(arg), RTA_PAYLOAD(arg));
2600 switch (rta_type[idx]) {
2602 * flow counters are stored in the actions defined by the flow
2603 * and not in the flow itself, therefore we need to traverse the
2604 * flower chain of actions in search for them.
2606 * Note that the index is not decremented here.
2609 for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
2611 !flow_tcf_nl_parse_one_action_and_get(tb[i],
2624 * Parse flower classifier options in the message, retrieving the requested
2625 * attribute if found.
2628 * flower section in the Netlink message received.
2630 * The backward sequence of rta_types, as written in the attribute table,
2631 * we need to traverse in order to get to the requested object.
2633 * Current location in rta_type table.
2635 * data retrieved from the message query.
2638 * 0 if data was found and retrieved, -1 otherwise.
2641 flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
2642 uint16_t rta_type[], int idx, void *data)
2644 int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
2646 struct rtattr *tb[tca_flower_max + 1];
2648 if (!opt || idx < 0)
2650 flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
2651 RTA_DATA(opt), RTA_PAYLOAD(opt));
2652 switch (rta_type[idx]) {
2653 case TCA_FLOWER_ACT:
2654 if (tb[TCA_FLOWER_ACT])
2655 return flow_tcf_nl_action_parse_and_get
2656 (tb[TCA_FLOWER_ACT],
2657 rta_type, --idx, data);
2666 * Parse Netlink reply on filter query, retrieving the flow counters.
2669 * Message received from Netlink.
2671 * The backward sequence of rta_types, as written in the attribute table,
2672 * we need to traverse in order to get to the requested object.
2674 * Current location in rta_type table.
2676 * data retrieved from the message query.
2679 * 0 if data was found and retrieved, -1 otherwise.
2682 flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
2683 uint16_t rta_type[], int idx, void *data)
2685 struct nlmsghdr *nlh = cnlh;
2686 struct tcmsg *t = NLMSG_DATA(nlh);
2687 int len = nlh->nlmsg_len;
2688 int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
2689 struct rtattr *tb[tca_max + 1];
2693 if (nlh->nlmsg_type != RTM_NEWTFILTER &&
2694 nlh->nlmsg_type != RTM_GETTFILTER &&
2695 nlh->nlmsg_type != RTM_DELTFILTER)
2697 len -= NLMSG_LENGTH(sizeof(*t));
2700 flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
2701 /* Not a TC flower flow - bail out */
2702 if (!tb[TCA_KIND] ||
2703 strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
2705 switch (rta_type[idx]) {
2707 if (tb[TCA_OPTIONS])
2708 return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
2719 * A callback to parse Netlink reply on TC flower query.
2722 * Message received from Netlink.
2724 * Pointer to data area to be filled by the parsing routine.
2725 * assumed to be a pointer to struct flow_tcf_stats_basic.
2731 flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
2734 * The backward sequence of rta_types to pass in order to get
2737 uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
2738 TCA_FLOWER_ACT, TCA_OPTIONS };
2739 struct flow_tcf_stats_basic *sb_data = data;
2741 const struct nlmsghdr *c;
2742 struct nlmsghdr *nc;
2743 } tnlh = { .c = nlh };
2745 if (!flow_tcf_nl_filter_parse_and_get(tnlh.nc, rta_type,
2746 RTE_DIM(rta_type) - 1,
2747 (void *)&sb_data->counters))
2748 sb_data->valid = true;
2753 * Query a TC flower rule for its statistics via netlink.
2756 * Pointer to Ethernet device.
2758 * Pointer to the sub flow.
2760 * data retrieved by the query.
2762 * Perform verbose error reporting if not NULL.
2765 * 0 on success, a negative errno value otherwise and rte_errno is set.
2768 flow_tcf_query_count(struct rte_eth_dev *dev,
2769 struct rte_flow *flow,
2771 struct rte_flow_error *error)
2773 struct flow_tcf_stats_basic sb_data = { 0 };
2774 struct rte_flow_query_count *qc = data;
2775 struct priv *priv = dev->data->dev_private;
2776 struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2777 struct mnl_socket *nl = ctx->nl;
2778 struct mlx5_flow *dev_flow;
2779 struct nlmsghdr *nlh;
2780 uint32_t seq = priv->tcf_context->seq++;
2784 dev_flow = LIST_FIRST(&flow->dev_flows);
2785 /* E-Switch flow can't be expanded. */
2786 assert(!LIST_NEXT(dev_flow, next));
2787 if (!dev_flow->flow->counter)
2789 nlh = dev_flow->tcf.nlh;
2790 nlh->nlmsg_type = RTM_GETTFILTER;
2791 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
2792 nlh->nlmsg_seq = seq;
2793 if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
2796 ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
2799 ret = mnl_cb_run(ctx->buf, ret, seq,
2800 mnl_socket_get_portid(nl),
2801 flow_tcf_nl_message_get_stats_basic,
2804 /* Return the delta from last reset. */
2805 if (sb_data.valid) {
2806 /* Return the delta from last reset. */
2809 qc->hits = sb_data.counters.packets - flow->counter->hits;
2810 qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
2812 flow->counter->hits = sb_data.counters.packets;
2813 flow->counter->bytes = sb_data.counters.bytes;
2817 return rte_flow_error_set(error, EINVAL,
2818 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2820 "flow does not have counter");
2822 return rte_flow_error_set
2823 (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2824 NULL, "netlink: failed to read flow rule counters");
2826 return rte_flow_error_set
2827 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2828 NULL, "counters are not available.");
2834 * @see rte_flow_query()
2838 flow_tcf_query(struct rte_eth_dev *dev,
2839 struct rte_flow *flow,
2840 const struct rte_flow_action *actions,
2842 struct rte_flow_error *error)
2846 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2847 switch (actions->type) {
2848 case RTE_FLOW_ACTION_TYPE_VOID:
2850 case RTE_FLOW_ACTION_TYPE_COUNT:
2851 ret = flow_tcf_query_count(dev, flow, data, error);
2854 return rte_flow_error_set(error, ENOTSUP,
2855 RTE_FLOW_ERROR_TYPE_ACTION,
2857 "action not supported");
2863 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2864 .validate = flow_tcf_validate,
2865 .prepare = flow_tcf_prepare,
2866 .translate = flow_tcf_translate,
2867 .apply = flow_tcf_apply,
2868 .remove = flow_tcf_remove,
2869 .destroy = flow_tcf_destroy,
2870 .query = flow_tcf_query,
2874 * Create and configure a libmnl socket for Netlink flow rules.
2877 * A valid libmnl socket object pointer on success, NULL otherwise and
2880 static struct mnl_socket *
2881 flow_tcf_mnl_socket_create(void)
2883 struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
2886 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2888 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2893 mnl_socket_close(nl);
2898 * Destroy a libmnl socket.
2901 * Libmnl socket of the @p NETLINK_ROUTE kind.
2904 flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
2907 mnl_socket_close(nl);
2911 * Initialize ingress qdisc of a given network interface.
2914 * Pointer to tc-flower context to use.
2916 * Index of network interface to initialize.
2918 * Perform verbose error reporting if not NULL.
2921 * 0 on success, a negative errno value otherwise and rte_errno is set.
2924 mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
2925 unsigned int ifindex, struct rte_flow_error *error)
2927 struct nlmsghdr *nlh;
2929 alignas(struct nlmsghdr)
2930 uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2932 /* Destroy existing ingress qdisc and everything attached to it. */
2933 nlh = mnl_nlmsg_put_header(buf);
2934 nlh->nlmsg_type = RTM_DELQDISC;
2935 nlh->nlmsg_flags = NLM_F_REQUEST;
2936 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2937 tcm->tcm_family = AF_UNSPEC;
2938 tcm->tcm_ifindex = ifindex;
2939 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2940 tcm->tcm_parent = TC_H_INGRESS;
2941 /* Ignore errors when qdisc is already absent. */
2942 if (flow_tcf_nl_ack(ctx, nlh) &&
2943 rte_errno != EINVAL && rte_errno != ENOENT)
2944 return rte_flow_error_set(error, rte_errno,
2945 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2946 "netlink: failed to remove ingress"
2948 /* Create fresh ingress qdisc. */
2949 nlh = mnl_nlmsg_put_header(buf);
2950 nlh->nlmsg_type = RTM_NEWQDISC;
2951 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2952 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2953 tcm->tcm_family = AF_UNSPEC;
2954 tcm->tcm_ifindex = ifindex;
2955 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2956 tcm->tcm_parent = TC_H_INGRESS;
2957 mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2958 if (flow_tcf_nl_ack(ctx, nlh))
2959 return rte_flow_error_set(error, rte_errno,
2960 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2961 "netlink: failed to create ingress"
2967 * Create libmnl context for Netlink flow rules.
2970 * A valid libmnl socket object pointer on success, NULL otherwise and
2973 struct mlx5_flow_tcf_context *
2974 mlx5_flow_tcf_context_create(void)
2976 struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
2981 ctx->nl = flow_tcf_mnl_socket_create();
2984 ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
2985 ctx->buf = rte_zmalloc(__func__,
2986 ctx->buf_size, sizeof(uint32_t));
2989 ctx->seq = random();
2992 mlx5_flow_tcf_context_destroy(ctx);
2997 * Destroy a libmnl context.
3000 * Libmnl socket of the @p NETLINK_ROUTE kind.
3003 mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
3007 flow_tcf_mnl_socket_destroy(ctx->nl);