1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
22 #include <sys/socket.h>
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
28 #include <rte_malloc.h>
31 #include "mlx5_flow.h"
32 #include "mlx5_autoconf.h"
34 #ifdef HAVE_TC_ACT_VLAN
36 #include <linux/tc_act/tc_vlan.h>
38 #else /* HAVE_TC_ACT_VLAN */
40 #define TCA_VLAN_ACT_POP 1
41 #define TCA_VLAN_ACT_PUSH 2
42 #define TCA_VLAN_ACT_MODIFY 3
43 #define TCA_VLAN_PARMS 2
44 #define TCA_VLAN_PUSH_VLAN_ID 3
45 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
46 #define TCA_VLAN_PAD 5
47 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
54 #endif /* HAVE_TC_ACT_VLAN */
56 #ifdef HAVE_TC_ACT_PEDIT
58 #include <linux/tc_act/tc_pedit.h>
#else /* HAVE_TC_ACT_PEDIT */
74 TCA_PEDIT_KEY_EX_HTYPE = 1,
75 TCA_PEDIT_KEY_EX_CMD = 2,
76 __TCA_PEDIT_KEY_EX_MAX
79 enum pedit_header_type {
80 TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
81 TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
82 TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
83 TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
84 TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
85 TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
90 TCA_PEDIT_KEY_EX_CMD_SET = 0,
91 TCA_PEDIT_KEY_EX_CMD_ADD = 1,
98 __u32 off; /*offset */
105 struct tc_pedit_sel {
109 struct tc_pedit_key keys[0];
#endif /* HAVE_TC_ACT_PEDIT */
114 /* Normally found in linux/netlink.h. */
115 #ifndef NETLINK_CAP_ACK
116 #define NETLINK_CAP_ACK 10
119 /* Normally found in linux/pkt_sched.h. */
120 #ifndef TC_H_MIN_INGRESS
121 #define TC_H_MIN_INGRESS 0xfff2u
124 /* Normally found in linux/pkt_cls.h. */
125 #ifndef TCA_CLS_FLAGS_SKIP_SW
126 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
128 #ifndef HAVE_TCA_CHAIN
131 #ifndef HAVE_TCA_FLOWER_ACT
132 #define TCA_FLOWER_ACT 3
134 #ifndef HAVE_TCA_FLOWER_FLAGS
135 #define TCA_FLOWER_FLAGS 22
137 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
138 #define TCA_FLOWER_KEY_ETH_TYPE 8
140 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
141 #define TCA_FLOWER_KEY_ETH_DST 4
143 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
144 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
146 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
147 #define TCA_FLOWER_KEY_ETH_SRC 6
149 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
150 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
152 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
153 #define TCA_FLOWER_KEY_IP_PROTO 9
155 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
156 #define TCA_FLOWER_KEY_IPV4_SRC 10
158 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
159 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
161 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
162 #define TCA_FLOWER_KEY_IPV4_DST 12
164 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
165 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
167 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
168 #define TCA_FLOWER_KEY_IPV6_SRC 14
170 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
171 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
173 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
174 #define TCA_FLOWER_KEY_IPV6_DST 16
176 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
177 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
179 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
180 #define TCA_FLOWER_KEY_TCP_SRC 18
182 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
183 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
185 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
186 #define TCA_FLOWER_KEY_TCP_DST 19
188 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
189 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
191 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
192 #define TCA_FLOWER_KEY_UDP_SRC 20
194 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
195 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
197 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
198 #define TCA_FLOWER_KEY_UDP_DST 21
200 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
201 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
203 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
204 #define TCA_FLOWER_KEY_VLAN_ID 23
206 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
207 #define TCA_FLOWER_KEY_VLAN_PRIO 24
209 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
210 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
212 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
213 #define TCA_FLOWER_KEY_TCP_FLAGS 71
215 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
216 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
218 #ifndef HAVE_TC_ACT_GOTO_CHAIN
219 #define TC_ACT_GOTO_CHAIN 0x20000000
222 #ifndef IPV6_ADDR_LEN
223 #define IPV6_ADDR_LEN 16
226 #ifndef IPV4_ADDR_LEN
227 #define IPV4_ADDR_LEN 4
231 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
/** Empty masks for known item types. */
/*
 * Static storage, never written: all-zero masks returned by
 * flow_tcf_item_mask() when an item carries no spec/mask at all.
 */
	struct rte_flow_item_port_id port_id;
	struct rte_flow_item_eth eth;
	struct rte_flow_item_vlan vlan;
	struct rte_flow_item_ipv4 ipv4;
	struct rte_flow_item_ipv6 ipv6;
	struct rte_flow_item_tcp tcp;
	struct rte_flow_item_udp udp;
} flow_tcf_mask_empty;
/** Supported masks for known item types. */
/*
 * Upper bound on what this backend can match on: any user mask with a bit
 * set outside the corresponding field here is rejected by
 * flow_tcf_item_mask() during validation.
 */
static const struct {
	struct rte_flow_item_port_id port_id;
	struct rte_flow_item_eth eth;
	struct rte_flow_item_vlan vlan;
	struct rte_flow_item_ipv4 ipv4;
	struct rte_flow_item_ipv6 ipv6;
	struct rte_flow_item_tcp tcp;
	struct rte_flow_item_udp udp;
} flow_tcf_mask_supported = {
		/* Ethernet: full EtherType and full MAC addresses. */
		.type = RTE_BE16(0xffff),
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		/* PCP and VID only, no DEI. */
		.tci = RTE_BE16(0xefff),
		.inner_type = RTE_BE16(0xffff),
		/* IPv4: protocol and both addresses, fully masked. */
		.next_proto_id = 0xff,
		.src_addr = RTE_BE32(0xffffffff),
		.dst_addr = RTE_BE32(0xffffffff),
		/* IPv6: full 128-bit source address. */
			"\xff\xff\xff\xff\xff\xff\xff\xff"
			"\xff\xff\xff\xff\xff\xff\xff\xff",
		/* IPv6: full 128-bit destination address. */
			"\xff\xff\xff\xff\xff\xff\xff\xff"
			"\xff\xff\xff\xff\xff\xff\xff\xff",
		/* TCP: full source/destination ports. */
		.src_port = RTE_BE16(0xffff),
		.dst_port = RTE_BE16(0xffff),
		/* UDP: full source/destination ports. */
		.src_port = RTE_BE16(0xffff),
		.dst_port = RTE_BE16(0xffff),
297 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
298 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
299 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
300 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
301 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
303 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
/** DPDK port to network interface index (ifindex) conversion. */
/*
 * Entries form a table built by flow_tcf_build_ptoi_table(); a zero
 * ifindex terminates the table (see that function's comments).
 */
struct flow_tcf_ptoi {
	uint16_t port_id; /**< DPDK port ID. */
	unsigned int ifindex; /**< Network interface index. */
311 /* Due to a limitation on driver/FW. */
312 #define MLX5_TCF_GROUP_ID_MAX 3
313 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
315 #define MLX5_TCF_FATE_ACTIONS \
316 (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
317 MLX5_FLOW_ACTION_JUMP)
319 #define MLX5_TCF_VLAN_ACTIONS \
320 (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
321 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
323 #define MLX5_TCF_PEDIT_ACTIONS \
324 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
325 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
326 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
327 MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)
329 #define MLX5_TCF_CONFIG_ACTIONS \
330 (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
331 MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
332 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
333 (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
335 #define MAX_PEDIT_KEYS 128
336 #define SZ_PEDIT_KEY_VAL 4
338 #define NUM_OF_PEDIT_KEYS(sz) \
339 (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
/* Extended pedit key: header type + command (TCA_PEDIT_KEY_EX_*). */
struct pedit_key_ex {
	enum pedit_header_type htype; /* Which protocol header the key edits. */
/* Accumulator used while translating rte_flow "set" actions into one
 * tc-pedit action: selector, plain keys and their extended descriptors. */
struct pedit_parser {
	struct tc_pedit_sel sel; /* nkeys/action selector sent to kernel. */
	struct tc_pedit_key keys[MAX_PEDIT_KEYS]; /* off/mask/val per key. */
	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS]; /* htype/cmd per key. */
/**
 * Fill the pedit key implementing the DEC_TTL/SET_TTL actions.
 *
 * @param[in] actions
 *   Pointer to action specification.
 * @param[in,out] p_parser
 *   Pointer to pedit_parser; one key is appended and nkeys incremented.
 * @param[in] item_flags
 *   Flags of all items presented (selects IPv4 TTL vs IPv6 hop limit).
 */
flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
			       struct pedit_parser *p_parser,
	int idx = p_parser->sel.nkeys;
	/* Keys work on 32-bit words; only the low-order byte is rewritten. */
	p_parser->keys[idx].mask = 0xFFFFFF00;
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
		p_parser->keys[idx].off =
			offsetof(struct ipv4_hdr, time_to_live);
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
		p_parser->keys[idx].off =
			offsetof(struct ipv6_hdr, hop_limits);
	if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
		/* ADD of 0xFF == subtract 1 (modular byte arithmetic). */
		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
		p_parser->keys[idx].val = 0x000000FF;
		/* SET_TTL: write the requested TTL value directly. */
		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
		p_parser->keys[idx].val =
			(__u32)((const struct rte_flow_action_set_ttl *)
				actions->conf)->ttl_value;
	p_parser->sel.nkeys = (++idx);
/**
 * Fill the pedit key rewriting a transport (TCP/UDP) port value.
 *
 * @param[in] actions
 *   Pointer to action specification (SET_TP_SRC or SET_TP_DST).
 * @param[in,out] p_parser
 *   Pointer to pedit_parser; one key is appended and nkeys incremented.
 * @param[in] item_flags
 *   Flags of all items presented (selects TCP vs UDP header type).
 */
flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
			       struct pedit_parser *p_parser,
	int idx = p_parser->sel.nkeys;
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
	/* The src/dst port offsets are identical for TCP and UDP headers,
	 * so struct tcp_hdr offsets are valid for both. */
	p_parser->keys[idx].off =
		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
		offsetof(struct tcp_hdr, src_port) :
		offsetof(struct tcp_hdr, dst_port);
	/* Rewrite only the 16-bit port inside the 32-bit pedit word. */
	p_parser->keys[idx].mask = 0xFFFF0000;
	p_parser->keys[idx].val =
		(__u32)((const struct rte_flow_action_set_tp *)
			actions->conf)->port;
	p_parser->sel.nkeys = (++idx);
/**
 * Fill the pedit keys rewriting an IPv6 address (128 bits => several
 * 32-bit keys).
 *
 * @param[in] actions
 *   Pointer to action specification (SET_IPV6_SRC or SET_IPV6_DST).
 * @param[in,out] p_parser
 *   Pointer to pedit_parser; NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN) keys are
 *   appended and nkeys advanced accordingly.
 */
flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
				 struct pedit_parser *p_parser)
	int idx = p_parser->sel.nkeys;
	int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
	/* Base offset: src_addr or dst_addr field of the IPv6 header. */
		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
		offsetof(struct ipv6_hdr, src_addr) :
		offsetof(struct ipv6_hdr, dst_addr);
	const struct rte_flow_action_set_ipv6 *conf =
		(const struct rte_flow_action_set_ipv6 *)actions->conf;
	for (int i = 0; i < keys; i++, idx++) {
		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
		/* ~UINT32_MAX == 0: replace the full 32-bit word. */
		p_parser->keys[idx].mask = ~UINT32_MAX;
		memcpy(&p_parser->keys[idx].val,
		       conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
	p_parser->sel.nkeys += keys;
/**
 * Fill the pedit key rewriting an IPv4 address (one 32-bit key).
 *
 * @param[in] actions
 *   Pointer to action specification (SET_IPV4_SRC or SET_IPV4_DST).
 * @param[in,out] p_parser
 *   Pointer to pedit_parser; one key is appended and nkeys incremented.
 */
flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
				 struct pedit_parser *p_parser)
	int idx = p_parser->sel.nkeys;
	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
	p_parser->keys[idx].off =
		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
		offsetof(struct ipv4_hdr, src_addr) :
		offsetof(struct ipv4_hdr, dst_addr);
	/* ~UINT32_MAX == 0: replace the full 32-bit address word. */
	p_parser->keys[idx].mask = ~UINT32_MAX;
	p_parser->keys[idx].val =
		((const struct rte_flow_action_set_ipv4 *)
		 actions->conf)->ipv4_addr;
	p_parser->sel.nkeys = (++idx);
/**
 * Create the pedit's netlink attributes in a pre-allocated netlink
 * message buffer.
 *
 * @param[in,out] nl
 *   Pointer to pre-allocated netlink message buffer.
 * @param[in,out] actions
 *   Pointer to pointer of actions specification; advanced past every
 *   consumed "set"-type action so the caller resumes after them.
 * @param[in] item_flags
 *   Flags of all items presented.
 */
flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
			      const struct rte_flow_action **actions,
	struct pedit_parser p_parser;
	struct nlattr *na_act_options;
	struct nlattr *na_pedit_keys;
	memset(&p_parser, 0, sizeof(p_parser));
	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
	na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
	/* all modify header actions should be in one tc-pedit action */
	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
		switch ((*actions)->type) {
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
			flow_tcf_pedit_key_set_tp_port(*actions,
						       &p_parser, item_flags);
		case RTE_FLOW_ACTION_TYPE_SET_TTL:
		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
			flow_tcf_pedit_key_set_dec_ttl(*actions,
						       &p_parser, item_flags);
			/* First non-"set" action ends the pedit action. */
			goto pedit_mnl_msg_done;
	/* PIPE: continue with the next TC action after the rewrite. */
	p_parser.sel.action = TC_ACT_PIPE;
	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
		     sizeof(p_parser.sel) +
		     p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
		mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
	/* Emit one nested TCA_PEDIT_KEY_EX (htype + cmd) per key. */
	for (int i = 0; i < p_parser.sel.nkeys; i++) {
		struct nlattr *na_pedit_key =
			mnl_attr_nest_start(nl,
					    TCA_PEDIT_KEY_EX | NLA_F_NESTED);
		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
				 p_parser.keys_ex[i].htype);
		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
				 p_parser.keys_ex[i].cmd);
		mnl_attr_nest_end(nl, na_pedit_key);
	mnl_attr_nest_end(nl, na_pedit_keys);
	mnl_attr_nest_end(nl, na_act_options);
/**
 * Calculate the maximum memory size of one TC-pedit action.
 * One TC-pedit action can contain a set of keys, each defining
 * a rewrite element (rte_flow action).
 *
 * @param[in,out] actions
 *   Actions specification; advanced past every consumed "set" action.
 * @param[in,out] action_flags
 *   Accumulated MLX5_FLOW_ACTION_* flags for the consumed actions.
 *
 * @return
 *   Max memory size of one TC-pedit action.
 */
flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
				uint64_t *action_flags)
	/* Fixed overhead: action index nest, "pedit" kind, options nest. */
	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
		      SZ_NLATTR_STRZ_OF("pedit") +
		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
		switch ((*actions)->type) {
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
			keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
			flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
			keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
			flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
			keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
			flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
			keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
			flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
			/* TCP has the same port size/offset as UDP. */
			keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
			flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
			/* TCP has the same port size/offset as UDP. */
			keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
			flags |= MLX5_FLOW_ACTION_SET_TP_DST;
		case RTE_FLOW_ACTION_TYPE_SET_TTL:
			keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
			flags |= MLX5_FLOW_ACTION_SET_TTL;
		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
			keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
			flags |= MLX5_FLOW_ACTION_DEC_TTL;
			/* First non-"set" action ends the pedit action. */
			goto get_pedit_action_size_done;
get_pedit_action_size_done:
	/* TCA_PEDIT_PARAMS_EX */
		      SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
					keys * sizeof(struct tc_pedit_key));
	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
		      /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
		      (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
		       SZ_NLATTR_DATA_OF(2));
	(*action_flags) |= flags;
/**
 * Retrieve mask for pattern item.
 *
 * This function does basic sanity checks on a pattern item in order to
 * return the most appropriate mask for it.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask_default
 *   Default mask for pattern item as specified by the flow API.
 * @param[in] mask_supported
 *   Mask fields supported by the implementation.
 * @param[in] mask_empty
 *   Empty mask to return when there is no specification.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   Either @p item->mask or one of the mask parameters on success, NULL
 *   otherwise and rte_errno is set.
 */
flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
		   const void *mask_supported, const void *mask_empty,
		   size_t mask_size, struct rte_flow_error *error)
	/* item->last and item->mask cannot exist without item->spec. */
	if (!item->spec && (item->mask || item->last)) {
		rte_flow_error_set(error, EINVAL,
				   RTE_FLOW_ERROR_TYPE_ITEM, item,
				   "\"mask\" or \"last\" field provided without"
				   " a corresponding \"spec\"");
	/* No spec, no mask, no problem. */
	mask = item->mask ? item->mask : mask_default;
	/*
	 * Single-pass check to make sure that:
	 * - Mask is supported, no bits are set outside mask_supported.
	 * - Both item->spec and item->last are included in mask.
	 */
	for (i = 0; i != mask_size; ++i) {
		/* Any mask bit outside the supported mask is a hard error. */
		if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
		    ((const uint8_t *)mask_supported)[i]) {
			rte_flow_error_set(error, ENOTSUP,
					   RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
					   "unsupported field found"
		/* spec..last ranges must not straddle unmasked bits. */
		    (((const uint8_t *)item->spec)[i] & mask[i]) !=
		    (((const uint8_t *)item->last)[i] & mask[i])) {
			rte_flow_error_set(error, EINVAL,
					   RTE_FLOW_ERROR_TYPE_ITEM_LAST,
					   "range between \"spec\" and \"last\""
					   " not comprised in \"mask\"");
/**
 * Build a conversion table between port ID and ifindex.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[out] ptoi
 *   Pointer to ptoi table.
 * @param[in] len
 *   Size of ptoi table provided.
 *
 * @return
 *   Size of ptoi table filled.
 */
flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
	unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
	uint16_t port_id[n + 1];
	unsigned int own = 0;
	/* At least one port is needed when no switch domain is present. */
	port_id[0] = dev->data->port_id;
	/* Clamp to what the caller-provided table can actually hold. */
	n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
	for (i = 0; i != n; ++i) {
		struct rte_eth_dev_info dev_info;
		rte_eth_dev_info_get(port_id[i], &dev_info);
		if (port_id[i] == dev->data->port_id)
		ptoi[i].port_id = port_id[i];
		ptoi[i].ifindex = dev_info.if_index;
	/* Ensure first entry of ptoi[] is the current device. */
	/* An entry with zero ifindex terminates ptoi[]. */
/**
 * Verify the @p attr will be correctly understood by the E-switch.
 *
 * @param[in] attr
 *   Pointer to flow attributes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
			     struct rte_flow_error *error)
	/*
	 * Supported attributes: groups, some priorities and ingress only.
	 * group is supported only if kernel supports chain. Don't care about
	 * transfer as it is the caller's problem.
	 */
	if (attr->group > MLX5_TCF_GROUP_ID_MAX)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
					  "group ID larger than "
					  RTE_STR(MLX5_TCF_GROUP_ID_MAX)
	else if (attr->group > 0 &&
		 attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
					  "lowest priority level is "
					  RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
					  " when group is configured");
	else if (attr->priority > 0xfffe)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
					  "lowest priority level is 0xfffe");
	/* Ingress is mandatory, egress is rejected. */
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
					  attr, "only ingress is supported");
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
					  attr, "egress is not supported");
/**
 * Validate flow for E-Switch.
 *
 * @param[in] dev
 *   Pointer to the priv structure.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
flow_tcf_validate(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error)
	/* Per-item mask/spec pointers (members of local unions). */
	const struct rte_flow_item_port_id *port_id;
	const struct rte_flow_item_eth *eth;
	const struct rte_flow_item_vlan *vlan;
	const struct rte_flow_item_ipv4 *ipv4;
	const struct rte_flow_item_ipv6 *ipv6;
	const struct rte_flow_item_tcp *tcp;
	const struct rte_flow_item_udp *udp;
	/* Per-action configuration pointers (members of a local union). */
	const struct rte_flow_action_port_id *port_id;
	const struct rte_flow_action_jump *jump;
	const struct rte_flow_action_of_push_vlan *of_push_vlan;
	const struct rte_flow_action_of_set_vlan_vid *
	const struct rte_flow_action_of_set_vlan_pcp *
	const struct rte_flow_action_set_ipv4 *set_ipv4;
	const struct rte_flow_action_set_ipv6 *set_ipv6;
	uint32_t item_flags = 0;
	uint32_t action_flags = 0;
	uint8_t next_protocol = -1;
	unsigned int tcm_ifindex = 0;
	uint8_t pedit_validated = 0;
	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
	struct rte_eth_dev *port_id_dev = NULL;
	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
						PTOI_TABLE_SZ_MAX(dev)));
	ret = flow_tcf_validate_attributes(attr, error);
	/* ---- Pattern item validation pass. ---- */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
		case RTE_FLOW_ITEM_TYPE_PORT_ID:
			mask.port_id = flow_tcf_item_mask
				(items, &rte_flow_item_port_id_mask,
				 &flow_tcf_mask_supported.port_id,
				 &flow_tcf_mask_empty.port_id,
				 sizeof(flow_tcf_mask_supported.port_id),
			if (mask.port_id == &flow_tcf_mask_empty.port_id) {
			spec.port_id = items->spec;
			/* Port ID must be matched exactly or not at all. */
			if (mask.port_id->id && mask.port_id->id != 0xffffffff)
				return rte_flow_error_set
					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
					 "no support for partial mask on"
			if (!mask.port_id->id)
			/* Resolve the DPDK port ID to an ifindex. */
			for (i = 0; ptoi[i].ifindex; ++i)
				if (ptoi[i].port_id == spec.port_id->id)
			if (!ptoi[i].ifindex)
				return rte_flow_error_set
					 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
					 "missing data to convert port ID to"
			/* A single rule cannot match several ingress ports. */
			if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
				return rte_flow_error_set
					 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
					 "cannot match traffic for"
					 " several port IDs through"
					 " a single flow rule");
			tcm_ifindex = ptoi[i].ifindex;
		case RTE_FLOW_ITEM_TYPE_ETH:
			ret = mlx5_flow_validate_item_eth(items, item_flags,
			item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
			/*
			 * Redundant check due to different supported mask.
			 * Same for the rest of items.
			 */
			mask.eth = flow_tcf_item_mask
				(items, &rte_flow_item_eth_mask,
				 &flow_tcf_mask_supported.eth,
				 &flow_tcf_mask_empty.eth,
				 sizeof(flow_tcf_mask_supported.eth),
			if (mask.eth->type && mask.eth->type !=
				return rte_flow_error_set
					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
					 "no support for partial mask on"
		case RTE_FLOW_ITEM_TYPE_VLAN:
			ret = mlx5_flow_validate_item_vlan(items, item_flags,
			item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
			mask.vlan = flow_tcf_item_mask
				(items, &rte_flow_item_vlan_mask,
				 &flow_tcf_mask_supported.vlan,
				 &flow_tcf_mask_empty.vlan,
				 sizeof(flow_tcf_mask_supported.vlan),
			/* PCP (0xe000) and VID (0x0fff) parts of the TCI
			 * must each be fully masked or absent; DEI bit is
			 * not supported at all. */
			if ((mask.vlan->tci & RTE_BE16(0xe000) &&
			     (mask.vlan->tci & RTE_BE16(0xe000)) !=
			    (mask.vlan->tci & RTE_BE16(0x0fff) &&
			     (mask.vlan->tci & RTE_BE16(0x0fff)) !=
			    (mask.vlan->inner_type &&
			     mask.vlan->inner_type != RTE_BE16(0xffff)))
				return rte_flow_error_set
					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
					 "no support for partial masks on"
					 " \"tci\" (PCP and VID parts) and"
					 " \"inner_type\" fields");
		case RTE_FLOW_ITEM_TYPE_IPV4:
			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			mask.ipv4 = flow_tcf_item_mask
				(items, &rte_flow_item_ipv4_mask,
				 &flow_tcf_mask_supported.ipv4,
				 &flow_tcf_mask_empty.ipv4,
				 sizeof(flow_tcf_mask_supported.ipv4),
			if (mask.ipv4->hdr.next_proto_id &&
			    mask.ipv4->hdr.next_proto_id != 0xff)
				return rte_flow_error_set
					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
					 "no support for partial mask on"
					 " \"hdr.next_proto_id\" field");
			else if (mask.ipv4->hdr.next_proto_id)
				/* Remember L4 protocol for later L4 items. */
				((const struct rte_flow_item_ipv4 *)
				 (items->spec))->hdr.next_proto_id;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			mask.ipv6 = flow_tcf_item_mask
				(items, &rte_flow_item_ipv6_mask,
				 &flow_tcf_mask_supported.ipv6,
				 &flow_tcf_mask_empty.ipv6,
				 sizeof(flow_tcf_mask_supported.ipv6),
			if (mask.ipv6->hdr.proto &&
			    mask.ipv6->hdr.proto != 0xff)
				return rte_flow_error_set
					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
					 "no support for partial mask on"
					 " \"hdr.proto\" field");
			else if (mask.ipv6->hdr.proto)
				/* Remember L4 protocol for later L4 items. */
				((const struct rte_flow_item_ipv6 *)
				 (items->spec))->hdr.proto;
		case RTE_FLOW_ITEM_TYPE_UDP:
			ret = mlx5_flow_validate_item_udp(items, item_flags,
							  next_protocol, error);
			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
			mask.udp = flow_tcf_item_mask
				(items, &rte_flow_item_udp_mask,
				 &flow_tcf_mask_supported.udp,
				 &flow_tcf_mask_empty.udp,
				 sizeof(flow_tcf_mask_supported.udp),
		case RTE_FLOW_ITEM_TYPE_TCP:
			ret = mlx5_flow_validate_item_tcp
				 &flow_tcf_mask_supported.tcp,
			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
			mask.tcp = flow_tcf_item_mask
				(items, &rte_flow_item_tcp_mask,
				 &flow_tcf_mask_supported.tcp,
				 &flow_tcf_mask_empty.tcp,
				 sizeof(flow_tcf_mask_supported.tcp),
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "item not supported");
	/* ---- Action validation pass. ---- */
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		uint32_t current_action_flag = 0;
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
		case RTE_FLOW_ACTION_TYPE_PORT_ID:
			current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
			conf.port_id = actions->conf;
			if (conf.port_id->original)
			for (i = 0; ptoi[i].ifindex; ++i)
				if (ptoi[i].port_id == conf.port_id->id)
			if (!ptoi[i].ifindex)
				return rte_flow_error_set
					 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					 "missing data to convert port ID to"
			port_id_dev = &rte_eth_devices[conf.port_id->id];
		case RTE_FLOW_ACTION_TYPE_JUMP:
			current_action_flag = MLX5_FLOW_ACTION_JUMP;
			conf.jump = actions->conf;
			/* Jumps must go to a strictly higher group. */
			if (attr->group >= conf.jump->group)
				return rte_flow_error_set
					 RTE_FLOW_ERROR_TYPE_ACTION,
					 "can jump only to a group forward");
		case RTE_FLOW_ACTION_TYPE_DROP:
			current_action_flag = MLX5_FLOW_ACTION_DROP;
		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
			current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
			current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
			if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
				return rte_flow_error_set
					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
					 "vlan modify is not supported,"
					 " set action must follow push action");
			current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
			if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
				return rte_flow_error_set
					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
					 "vlan modify is not supported,"
					 " set action must follow push action");
			current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
			current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
			current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
			current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
			current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
			current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
			current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
		case RTE_FLOW_ACTION_TYPE_SET_TTL:
			current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
			current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  "action not supported");
		/* Actions needing configuration must carry a conf pointer. */
		if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
				return rte_flow_error_set(error, EINVAL,
						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
						"action configuration not set");
		/* All pedit ("set") actions must be contiguous in the list. */
		if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  "set actions should be "
						  "listed successively");
		if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
		    (action_flags & MLX5_TCF_PEDIT_ACTIONS))
			pedit_validated = 1;
		/* Only one fate action (drop/port_id/jump) is allowed. */
		if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
		    (action_flags & MLX5_TCF_FATE_ACTIONS))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  "can't have multiple fate"
		action_flags |= current_action_flag;
	/* ---- Cross-checks between items and actions. ---- */
	if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
	    (action_flags & MLX5_FLOW_ACTION_DROP))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  "set action is not compatible with "
	if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
	    !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  "set action must be followed by "
	    (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  "no ipv4 item found in"
	    (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  "no ipv6 item found in"
	    (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
		    (MLX5_FLOW_LAYER_OUTER_L4_UDP |
		     MLX5_FLOW_LAYER_OUTER_L4_TCP)))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  "no TCP/UDP item found in"
	/*
	 * FW syndrome (0xA9C090):
	 * set_flow_table_entry: push vlan action fte in fdb can ONLY be
	 * forward to the uplink.
	 */
	if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
	    (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
	    ((struct priv *)port_id_dev->data->dev_private)->representor)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
					  "vlan push can only be applied"
					  " when forwarding to uplink port");
	/*
	 * FW syndrome (0x294609):
	 * set_flow_table_entry: modify/pop/push actions in fdb flow table
	 * are supported only while forwarding to vport.
	 */
	if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
	    !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
					  "vlan actions are supported"
					  " only with port_id action");
	if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
					  "no fate action is found");
	    (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
		    (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
		     MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  "no IP found in pattern");
1280 * Calculate maximum size of memory for flow items of Linux TC flower and
1281 * extract specified items.
1284 * Pointer to the list of items.
1285 * @param[out] item_flags
1286 * Pointer to the detected items.
1289 * Maximum size of memory for items.
1292 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1293 const struct rte_flow_item items[],
1294 uint64_t *item_flags)
1299 size += SZ_NLATTR_STRZ_OF("flower") +
1300 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1301 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
1302 if (attr->group > 0)
1303 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
1304 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1305 switch (items->type) {
1306 case RTE_FLOW_ITEM_TYPE_VOID:
1308 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1310 case RTE_FLOW_ITEM_TYPE_ETH:
1311 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1312 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1313 /* dst/src MAC addr and mask. */
1314 flags |= MLX5_FLOW_LAYER_OUTER_L2;
1316 case RTE_FLOW_ITEM_TYPE_VLAN:
1317 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1318 SZ_NLATTR_TYPE_OF(uint16_t) +
1319 /* VLAN Ether type. */
1320 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1321 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1322 flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1324 case RTE_FLOW_ITEM_TYPE_IPV4:
1325 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1326 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1327 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1328 /* dst/src IP addr and mask. */
1329 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1331 case RTE_FLOW_ITEM_TYPE_IPV6:
1332 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1333 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1334 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1335 /* dst/src IP addr and mask. */
1336 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1338 case RTE_FLOW_ITEM_TYPE_UDP:
1339 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1340 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1341 /* dst/src port and mask. */
1342 flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1344 case RTE_FLOW_ITEM_TYPE_TCP:
1345 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1346 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1347 /* dst/src port and mask. */
1348 flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1352 "unsupported item %p type %d,"
1353 " items must be validated before flow creation",
1354 (const void *)items, items->type);
1358 *item_flags = flags;
1363 * Calculate maximum size of memory for flow actions of Linux TC flower and
1364 * extract specified actions.
1366 * @param[in] actions
1367 * Pointer to the list of actions.
1368 * @param[out] action_flags
1369 * Pointer to the detected actions.
1372 * Maximum size of memory for actions.
1375 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1376 uint64_t *action_flags)
1381 size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1382 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1383 switch (actions->type) {
1384 case RTE_FLOW_ACTION_TYPE_VOID:
1386 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1387 size += SZ_NLATTR_NEST + /* na_act_index. */
1388 SZ_NLATTR_STRZ_OF("mirred") +
1389 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1390 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1391 flags |= MLX5_FLOW_ACTION_PORT_ID;
1393 case RTE_FLOW_ACTION_TYPE_JUMP:
1394 size += SZ_NLATTR_NEST + /* na_act_index. */
1395 SZ_NLATTR_STRZ_OF("gact") +
1396 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1397 SZ_NLATTR_TYPE_OF(struct tc_gact);
1398 flags |= MLX5_FLOW_ACTION_JUMP;
1400 case RTE_FLOW_ACTION_TYPE_DROP:
1401 size += SZ_NLATTR_NEST + /* na_act_index. */
1402 SZ_NLATTR_STRZ_OF("gact") +
1403 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1404 SZ_NLATTR_TYPE_OF(struct tc_gact);
1405 flags |= MLX5_FLOW_ACTION_DROP;
1407 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1408 flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1409 goto action_of_vlan;
1410 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1411 flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1412 goto action_of_vlan;
1413 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1414 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1415 goto action_of_vlan;
1416 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1417 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1418 goto action_of_vlan;
1420 size += SZ_NLATTR_NEST + /* na_act_index. */
1421 SZ_NLATTR_STRZ_OF("vlan") +
1422 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1423 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1424 SZ_NLATTR_TYPE_OF(uint16_t) +
1425 /* VLAN protocol. */
1426 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1427 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
1429 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1430 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1431 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1432 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1433 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1434 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1435 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1436 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1437 size += flow_tcf_get_pedit_actions_size(&actions,
1442 "unsupported action %p type %d,"
1443 " items must be validated before flow creation",
1444 (const void *)actions, actions->type);
1448 *action_flags = flags;
1453 * Brand rtnetlink buffer with unique handle.
1455 * This handle should be unique for a given network interface to avoid
1459 * Pointer to Netlink message.
1461 * Unique 32-bit handle to use.
1464 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1466 struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1468 tcm->tcm_handle = handle;
1469 DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1470 (void *)nlh, handle);
1474 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1475 * memory required, allocates the memory, initializes Netlink message headers
1476 * and set unique TC message handle.
1479 * Pointer to the flow attributes.
1481 * Pointer to the list of items.
1482 * @param[in] actions
1483 * Pointer to the list of actions.
1484 * @param[out] item_flags
1485 * Pointer to bit mask of all items detected.
1486 * @param[out] action_flags
1487 * Pointer to bit mask of all actions detected.
1489 * Pointer to the error structure.
1492 * Pointer to mlx5_flow object on success,
1493 * otherwise NULL and rte_ernno is set.
1495 static struct mlx5_flow *
1496 flow_tcf_prepare(const struct rte_flow_attr *attr,
1497 const struct rte_flow_item items[],
1498 const struct rte_flow_action actions[],
1499 uint64_t *item_flags, uint64_t *action_flags,
1500 struct rte_flow_error *error)
1502 size_t size = sizeof(struct mlx5_flow) +
1503 MNL_ALIGN(sizeof(struct nlmsghdr)) +
1504 MNL_ALIGN(sizeof(struct tcmsg));
1505 struct mlx5_flow *dev_flow;
1506 struct nlmsghdr *nlh;
1509 size += flow_tcf_get_items_and_size(attr, items, item_flags);
1510 size += flow_tcf_get_actions_and_size(actions, action_flags);
1511 dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1513 rte_flow_error_set(error, ENOMEM,
1514 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1515 "not enough memory to create E-Switch flow");
1518 nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1519 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1520 *dev_flow = (struct mlx5_flow){
1521 .tcf = (struct mlx5_flow_tcf){
1527 * Generate a reasonably unique handle based on the address of the
1530 * This is straightforward on 32-bit systems where the flow pointer can
1531 * be used directly. Otherwise, its least significant part is taken
1532 * after shifting it by the previous power of two of the pointed buffer
1535 if (sizeof(dev_flow) <= 4)
1536 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1538 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1539 rte_log2_u32(rte_align32prevpow2(size)));
1544 * Translate flow for Linux TC flower and construct Netlink message.
1547 * Pointer to the priv structure.
1548 * @param[in, out] flow
1549 * Pointer to the sub flow.
1551 * Pointer to the flow attributes.
1553 * Pointer to the list of items.
1554 * @param[in] actions
1555 * Pointer to the list of actions.
1557 * Pointer to the error structure.
1560 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * NOTE(review): this listing is gapped — braces, "break;" statements and
 * the wrapping union/struct declarations fall on lines that are not
 * visible here, so only comments are added; the code is left untouched.
 */
1563 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1564 const struct rte_flow_attr *attr,
1565 const struct rte_flow_item items[],
1566 const struct rte_flow_action actions[],
1567 struct rte_flow_error *error)
/* Item spec/mask pointer variants — presumably members of local unions
 * whose wrapping declarations are on the missing lines; TODO confirm
 * against the full source. */
1570 const struct rte_flow_item_port_id *port_id;
1571 const struct rte_flow_item_eth *eth;
1572 const struct rte_flow_item_vlan *vlan;
1573 const struct rte_flow_item_ipv4 *ipv4;
1574 const struct rte_flow_item_ipv6 *ipv6;
1575 const struct rte_flow_item_tcp *tcp;
1576 const struct rte_flow_item_udp *udp;
/* Action configuration pointer variants, same remark as above. */
1579 const struct rte_flow_action_port_id *port_id;
1580 const struct rte_flow_action_jump *jump;
1581 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1582 const struct rte_flow_action_of_set_vlan_vid *
1584 const struct rte_flow_action_of_set_vlan_pcp *
1587 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1588 struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1589 struct tcmsg *tcm = dev_flow->tcf.tcm;
1590 uint32_t na_act_index_cur;
1591 bool eth_type_set = 0;
1592 bool vlan_present = 0;
1593 bool vlan_eth_type_set = 0;
1594 bool ip_proto_set = 0;
1595 struct nlattr *na_flower;
1596 struct nlattr *na_flower_act;
1597 struct nlattr *na_vlan_id = NULL;
1598 struct nlattr *na_vlan_priority = NULL;
1599 uint64_t item_flags = 0;
/* Map DPDK port IDs to kernel ifindexes and fill the TC message header. */
1601 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1602 PTOI_TABLE_SZ_MAX(dev)));
1603 nlh = dev_flow->tcf.nlh;
1604 tcm = dev_flow->tcf.tcm;
1605 /* Prepare API must have been called beforehand. */
1606 assert(nlh != NULL && tcm != NULL);
1607 tcm->tcm_family = AF_UNSPEC;
1608 tcm->tcm_ifindex = ptoi[0].ifindex;
1609 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1611 * Priority cannot be zero to prevent the kernel from picking one
1614 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1615 RTE_BE16(ETH_P_ALL));
1616 if (attr->group > 0)
1617 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1618 mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1619 na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1620 mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
/* Walk the pattern and emit TCA_FLOWER_KEY_* attributes for each item. */
1621 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1624 switch (items->type) {
1625 case RTE_FLOW_ITEM_TYPE_VOID:
/* PORT_ID overrides the default ifindex chosen above. */
1627 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1628 mask.port_id = flow_tcf_item_mask
1629 (items, &rte_flow_item_port_id_mask,
1630 &flow_tcf_mask_supported.port_id,
1631 &flow_tcf_mask_empty.port_id,
1632 sizeof(flow_tcf_mask_supported.port_id),
1634 assert(mask.port_id);
1635 if (mask.port_id == &flow_tcf_mask_empty.port_id)
1637 spec.port_id = items->spec;
1638 if (!mask.port_id->id)
1641 for (i = 0; ptoi[i].ifindex; ++i)
1642 if (ptoi[i].port_id == spec.port_id->id)
1644 assert(ptoi[i].ifindex);
1645 tcm->tcm_ifindex = ptoi[i].ifindex;
1647 case RTE_FLOW_ITEM_TYPE_ETH:
1648 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1649 mask.eth = flow_tcf_item_mask
1650 (items, &rte_flow_item_eth_mask,
1651 &flow_tcf_mask_supported.eth,
1652 &flow_tcf_mask_empty.eth,
1653 sizeof(flow_tcf_mask_supported.eth),
1656 if (mask.eth == &flow_tcf_mask_empty.eth)
1658 spec.eth = items->spec;
1659 if (mask.eth->type) {
1660 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1664 if (!is_zero_ether_addr(&mask.eth->dst)) {
1665 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1667 spec.eth->dst.addr_bytes);
1668 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1670 mask.eth->dst.addr_bytes);
1672 if (!is_zero_ether_addr(&mask.eth->src)) {
1673 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1675 spec.eth->src.addr_bytes);
1676 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1678 mask.eth->src.addr_bytes);
1681 case RTE_FLOW_ITEM_TYPE_VLAN:
1682 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1683 mask.vlan = flow_tcf_item_mask
1684 (items, &rte_flow_item_vlan_mask,
1685 &flow_tcf_mask_supported.vlan,
1686 &flow_tcf_mask_empty.vlan,
1687 sizeof(flow_tcf_mask_supported.vlan),
1691 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1692 RTE_BE16(ETH_P_8021Q))Q;
1695 if (mask.vlan == &flow_tcf_mask_empty.vlan)
1697 spec.vlan = items->spec;
1698 if (mask.vlan->inner_type) {
1699 mnl_attr_put_u16(nlh,
1700 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1701 spec.vlan->inner_type);
1702 vlan_eth_type_set = 1;
/* TCI layout: bits 15-13 PCP, bits 11-0 VID. */
1704 if (mask.vlan->tci & RTE_BE16(0xe000))
1705 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1707 (spec.vlan->tci) >> 13) & 0x7);
1708 if (mask.vlan->tci & RTE_BE16(0x0fff))
1709 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1714 case RTE_FLOW_ITEM_TYPE_IPV4:
1715 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1716 mask.ipv4 = flow_tcf_item_mask
1717 (items, &rte_flow_item_ipv4_mask,
1718 &flow_tcf_mask_supported.ipv4,
1719 &flow_tcf_mask_empty.ipv4,
1720 sizeof(flow_tcf_mask_supported.ipv4),
/* Ether type depends on whether a VLAN tag precedes the L3 header. */
1723 if (!eth_type_set || !vlan_eth_type_set)
1724 mnl_attr_put_u16(nlh,
1726 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1727 TCA_FLOWER_KEY_ETH_TYPE,
1728 RTE_BE16(ETH_P_IP));
1730 vlan_eth_type_set = 1;
1731 if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1733 spec.ipv4 = items->spec;
1734 if (mask.ipv4->hdr.next_proto_id) {
1735 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1736 spec.ipv4->hdr.next_proto_id);
1739 if (mask.ipv4->hdr.src_addr) {
1740 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1741 spec.ipv4->hdr.src_addr);
1742 mnl_attr_put_u32(nlh,
1743 TCA_FLOWER_KEY_IPV4_SRC_MASK,
1744 mask.ipv4->hdr.src_addr);
1746 if (mask.ipv4->hdr.dst_addr) {
1747 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1748 spec.ipv4->hdr.dst_addr);
1749 mnl_attr_put_u32(nlh,
1750 TCA_FLOWER_KEY_IPV4_DST_MASK,
1751 mask.ipv4->hdr.dst_addr);
1754 case RTE_FLOW_ITEM_TYPE_IPV6:
1755 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1756 mask.ipv6 = flow_tcf_item_mask
1757 (items, &rte_flow_item_ipv6_mask,
1758 &flow_tcf_mask_supported.ipv6,
1759 &flow_tcf_mask_empty.ipv6,
1760 sizeof(flow_tcf_mask_supported.ipv6),
1763 if (!eth_type_set || !vlan_eth_type_set)
1764 mnl_attr_put_u16(nlh,
1766 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1767 TCA_FLOWER_KEY_ETH_TYPE,
1768 RTE_BE16(ETH_P_IPV6));
1770 vlan_eth_type_set = 1;
1771 if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1773 spec.ipv6 = items->spec;
1774 if (mask.ipv6->hdr.proto) {
1775 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1776 spec.ipv6->hdr.proto);
1779 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1780 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1781 sizeof(spec.ipv6->hdr.src_addr),
1782 spec.ipv6->hdr.src_addr);
1783 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1784 sizeof(mask.ipv6->hdr.src_addr),
1785 mask.ipv6->hdr.src_addr);
1787 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1788 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1789 sizeof(spec.ipv6->hdr.dst_addr),
1790 spec.ipv6->hdr.dst_addr);
1791 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1792 sizeof(mask.ipv6->hdr.dst_addr),
1793 mask.ipv6->hdr.dst_addr);
1796 case RTE_FLOW_ITEM_TYPE_UDP:
1797 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1798 mask.udp = flow_tcf_item_mask
1799 (items, &rte_flow_item_udp_mask,
1800 &flow_tcf_mask_supported.udp,
1801 &flow_tcf_mask_empty.udp,
1802 sizeof(flow_tcf_mask_supported.udp),
1806 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1808 if (mask.udp == &flow_tcf_mask_empty.udp)
1810 spec.udp = items->spec;
1811 if (mask.udp->hdr.src_port) {
1812 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1813 spec.udp->hdr.src_port);
1814 mnl_attr_put_u16(nlh,
1815 TCA_FLOWER_KEY_UDP_SRC_MASK,
1816 mask.udp->hdr.src_port);
1818 if (mask.udp->hdr.dst_port) {
1819 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1820 spec.udp->hdr.dst_port);
1821 mnl_attr_put_u16(nlh,
1822 TCA_FLOWER_KEY_UDP_DST_MASK,
1823 mask.udp->hdr.dst_port);
1826 case RTE_FLOW_ITEM_TYPE_TCP:
1827 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1828 mask.tcp = flow_tcf_item_mask
1829 (items, &rte_flow_item_tcp_mask,
1830 &flow_tcf_mask_supported.tcp,
1831 &flow_tcf_mask_empty.tcp,
1832 sizeof(flow_tcf_mask_supported.tcp),
1836 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1838 if (mask.tcp == &flow_tcf_mask_empty.tcp)
1840 spec.tcp = items->spec;
1841 if (mask.tcp->hdr.src_port) {
1842 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
1843 spec.tcp->hdr.src_port);
1844 mnl_attr_put_u16(nlh,
1845 TCA_FLOWER_KEY_TCP_SRC_MASK,
1846 mask.tcp->hdr.src_port);
1848 if (mask.tcp->hdr.dst_port) {
1849 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
1850 spec.tcp->hdr.dst_port);
1851 mnl_attr_put_u16(nlh,
1852 TCA_FLOWER_KEY_TCP_DST_MASK,
1853 mask.tcp->hdr.dst_port);
1855 if (mask.tcp->hdr.tcp_flags) {
1858 TCA_FLOWER_KEY_TCP_FLAGS,
1860 (spec.tcp->hdr.tcp_flags));
1863 TCA_FLOWER_KEY_TCP_FLAGS_MASK,
1865 (mask.tcp->hdr.tcp_flags));
1869 return rte_flow_error_set(error, ENOTSUP,
1870 RTE_FLOW_ERROR_TYPE_ITEM,
1871 NULL, "item not supported");
/* Emit the nested TC action list (mirred/gact/vlan/pedit). */
1874 na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
1875 na_act_index_cur = 1;
1876 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1877 struct nlattr *na_act_index;
1878 struct nlattr *na_act;
1879 unsigned int vlan_act;
1882 switch (actions->type) {
1883 case RTE_FLOW_ACTION_TYPE_VOID:
1885 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1886 conf.port_id = actions->conf;
1887 if (conf.port_id->original)
1890 for (i = 0; ptoi[i].ifindex; ++i)
1891 if (ptoi[i].port_id == conf.port_id->id)
1893 assert(ptoi[i].ifindex);
1895 mnl_attr_nest_start(nlh, na_act_index_cur++);
1896 assert(na_act_index);
1897 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
1898 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1900 mnl_attr_put(nlh, TCA_MIRRED_PARMS,
1901 sizeof(struct tc_mirred),
1902 &(struct tc_mirred){
1903 .action = TC_ACT_STOLEN,
1904 .eaction = TCA_EGRESS_REDIR,
1905 .ifindex = ptoi[i].ifindex,
1907 mnl_attr_nest_end(nlh, na_act);
1908 mnl_attr_nest_end(nlh, na_act_index);
1910 case RTE_FLOW_ACTION_TYPE_JUMP:
1911 conf.jump = actions->conf;
1913 mnl_attr_nest_start(nlh, na_act_index_cur++);
1914 assert(na_act_index);
1915 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1916 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1918 mnl_attr_put(nlh, TCA_GACT_PARMS,
1919 sizeof(struct tc_gact),
1921 .action = TC_ACT_GOTO_CHAIN |
1924 mnl_attr_nest_end(nlh, na_act);
1925 mnl_attr_nest_end(nlh, na_act_index);
1927 case RTE_FLOW_ACTION_TYPE_DROP:
1929 mnl_attr_nest_start(nlh, na_act_index_cur++);
1930 assert(na_act_index);
1931 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1932 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1934 mnl_attr_put(nlh, TCA_GACT_PARMS,
1935 sizeof(struct tc_gact),
1937 .action = TC_ACT_SHOT,
1939 mnl_attr_nest_end(nlh, na_act);
1940 mnl_attr_nest_end(nlh, na_act_index);
/* All OF_*_VLAN variants funnel into one shared "vlan" TC action. */
1942 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1943 conf.of_push_vlan = NULL;
1944 vlan_act = TCA_VLAN_ACT_POP;
1945 goto action_of_vlan;
1946 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1947 conf.of_push_vlan = actions->conf;
1948 vlan_act = TCA_VLAN_ACT_PUSH;
1949 goto action_of_vlan;
1950 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1951 conf.of_set_vlan_vid = actions->conf;
1953 goto override_na_vlan_id;
1954 vlan_act = TCA_VLAN_ACT_MODIFY;
1955 goto action_of_vlan;
1956 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1957 conf.of_set_vlan_pcp = actions->conf;
1958 if (na_vlan_priority)
1959 goto override_na_vlan_priority;
1960 vlan_act = TCA_VLAN_ACT_MODIFY;
1961 goto action_of_vlan;
1964 mnl_attr_nest_start(nlh, na_act_index_cur++);
1965 assert(na_act_index);
1966 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
1967 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1969 mnl_attr_put(nlh, TCA_VLAN_PARMS,
1970 sizeof(struct tc_vlan),
1972 .action = TC_ACT_PIPE,
1973 .v_action = vlan_act,
1975 if (vlan_act == TCA_VLAN_ACT_POP) {
1976 mnl_attr_nest_end(nlh, na_act);
1977 mnl_attr_nest_end(nlh, na_act_index);
1980 if (vlan_act == TCA_VLAN_ACT_PUSH)
1981 mnl_attr_put_u16(nlh,
1982 TCA_VLAN_PUSH_VLAN_PROTOCOL,
1983 conf.of_push_vlan->ethertype);
/* Write TCA_VLAN_PAD placeholders now; the override_* labels below
 * retype them in place once the VID/priority values are known. */
1984 na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
1985 mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
1986 na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
1987 mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
1988 mnl_attr_nest_end(nlh, na_act);
1989 mnl_attr_nest_end(nlh, na_act_index);
1990 if (actions->type ==
1991 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
1992 override_na_vlan_id:
1993 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
1994 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
1996 (conf.of_set_vlan_vid->vlan_vid);
1997 } else if (actions->type ==
1998 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
1999 override_na_vlan_priority:
2000 na_vlan_priority->nla_type =
2001 TCA_VLAN_PUSH_VLAN_PRIORITY;
2002 *(uint8_t *)mnl_attr_get_payload
2003 (na_vlan_priority) =
2004 conf.of_set_vlan_pcp->vlan_pcp;
2007 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
2008 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
2009 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
2010 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
2011 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
2012 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
2013 case RTE_FLOW_ACTION_TYPE_SET_TTL:
2014 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
2016 mnl_attr_nest_start(nlh, na_act_index_cur++);
2017 flow_tcf_create_pedit_mnl_msg(nlh,
2018 &actions, item_flags);
2019 mnl_attr_nest_end(nlh, na_act_index);
2022 return rte_flow_error_set(error, ENOTSUP,
2023 RTE_FLOW_ERROR_TYPE_ACTION,
2025 "action not supported");
/* Close both nests opened above. */
2029 assert(na_flower_act);
2030 mnl_attr_nest_end(nlh, na_flower_act);
2031 mnl_attr_nest_end(nlh, na_flower);
2036 * Send Netlink message with acknowledgment.
2039 * Libmnl socket to use.
2041 * Message to send. This function always raises the NLM_F_ACK flag before
2045 * 0 on success, a negative errno value otherwise and rte_errno is set.
2048 flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
2050 alignas(struct nlmsghdr)
2051 uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
2052 nlh->nlmsg_len - sizeof(*nlh)];
2053 uint32_t seq = random();
2056 nlh->nlmsg_flags |= NLM_F_ACK;
2057 nlh->nlmsg_seq = seq;
2058 ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
2060 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
2063 (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
2071 * Apply flow to E-Switch by sending Netlink message.
2074 * Pointer to Ethernet device.
2075 * @param[in, out] flow
2076 * Pointer to the sub flow.
2078 * Pointer to the error structure.
2081 * 0 on success, a negative errno value otherwise and rte_ernno is set.
2084 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2085 struct rte_flow_error *error)
2087 struct priv *priv = dev->data->dev_private;
2088 struct mnl_socket *nl = priv->mnl_socket;
2089 struct mlx5_flow *dev_flow;
2090 struct nlmsghdr *nlh;
2092 dev_flow = LIST_FIRST(&flow->dev_flows);
2093 /* E-Switch flow can't be expanded. */
2094 assert(!LIST_NEXT(dev_flow, next));
2095 nlh = dev_flow->tcf.nlh;
2096 nlh->nlmsg_type = RTM_NEWTFILTER;
2097 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2098 if (!flow_tcf_nl_ack(nl, nlh))
2100 return rte_flow_error_set(error, rte_errno,
2101 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2102 "netlink: failed to create TC flow rule");
2106 * Remove flow from E-Switch by sending Netlink message.
2109 * Pointer to Ethernet device.
2110 * @param[in, out] flow
2111 * Pointer to the sub flow.
2114 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2116 struct priv *priv = dev->data->dev_private;
2117 struct mnl_socket *nl = priv->mnl_socket;
2118 struct mlx5_flow *dev_flow;
2119 struct nlmsghdr *nlh;
2123 dev_flow = LIST_FIRST(&flow->dev_flows);
2126 /* E-Switch flow can't be expanded. */
2127 assert(!LIST_NEXT(dev_flow, next));
2128 nlh = dev_flow->tcf.nlh;
2129 nlh->nlmsg_type = RTM_DELTFILTER;
2130 nlh->nlmsg_flags = NLM_F_REQUEST;
2131 flow_tcf_nl_ack(nl, nlh);
2135 * Remove flow from E-Switch and release resources of the device flow.
2138 * Pointer to Ethernet device.
2139 * @param[in, out] flow
2140 * Pointer to the sub flow.
2143 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2145 struct mlx5_flow *dev_flow;
2149 flow_tcf_remove(dev, flow);
2150 dev_flow = LIST_FIRST(&flow->dev_flows);
2153 /* E-Switch flow can't be expanded. */
2154 assert(!LIST_NEXT(dev_flow, next));
2155 LIST_REMOVE(dev_flow, next);
2159 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2160 .validate = flow_tcf_validate,
2161 .prepare = flow_tcf_prepare,
2162 .translate = flow_tcf_translate,
2163 .apply = flow_tcf_apply,
2164 .remove = flow_tcf_remove,
2165 .destroy = flow_tcf_destroy,
2169 * Initialize ingress qdisc of a given network interface.
2172 * Libmnl socket of the @p NETLINK_ROUTE kind.
2174 * Index of network interface to initialize.
2176 * Perform verbose error reporting if not NULL.
2179 * 0 on success, a negative errno value otherwise and rte_errno is set.
2182 mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
2183 struct rte_flow_error *error)
2185 struct nlmsghdr *nlh;
2187 alignas(struct nlmsghdr)
2188 uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2190 /* Destroy existing ingress qdisc and everything attached to it. */
2191 nlh = mnl_nlmsg_put_header(buf);
2192 nlh->nlmsg_type = RTM_DELQDISC;
2193 nlh->nlmsg_flags = NLM_F_REQUEST;
2194 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2195 tcm->tcm_family = AF_UNSPEC;
2196 tcm->tcm_ifindex = ifindex;
2197 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2198 tcm->tcm_parent = TC_H_INGRESS;
2199 /* Ignore errors when qdisc is already absent. */
2200 if (flow_tcf_nl_ack(nl, nlh) &&
2201 rte_errno != EINVAL && rte_errno != ENOENT)
2202 return rte_flow_error_set(error, rte_errno,
2203 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2204 "netlink: failed to remove ingress"
2206 /* Create fresh ingress qdisc. */
2207 nlh = mnl_nlmsg_put_header(buf);
2208 nlh->nlmsg_type = RTM_NEWQDISC;
2209 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2210 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2211 tcm->tcm_family = AF_UNSPEC;
2212 tcm->tcm_ifindex = ifindex;
2213 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2214 tcm->tcm_parent = TC_H_INGRESS;
2215 mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2216 if (flow_tcf_nl_ack(nl, nlh))
2217 return rte_flow_error_set(error, rte_errno,
2218 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2219 "netlink: failed to create ingress"
2225 * Create and configure a libmnl socket for Netlink flow rules.
2228 * A valid libmnl socket object pointer on success, NULL otherwise and
2232 mlx5_flow_tcf_socket_create(void)
2234 struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
2237 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2239 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2244 mnl_socket_close(nl);
/**
 * Destroy a libmnl socket.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind.
 */
void
mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
{
	mnl_socket_close(nl);
}