1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
22 #include <sys/socket.h>
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
28 #include <rte_malloc.h>
31 #include "mlx5_flow.h"
32 #include "mlx5_autoconf.h"
34 #ifdef HAVE_TC_ACT_VLAN
36 #include <linux/tc_act/tc_vlan.h>
38 #else /* HAVE_TC_ACT_VLAN */
40 #define TCA_VLAN_ACT_POP 1
41 #define TCA_VLAN_ACT_PUSH 2
42 #define TCA_VLAN_ACT_MODIFY 3
43 #define TCA_VLAN_PARMS 2
44 #define TCA_VLAN_PUSH_VLAN_ID 3
45 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
46 #define TCA_VLAN_PAD 5
47 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
54 #endif /* HAVE_TC_ACT_VLAN */
56 #ifdef HAVE_TC_ACT_PEDIT
58 #include <linux/tc_act/tc_pedit.h>
60 #else /* HAVE_TC_ACT_VLAN */
74 TCA_PEDIT_KEY_EX_HTYPE = 1,
75 TCA_PEDIT_KEY_EX_CMD = 2,
76 __TCA_PEDIT_KEY_EX_MAX
79 enum pedit_header_type {
80 TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
81 TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
82 TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
83 TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
84 TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
85 TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
90 TCA_PEDIT_KEY_EX_CMD_SET = 0,
91 TCA_PEDIT_KEY_EX_CMD_ADD = 1,
98 __u32 off; /*offset */
105 struct tc_pedit_sel {
109 struct tc_pedit_key keys[0];
112 #endif /* HAVE_TC_ACT_VLAN */
114 /* Normally found in linux/netlink.h. */
115 #ifndef NETLINK_CAP_ACK
116 #define NETLINK_CAP_ACK 10
119 /* Normally found in linux/pkt_sched.h. */
120 #ifndef TC_H_MIN_INGRESS
121 #define TC_H_MIN_INGRESS 0xfff2u
124 /* Normally found in linux/pkt_cls.h. */
125 #ifndef TCA_CLS_FLAGS_SKIP_SW
126 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
128 #ifndef HAVE_TCA_CHAIN
131 #ifndef HAVE_TCA_FLOWER_ACT
132 #define TCA_FLOWER_ACT 3
134 #ifndef HAVE_TCA_FLOWER_FLAGS
135 #define TCA_FLOWER_FLAGS 22
137 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
138 #define TCA_FLOWER_KEY_ETH_TYPE 8
140 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
141 #define TCA_FLOWER_KEY_ETH_DST 4
143 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
144 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
146 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
147 #define TCA_FLOWER_KEY_ETH_SRC 6
149 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
150 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
152 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
153 #define TCA_FLOWER_KEY_IP_PROTO 9
155 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
156 #define TCA_FLOWER_KEY_IPV4_SRC 10
158 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
159 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
161 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
162 #define TCA_FLOWER_KEY_IPV4_DST 12
164 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
165 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
167 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
168 #define TCA_FLOWER_KEY_IPV6_SRC 14
170 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
171 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
173 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
174 #define TCA_FLOWER_KEY_IPV6_DST 16
176 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
177 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
179 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
180 #define TCA_FLOWER_KEY_TCP_SRC 18
182 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
183 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
185 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
186 #define TCA_FLOWER_KEY_TCP_DST 19
188 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
189 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
191 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
192 #define TCA_FLOWER_KEY_UDP_SRC 20
194 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
195 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
197 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
198 #define TCA_FLOWER_KEY_UDP_DST 21
200 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
201 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
203 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
204 #define TCA_FLOWER_KEY_VLAN_ID 23
206 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
207 #define TCA_FLOWER_KEY_VLAN_PRIO 24
209 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
210 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
212 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
213 #define TCA_FLOWER_KEY_TCP_FLAGS 71
215 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
216 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
218 #ifndef HAVE_TC_ACT_GOTO_CHAIN
219 #define TC_ACT_GOTO_CHAIN 0x20000000
222 #ifndef IPV6_ADDR_LEN
223 #define IPV6_ADDR_LEN 16
226 #ifndef IPV4_ADDR_LEN
227 #define IPV4_ADDR_LEN 4
231 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
234 /** Empty masks for known item types. */
/* All-zero masks returned by flow_tcf_item_mask() when an item carries no
 * spec/mask; compared against by pointer in flow_tcf_validate().
 * NOTE(review): this listing is sampled — the "static const struct {" opener
 * (original line 235) is missing here; confirm against the full file. */
236 struct rte_flow_item_port_id port_id;
237 struct rte_flow_item_eth eth;
238 struct rte_flow_item_vlan vlan;
239 struct rte_flow_item_ipv4 ipv4;
240 struct rte_flow_item_ipv6 ipv6;
241 struct rte_flow_item_tcp tcp;
242 struct rte_flow_item_udp udp;
243 } flow_tcf_mask_empty;
245 /** Supported masks for known item types. */
/* Widest masks the TC-flower backend can offload; flow_tcf_item_mask()
 * rejects any user mask with bits set outside these.
 * NOTE(review): sampled listing — the per-member initializer openers
 * (".port_id = {", ".eth = {", etc.) are missing between the lines below. */
246 static const struct {
247 struct rte_flow_item_port_id port_id;
248 struct rte_flow_item_eth eth;
249 struct rte_flow_item_vlan vlan;
250 struct rte_flow_item_ipv4 ipv4;
251 struct rte_flow_item_ipv6 ipv6;
252 struct rte_flow_item_tcp tcp;
253 struct rte_flow_item_udp udp;
254 } flow_tcf_mask_supported = {
259 .type = RTE_BE16(0xffff),
260 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
261 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
264 /* PCP and VID only, no DEI. */
265 .tci = RTE_BE16(0xefff),
266 .inner_type = RTE_BE16(0xffff),
269 .next_proto_id = 0xff,
270 .src_addr = RTE_BE32(0xffffffff),
271 .dst_addr = RTE_BE32(0xffffffff),
/* IPv6 src/dst addresses: full 128-bit match. */
276 "\xff\xff\xff\xff\xff\xff\xff\xff"
277 "\xff\xff\xff\xff\xff\xff\xff\xff",
279 "\xff\xff\xff\xff\xff\xff\xff\xff"
280 "\xff\xff\xff\xff\xff\xff\xff\xff",
/* TCP ports: full 16-bit match. */
283 .src_port = RTE_BE16(0xffff),
284 .dst_port = RTE_BE16(0xffff),
/* UDP ports: full 16-bit match. */
288 .src_port = RTE_BE16(0xffff),
289 .dst_port = RTE_BE16(0xffff),
293 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
294 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
295 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
296 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
297 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
299 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
301 /** DPDK port to network interface index (ifindex) conversion. */
/* Table entry built by flow_tcf_build_ptoi_table(); a zero ifindex
 * terminates the table (see the loops in flow_tcf_validate()).
 * NOTE(review): sampled listing — the closing "};" (original line 305)
 * is missing here. */
302 struct flow_tcf_ptoi {
303 uint16_t port_id; /**< DPDK port ID. */
304 unsigned int ifindex; /**< Network interface index. */
307 /* Due to a limitation on driver/FW. */
308 #define MLX5_TCF_GROUP_ID_MAX 3
309 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
311 #define MLX5_TCF_FATE_ACTIONS \
312 (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
313 MLX5_FLOW_ACTION_JUMP)
315 #define MLX5_TCF_VLAN_ACTIONS \
316 (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
317 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
319 #define MLX5_TCF_PEDIT_ACTIONS \
320 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
321 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
322 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)
324 #define MLX5_TCF_CONFIG_ACTIONS \
325 (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
326 MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
327 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | MLX5_TCF_PEDIT_ACTIONS)
329 #define MAX_PEDIT_KEYS 128
330 #define SZ_PEDIT_KEY_VAL 4
332 #define NUM_OF_PEDIT_KEYS(sz) \
333 (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
/* Extended pedit key metadata: header type + command, emitted as
 * TCA_PEDIT_KEY_EX_HTYPE / TCA_PEDIT_KEY_EX_CMD netlink attributes.
 * NOTE(review): sampled listing — the "cmd" member and the closing "};"
 * of pedit_key_ex are missing between lines 336 and 340. */
335 struct pedit_key_ex {
336 enum pedit_header_type htype;
/* Accumulator used while translating rte_flow "set" actions into one
 * tc-pedit action; sel.nkeys counts entries in keys[]/keys_ex[]. */
340 struct pedit_parser {
341 struct tc_pedit_sel sel;
342 struct tc_pedit_key keys[MAX_PEDIT_KEYS];
343 struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
/* Append one pedit key rewriting the TCP/UDP source or destination port.
 * NOTE(review): sampled listing — the "static void" line, the trailing
 * "uint64_t item_flags)" parameter line and the opening brace are missing. */
348 * Set pedit key of transport (TCP/UDP) port value
351 * pointer to action specification
352 * @param[in,out] p_parser
353 * pointer to pedit_parser
354 * @param[in] item_flags
355 * flags of all items presented
358 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
359 struct pedit_parser *p_parser,
362 int idx = p_parser->sel.nkeys;
/* Header type follows whichever L4 item the pattern matched. */
364 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
365 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
366 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
367 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
368 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
369 /* offset of src/dst port is same for TCP and UDP */
370 p_parser->keys[idx].off =
371 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
372 offsetof(struct tcp_hdr, src_port) :
373 offsetof(struct tcp_hdr, dst_port);
/* Pedit mask semantics: set bits are PRESERVED in the packet, so
 * 0xFFFF0000 rewrites only the low 16 bits of the 32-bit word —
 * presumably the port field; confirm byte order against kernel pedit. */
374 p_parser->keys[idx].mask = 0xFFFF0000;
375 p_parser->keys[idx].val =
376 (__u32)((const struct rte_flow_action_set_tp *)
377 actions->conf)->port;
378 p_parser->sel.nkeys = (++idx);
/* Append the pedit keys rewriting a full 128-bit IPv6 src/dst address:
 * one 4-byte key per SZ_PEDIT_KEY_VAL slice (4 keys total).
 * NOTE(review): sampled listing — "static void", the opening brace,
 * the "int off_base =" declaration head (line 395) and the memcpy's
 * length argument / loop closing brace are missing. */
382 * Set pedit key of ipv6 address
385 * pointer to action specification
386 * @param[in,out] p_parser
387 * pointer to pedit_parser
390 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
391 struct pedit_parser *p_parser)
393 int idx = p_parser->sel.nkeys;
394 int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
396 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
397 offsetof(struct ipv6_hdr, src_addr) :
398 offsetof(struct ipv6_hdr, dst_addr);
399 const struct rte_flow_action_set_ipv6 *conf =
400 (const struct rte_flow_action_set_ipv6 *)actions->conf;
402 for (int i = 0; i < keys; i++, idx++) {
403 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
404 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
405 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
/* ~UINT32_MAX == 0: no bits preserved, i.e. overwrite the whole word. */
406 p_parser->keys[idx].mask = ~UINT32_MAX;
407 memcpy(&p_parser->keys[idx].val,
408 conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
411 p_parser->sel.nkeys += keys;
/* Append one pedit key rewriting the 32-bit IPv4 src/dst address.
 * NOTE(review): sampled listing — "static void" and the opening brace
 * are missing before line 426. */
415 * Set pedit key of ipv4 address
418 * pointer to action specification
419 * @param[in,out] p_parser
420 * pointer to pedit_parser
423 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
424 struct pedit_parser *p_parser)
426 int idx = p_parser->sel.nkeys;
428 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
429 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
430 p_parser->keys[idx].off =
431 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
432 offsetof(struct ipv4_hdr, src_addr) :
433 offsetof(struct ipv4_hdr, dst_addr);
/* ~UINT32_MAX == 0: no bits preserved, i.e. overwrite the whole word. */
434 p_parser->keys[idx].mask = ~UINT32_MAX;
435 p_parser->keys[idx].val =
436 ((const struct rte_flow_action_set_ipv4 *)
437 actions->conf)->ipv4_addr;
438 p_parser->sel.nkeys = (++idx);
/* Serialize all consecutive "set" actions into a single tc-pedit action
 * inside the pre-sized netlink message; advances *actions past them.
 * NOTE(review): sampled listing — the "static void" line, the trailing
 * "uint64_t item_flags)" parameter, the per-case "break;" statements, the
 * default label before the goto, the mnl_attr_put() data argument and the
 * loop/function closing braces are all missing below. */
442 * Create the pedit's na attribute in netlink message
443 * on pre-allocate message buffer
446 * pointer to pre-allocated netlink message buffer
447 * @param[in,out] actions
448 * pointer to pointer of actions specification.
449 * @param[in,out] action_flags
450 * pointer to actions flags
451 * @param[in] item_flags
452 * flags of all item presented
455 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
456 const struct rte_flow_action **actions,
459 struct pedit_parser p_parser;
460 struct nlattr *na_act_options;
461 struct nlattr *na_pedit_keys;
463 memset(&p_parser, 0, sizeof(p_parser));
464 mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
465 na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
466 /* all modify header actions should be in one tc-pedit action */
467 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
468 switch ((*actions)->type) {
469 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
470 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
471 flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
473 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
474 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
475 flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
477 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
478 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
479 flow_tcf_pedit_key_set_tp_port(*actions,
480 &p_parser, item_flags);
/* First non-set action ends the pedit group. */
483 goto pedit_mnl_msg_done;
/* Let subsequent actions run after the rewrite. */
487 p_parser.sel.action = TC_ACT_PIPE;
488 mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
489 sizeof(p_parser.sel) +
490 p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
/* Extended keys carry header-type/cmd pairs alongside PARMS_EX. */
493 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
494 for (int i = 0; i < p_parser.sel.nkeys; i++) {
495 struct nlattr *na_pedit_key =
496 mnl_attr_nest_start(nl,
497 TCA_PEDIT_KEY_EX | NLA_F_NESTED);
498 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
499 p_parser.keys_ex[i].htype);
500 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
501 p_parser.keys_ex[i].cmd);
502 mnl_attr_nest_end(nl, na_pedit_key);
504 mnl_attr_nest_end(nl, na_pedit_keys);
505 mnl_attr_nest_end(nl, na_act_options);
/* Upper-bound the netlink buffer space needed by one tc-pedit action
 * covering all consecutive "set" actions, and OR the detected action
 * flags into *action_flags; advances *actions past the set actions.
 * NOTE(review): sampled listing — return type line, opening brace, local
 * declarations (pedit_size/keys/flags), per-case "break;" statements, the
 * default label, "pedit_size +=" heads before lines 569/574 and the final
 * return/closing brace are missing below. */
510 * Calculate max memory size of one TC-pedit actions.
511 * One TC-pedit action can contain set of keys each defining
512 * a rewrite element (rte_flow action)
514 * @param[in,out] actions
515 * actions specification.
516 * @param[in,out] action_flags
518 * @param[in,out] size
521 * Max memory size of one TC-pedit action
524 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
525 uint64_t *action_flags)
531 pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
532 SZ_NLATTR_STRZ_OF("pedit") +
533 SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
534 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
535 switch ((*actions)->type) {
536 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
537 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
538 flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
540 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
541 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
542 flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
544 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
545 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
546 flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
548 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
549 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
550 flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
552 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
553 /* TCP is as same as UDP */
554 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
555 flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
557 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
558 /* TCP is as same as UDP */
559 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
560 flags |= MLX5_FLOW_ACTION_SET_TP_DST;
563 goto get_pedit_action_size_done;
566 get_pedit_action_size_done:
567 /* TCA_PEDIT_PARAMS_EX */
569 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
570 keys * sizeof(struct tc_pedit_key));
571 pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
573 /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
574 (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
575 SZ_NLATTR_DATA_OF(2));
576 (*action_flags) |= flags;
/* Pick the effective mask for an item (user mask, API default, or empty)
 * and verify it against what the backend supports.
 * NOTE(review): sampled listing — the "static const void *" return-type
 * line, opening brace, local declarations (mask, i), several early
 * "return NULL;" paths, the spec/last comparison head before line 640 and
 * the trailing "return mask;" / closing braces are missing below. */
582 * Retrieve mask for pattern item.
584 * This function does basic sanity checks on a pattern item in order to
585 * return the most appropriate mask for it.
588 * Item specification.
589 * @param[in] mask_default
590 * Default mask for pattern item as specified by the flow API.
591 * @param[in] mask_supported
592 * Mask fields supported by the implementation.
593 * @param[in] mask_empty
594 * Empty mask to return when there is no specification.
596 * Perform verbose error reporting if not NULL.
599 * Either @p item->mask or one of the mask parameters on success, NULL
600 * otherwise and rte_errno is set.
603 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
604 const void *mask_supported, const void *mask_empty,
605 size_t mask_size, struct rte_flow_error *error)
610 /* item->last and item->mask cannot exist without item->spec. */
611 if (!item->spec && (item->mask || item->last)) {
612 rte_flow_error_set(error, EINVAL,
613 RTE_FLOW_ERROR_TYPE_ITEM, item,
614 "\"mask\" or \"last\" field provided without"
615 " a corresponding \"spec\"");
618 /* No spec, no mask, no problem. */
621 mask = item->mask ? item->mask : mask_default;
624 * Single-pass check to make sure that:
625 * - Mask is supported, no bits are set outside mask_supported.
626 * - Both item->spec and item->last are included in mask.
628 for (i = 0; i != mask_size; ++i) {
/* Any mask bit outside the supported mask is a hard reject. */
631 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
632 ((const uint8_t *)mask_supported)[i]) {
633 rte_flow_error_set(error, ENOTSUP,
634 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
635 "unsupported field found"
/* spec..last ranges must collapse to a single value under the mask. */
640 (((const uint8_t *)item->spec)[i] & mask[i]) !=
641 (((const uint8_t *)item->last)[i] & mask[i])) {
642 rte_flow_error_set(error, EINVAL,
643 RTE_FLOW_ERROR_TYPE_ITEM_LAST,
645 "range between \"spec\" and \"last\""
646 " not comprised in \"mask\"");
/* Fill ptoi[] with (DPDK port ID -> ifindex) pairs for all ports sharing
 * this device; the current device is swapped into entry 0 and a zero
 * ifindex sentinel terminates the table.
 * NOTE(review): sampled listing — the "static unsigned int" line, the
 * "unsigned int len" parameter, opening brace, the n==0 fallback branch,
 * the own-index bookkeeping, the entry-0 swap code, the sentinel store and
 * the return statement are missing below. */
654 * Build a conversion table between port ID and ifindex.
657 * Pointer to Ethernet device.
659 * Pointer to ptoi table.
661 * Size of ptoi table provided.
664 * Size of ptoi table filled.
667 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
670 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
671 uint16_t port_id[n + 1];
673 unsigned int own = 0;
675 /* At least one port is needed when no switch domain is present. */
678 port_id[0] = dev->data->port_id;
680 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
684 for (i = 0; i != n; ++i) {
685 struct rte_eth_dev_info dev_info;
687 rte_eth_dev_info_get(port_id[i], &dev_info);
688 if (port_id[i] == dev->data->port_id)
690 ptoi[i].port_id = port_id[i];
691 ptoi[i].ifindex = dev_info.if_index;
693 /* Ensure first entry of ptoi[] is the current device. */
699 /* An entry with zero ifindex terminates ptoi[]. */
/* Check flow attributes against E-Switch limits: group id, priority, and
 * direction (ingress mandatory, egress rejected).
 * NOTE(review): sampled listing — the "static int" line, opening brace,
 * parts of the error-string arguments, the ingress/egress condition lines
 * (744/748) and the final "return 0;" / closing brace are missing below. */
706 * Verify the @p attr will be correctly understood by the E-switch.
709 * Pointer to flow attributes
711 * Pointer to error structure.
714 * 0 on success, a negative errno value otherwise and rte_errno is set.
717 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
718 struct rte_flow_error *error)
721 * Supported attributes: groups, some priorities and ingress only.
722 * group is supported only if kernel supports chain. Don't care about
723 * transfer as it is the caller's problem.
725 if (attr->group > MLX5_TCF_GROUP_ID_MAX)
726 return rte_flow_error_set(error, ENOTSUP,
727 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
728 "group ID larger than "
729 RTE_STR(MLX5_TCF_GROUP_ID_MAX)
731 else if (attr->group > 0 &&
732 attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
733 return rte_flow_error_set(error, ENOTSUP,
734 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
736 "lowest priority level is "
737 RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
738 " when group is configured");
739 else if (attr->priority > 0xfffe)
740 return rte_flow_error_set(error, ENOTSUP,
741 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
743 "lowest priority level is 0xfffe");
745 return rte_flow_error_set(error, EINVAL,
746 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
747 attr, "only ingress is supported");
749 return rte_flow_error_set(error, ENOTSUP,
750 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
751 attr, "egress is not supported");
/* Validate a complete flow (attributes, pattern items, actions) for the
 * E-Switch TC-flower backend. Two passes: items first (building item_flags
 * and resolving the ingress ifindex), then actions (building action_flags),
 * followed by cross-checks between items and actions.
 * NOTE(review): sampled listing — the "static int" line, the mask/spec/conf
 * union wrappers, "break;" statements, many condition heads, closing braces
 * and the final "return 0;" are missing throughout; do not treat the lines
 * below as compilable as-is. */
756 * Validate flow for E-Switch.
759 * Pointer to the priv structure.
761 * Pointer to the flow attributes.
763 * Pointer to the list of items.
765 * Pointer to the list of actions.
767 * Pointer to the error structure.
770 * 0 on success, a negative errno value otherwise and rte_ernno is set.
773 flow_tcf_validate(struct rte_eth_dev *dev,
774 const struct rte_flow_attr *attr,
775 const struct rte_flow_item items[],
776 const struct rte_flow_action actions[],
777 struct rte_flow_error *error)
/* Per-item mask pointers (members of an anonymous union in the full file). */
780 const struct rte_flow_item_port_id *port_id;
781 const struct rte_flow_item_eth *eth;
782 const struct rte_flow_item_vlan *vlan;
783 const struct rte_flow_item_ipv4 *ipv4;
784 const struct rte_flow_item_ipv6 *ipv6;
785 const struct rte_flow_item_tcp *tcp;
786 const struct rte_flow_item_udp *udp;
/* Per-action conf pointers (members of an anonymous union in the full file). */
789 const struct rte_flow_action_port_id *port_id;
790 const struct rte_flow_action_jump *jump;
791 const struct rte_flow_action_of_push_vlan *of_push_vlan;
792 const struct rte_flow_action_of_set_vlan_vid *
794 const struct rte_flow_action_of_set_vlan_pcp *
796 const struct rte_flow_action_set_ipv4 *set_ipv4;
797 const struct rte_flow_action_set_ipv6 *set_ipv6;
799 uint32_t item_flags = 0;
800 uint32_t action_flags = 0;
801 uint8_t next_protocol = -1;
802 unsigned int tcm_ifindex = 0;
803 uint8_t pedit_validated = 0;
804 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
805 struct rte_eth_dev *port_id_dev = NULL;
809 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
810 PTOI_TABLE_SZ_MAX(dev)));
811 ret = flow_tcf_validate_attributes(attr, error);
/* Pass 1: validate items, collect item_flags and next_protocol. */
814 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
817 switch (items->type) {
818 case RTE_FLOW_ITEM_TYPE_VOID:
820 case RTE_FLOW_ITEM_TYPE_PORT_ID:
821 mask.port_id = flow_tcf_item_mask
822 (items, &rte_flow_item_port_id_mask,
823 &flow_tcf_mask_supported.port_id,
824 &flow_tcf_mask_empty.port_id,
825 sizeof(flow_tcf_mask_supported.port_id),
829 if (mask.port_id == &flow_tcf_mask_empty.port_id) {
833 spec.port_id = items->spec;
834 if (mask.port_id->id && mask.port_id->id != 0xffffffff)
835 return rte_flow_error_set
837 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
839 "no support for partial mask on"
841 if (!mask.port_id->id)
/* Resolve the matched port ID to an ifindex via ptoi[]. */
844 for (i = 0; ptoi[i].ifindex; ++i)
845 if (ptoi[i].port_id == spec.port_id->id)
847 if (!ptoi[i].ifindex)
848 return rte_flow_error_set
850 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
852 "missing data to convert port ID to"
854 if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
855 return rte_flow_error_set
857 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
859 "cannot match traffic for"
860 " several port IDs through"
861 " a single flow rule");
862 tcm_ifindex = ptoi[i].ifindex;
865 case RTE_FLOW_ITEM_TYPE_ETH:
866 ret = mlx5_flow_validate_item_eth(items, item_flags,
870 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
872 * Redundant check due to different supported mask.
873 * Same for the rest of items.
875 mask.eth = flow_tcf_item_mask
876 (items, &rte_flow_item_eth_mask,
877 &flow_tcf_mask_supported.eth,
878 &flow_tcf_mask_empty.eth,
879 sizeof(flow_tcf_mask_supported.eth),
883 if (mask.eth->type && mask.eth->type !=
885 return rte_flow_error_set
887 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
889 "no support for partial mask on"
892 case RTE_FLOW_ITEM_TYPE_VLAN:
893 ret = mlx5_flow_validate_item_vlan(items, item_flags,
897 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
898 mask.vlan = flow_tcf_item_mask
899 (items, &rte_flow_item_vlan_mask,
900 &flow_tcf_mask_supported.vlan,
901 &flow_tcf_mask_empty.vlan,
902 sizeof(flow_tcf_mask_supported.vlan),
/* PCP (0xe000) and VID (0x0fff) parts must each be all-or-nothing. */
906 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
907 (mask.vlan->tci & RTE_BE16(0xe000)) !=
909 (mask.vlan->tci & RTE_BE16(0x0fff) &&
910 (mask.vlan->tci & RTE_BE16(0x0fff)) !=
912 (mask.vlan->inner_type &&
913 mask.vlan->inner_type != RTE_BE16(0xffff)))
914 return rte_flow_error_set
916 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
918 "no support for partial masks on"
919 " \"tci\" (PCP and VID parts) and"
920 " \"inner_type\" fields");
922 case RTE_FLOW_ITEM_TYPE_IPV4:
923 ret = mlx5_flow_validate_item_ipv4(items, item_flags,
927 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
928 mask.ipv4 = flow_tcf_item_mask
929 (items, &rte_flow_item_ipv4_mask,
930 &flow_tcf_mask_supported.ipv4,
931 &flow_tcf_mask_empty.ipv4,
932 sizeof(flow_tcf_mask_supported.ipv4),
936 if (mask.ipv4->hdr.next_proto_id &&
937 mask.ipv4->hdr.next_proto_id != 0xff)
938 return rte_flow_error_set
940 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
942 "no support for partial mask on"
943 " \"hdr.next_proto_id\" field");
944 else if (mask.ipv4->hdr.next_proto_id)
946 ((const struct rte_flow_item_ipv4 *)
947 (items->spec))->hdr.next_proto_id;
949 case RTE_FLOW_ITEM_TYPE_IPV6:
950 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
954 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
955 mask.ipv6 = flow_tcf_item_mask
956 (items, &rte_flow_item_ipv6_mask,
957 &flow_tcf_mask_supported.ipv6,
958 &flow_tcf_mask_empty.ipv6,
959 sizeof(flow_tcf_mask_supported.ipv6),
963 if (mask.ipv6->hdr.proto &&
964 mask.ipv6->hdr.proto != 0xff)
965 return rte_flow_error_set
967 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
969 "no support for partial mask on"
970 " \"hdr.proto\" field");
971 else if (mask.ipv6->hdr.proto)
973 ((const struct rte_flow_item_ipv6 *)
974 (items->spec))->hdr.proto;
976 case RTE_FLOW_ITEM_TYPE_UDP:
977 ret = mlx5_flow_validate_item_udp(items, item_flags,
978 next_protocol, error);
981 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
982 mask.udp = flow_tcf_item_mask
983 (items, &rte_flow_item_udp_mask,
984 &flow_tcf_mask_supported.udp,
985 &flow_tcf_mask_empty.udp,
986 sizeof(flow_tcf_mask_supported.udp),
991 case RTE_FLOW_ITEM_TYPE_TCP:
992 ret = mlx5_flow_validate_item_tcp
995 &flow_tcf_mask_supported.tcp,
999 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1000 mask.tcp = flow_tcf_item_mask
1001 (items, &rte_flow_item_tcp_mask,
1002 &flow_tcf_mask_supported.tcp,
1003 &flow_tcf_mask_empty.tcp,
1004 sizeof(flow_tcf_mask_supported.tcp),
1010 return rte_flow_error_set(error, ENOTSUP,
1011 RTE_FLOW_ERROR_TYPE_ITEM,
1012 NULL, "item not supported");
/* Pass 2: validate actions and accumulate action_flags. */
1015 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1017 uint32_t current_action_flag = 0;
1019 switch (actions->type) {
1020 case RTE_FLOW_ACTION_TYPE_VOID:
1022 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1023 current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1026 conf.port_id = actions->conf;
1027 if (conf.port_id->original)
1030 for (i = 0; ptoi[i].ifindex; ++i)
1031 if (ptoi[i].port_id == conf.port_id->id)
1033 if (!ptoi[i].ifindex)
1034 return rte_flow_error_set
1036 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1038 "missing data to convert port ID to"
1040 port_id_dev = &rte_eth_devices[conf.port_id->id];
1042 case RTE_FLOW_ACTION_TYPE_JUMP:
1043 current_action_flag = MLX5_FLOW_ACTION_JUMP;
1046 conf.jump = actions->conf;
1047 if (attr->group >= conf.jump->group)
1048 return rte_flow_error_set
1050 RTE_FLOW_ERROR_TYPE_ACTION,
1052 "can jump only to a group forward");
1054 case RTE_FLOW_ACTION_TYPE_DROP:
1055 current_action_flag = MLX5_FLOW_ACTION_DROP;
1057 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1058 current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1060 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1061 current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1063 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1064 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1065 return rte_flow_error_set
1067 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1068 "vlan modify is not supported,"
1069 " set action must follow push action");
1070 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1072 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1073 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1074 return rte_flow_error_set
1076 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1077 "vlan modify is not supported,"
1078 " set action must follow push action");
1079 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1081 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1082 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1084 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1085 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1087 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1088 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1090 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1091 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1093 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1094 current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1096 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1097 current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1100 return rte_flow_error_set(error, ENOTSUP,
1101 RTE_FLOW_ERROR_TYPE_ACTION,
1103 "action not supported");
1105 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1107 return rte_flow_error_set(error, EINVAL,
1108 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1110 "action configuration not set");
/* All pedit "set" actions must form one contiguous run. */
1112 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1114 return rte_flow_error_set(error, ENOTSUP,
1115 RTE_FLOW_ERROR_TYPE_ACTION,
1117 "set actions should be "
1118 "listed successively");
1119 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1120 (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1121 pedit_validated = 1;
1122 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1123 (action_flags & MLX5_TCF_FATE_ACTIONS))
1124 return rte_flow_error_set(error, EINVAL,
1125 RTE_FLOW_ERROR_TYPE_ACTION,
1127 "can't have multiple fate"
1129 action_flags |= current_action_flag;
/* Cross-checks between accumulated action and item flags. */
1131 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1132 (action_flags & MLX5_FLOW_ACTION_DROP))
1133 return rte_flow_error_set(error, ENOTSUP,
1134 RTE_FLOW_ERROR_TYPE_ACTION,
1136 "set action is not compatible with "
1138 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1139 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1140 return rte_flow_error_set(error, ENOTSUP,
1141 RTE_FLOW_ERROR_TYPE_ACTION,
1143 "set action must be followed by "
1146 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1147 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1148 return rte_flow_error_set(error, EINVAL,
1149 RTE_FLOW_ERROR_TYPE_ACTION,
1151 "no ipv4 item found in"
1155 (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1156 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1157 return rte_flow_error_set(error, EINVAL,
1158 RTE_FLOW_ERROR_TYPE_ACTION,
1160 "no ipv6 item found in"
1164 (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1166 (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1167 MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1168 return rte_flow_error_set(error, EINVAL,
1169 RTE_FLOW_ERROR_TYPE_ACTION,
1171 "no TCP/UDP item found in"
1175 * FW syndrome (0xA9C090):
1176 * set_flow_table_entry: push vlan action fte in fdb can ONLY be
1177 * forward to the uplink.
1179 if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1180 (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1181 ((struct priv *)port_id_dev->data->dev_private)->representor)
1182 return rte_flow_error_set(error, ENOTSUP,
1183 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1184 "vlan push can only be applied"
1185 " when forwarding to uplink port");
1187 * FW syndrome (0x294609):
1188 * set_flow_table_entry: modify/pop/push actions in fdb flow table
1189 * are supported only while forwarding to vport.
1191 if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1192 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1193 return rte_flow_error_set(error, ENOTSUP,
1194 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1195 "vlan actions are supported"
1196 " only with port_id action");
1197 if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1198 return rte_flow_error_set(error, EINVAL,
1199 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1200 "no fate action is found");
/* Upper-bound the netlink message size for the flower match (items) part
 * and report the detected item layer flags via *item_flags.
 * NOTE(review): sampled listing — the "static int" line, opening brace,
 * local declarations (size, flags), per-case "break;" statements, the
 * default-case DRV_LOG head before line 1277 and the final return /
 * closing braces are missing below. */
1205 * Calculate maximum size of memory for flow items of Linux TC flower and
1206 * extract specified items.
1209 * Pointer to the list of items.
1210 * @param[out] item_flags
1211 * Pointer to the detected items.
1214 * Maximum size of memory for items.
1217 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1218 const struct rte_flow_item items[],
1219 uint64_t *item_flags)
1224 size += SZ_NLATTR_STRZ_OF("flower") +
1225 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1226 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
1227 if (attr->group > 0)
1228 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
1229 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1230 switch (items->type) {
1231 case RTE_FLOW_ITEM_TYPE_VOID:
1233 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1235 case RTE_FLOW_ITEM_TYPE_ETH:
1236 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1237 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1238 /* dst/src MAC addr and mask. */
1239 flags |= MLX5_FLOW_LAYER_OUTER_L2;
1241 case RTE_FLOW_ITEM_TYPE_VLAN:
1242 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1243 SZ_NLATTR_TYPE_OF(uint16_t) +
1244 /* VLAN Ether type. */
1245 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1246 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1247 flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1249 case RTE_FLOW_ITEM_TYPE_IPV4:
1250 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1251 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1252 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1253 /* dst/src IP addr and mask. */
1254 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1256 case RTE_FLOW_ITEM_TYPE_IPV6:
1257 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1258 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1259 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1260 /* dst/src IP addr and mask. */
1261 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1263 case RTE_FLOW_ITEM_TYPE_UDP:
1264 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1265 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1266 /* dst/src port and mask. */
1267 flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1269 case RTE_FLOW_ITEM_TYPE_TCP:
1270 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1271 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1272 /* dst/src port and mask. */
1273 flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
/* Unexpected items are only logged: validation already rejected them. */
1277 "unsupported item %p type %d,"
1278 " items must be validated before flow creation",
1279 (const void *)items, items->type);
1283 *item_flags = flags;
1288 * Calculate maximum size of memory for flow actions of Linux TC flower and
1289 * extract specified actions.
1291 * @param[in] actions
1292 * Pointer to the list of actions.
1293 * @param[out] action_flags
1294 * Pointer to the detected actions.
1297 * Maximum size of memory for actions.
1300 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1301 uint64_t *action_flags)
/* Fixed overhead: the TCA_FLOWER_ACT nest wrapping all actions. */
1306 size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
/* Walk the action list once, accumulating worst-case Netlink attribute
 * sizes and recording which actions are present. */
1307 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1308 switch (actions->type) {
1309 case RTE_FLOW_ACTION_TYPE_VOID:
1311 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1312 size += SZ_NLATTR_NEST + /* na_act_index. */
1313 SZ_NLATTR_STRZ_OF("mirred") +
1314 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1315 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1316 flags |= MLX5_FLOW_ACTION_PORT_ID;
1318 case RTE_FLOW_ACTION_TYPE_JUMP:
1319 size += SZ_NLATTR_NEST + /* na_act_index. */
1320 SZ_NLATTR_STRZ_OF("gact") +
1321 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1322 SZ_NLATTR_TYPE_OF(struct tc_gact);
1323 flags |= MLX5_FLOW_ACTION_JUMP;
1325 case RTE_FLOW_ACTION_TYPE_DROP:
1326 size += SZ_NLATTR_NEST + /* na_act_index. */
1327 SZ_NLATTR_STRZ_OF("gact") +
1328 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1329 SZ_NLATTR_TYPE_OF(struct tc_gact);
1330 flags |= MLX5_FLOW_ACTION_DROP;
/* All four VLAN-related actions share the same "vlan" TC action size
 * accounting below (jump target after the gotos). */
1332 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1333 flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1334 goto action_of_vlan;
1335 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1336 flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1337 goto action_of_vlan;
1338 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1339 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1340 goto action_of_vlan;
1341 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1342 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1343 goto action_of_vlan;
1345 size += SZ_NLATTR_NEST + /* na_act_index. */
1346 SZ_NLATTR_STRZ_OF("vlan") +
1347 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1348 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1349 SZ_NLATTR_TYPE_OF(uint16_t) +
1350 /* VLAN protocol. */
1351 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1352 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
/* Header-rewrite actions delegate size accounting to the pedit helper,
 * which advances the actions cursor itself (passed by reference). */
1354 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1355 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1356 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1357 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1358 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1359 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1360 size += flow_tcf_get_pedit_actions_size(&actions,
/* Default case only logs: validation must reject unsupported actions
 * before flow creation reaches this point. */
1365 "unsupported action %p type %d,"
1366 " items must be validated before flow creation",
1367 (const void *)actions, actions->type);
/* Report the detected actions through the out parameter. */
1371 *action_flags = flags;
1376 * Brand rtnetlink buffer with unique handle.
1378 * This handle should be unique for a given network interface to avoid
1382 * Pointer to Netlink message.
1384 * Unique 32-bit handle to use.
1387 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
/* The tcmsg header sits in the message payload; brand it with the
 * caller-chosen handle so the rule can be identified later. */
1389 struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1391 tcm->tcm_handle = handle;
1392 DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1393 (void *)nlh, handle);
1397 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1398 * memory required, allocates the memory, initializes Netlink message headers
1399 * and set unique TC message handle.
1402 * Pointer to the flow attributes.
1404 * Pointer to the list of items.
1405 * @param[in] actions
1406 * Pointer to the list of actions.
1407 * @param[out] item_flags
1408 * Pointer to bit mask of all items detected.
1409 * @param[out] action_flags
1410 * Pointer to bit mask of all actions detected.
1412 * Pointer to the error structure.
1415 * Pointer to mlx5_flow object on success,
1416 * otherwise NULL and rte_errno is set.
1418 static struct mlx5_flow *
1419 flow_tcf_prepare(const struct rte_flow_attr *attr,
1420 const struct rte_flow_item items[],
1421 const struct rte_flow_action actions[],
1422 uint64_t *item_flags, uint64_t *action_flags,
1423 struct rte_flow_error *error)
/* One allocation holds the mlx5_flow object followed by the Netlink
 * message buffer (nlmsghdr + tcmsg + attributes), each MNL-aligned. */
1425 size_t size = sizeof(struct mlx5_flow) +
1426 MNL_ALIGN(sizeof(struct nlmsghdr)) +
1427 MNL_ALIGN(sizeof(struct tcmsg));
1428 struct mlx5_flow *dev_flow;
1429 struct nlmsghdr *nlh;
/* Worst-case attribute sizes are computed up front so the message can
 * be built later without reallocation. */
1432 size += flow_tcf_get_items_and_size(attr, items, item_flags);
1433 size += flow_tcf_get_actions_and_size(actions, action_flags);
1434 dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1436 rte_flow_error_set(error, ENOMEM,
1437 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1438 "not enough memory to create E-Switch flow");
/* The Netlink message starts immediately after the flow object. */
1441 nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1442 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1443 *dev_flow = (struct mlx5_flow){
1444 .tcf = (struct mlx5_flow_tcf){
1450 * Generate a reasonably unique handle based on the address of the
1453 * This is straightforward on 32-bit systems where the flow pointer can
1454 * be used directly. Otherwise, its least significant part is taken
1455 * after shifting it by the previous power of two of the pointed buffer
1458 if (sizeof(dev_flow) <= 4)
1459 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1461 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1462 rte_log2_u32(rte_align32prevpow2(size)));
1467 * Translate flow for Linux TC flower and construct Netlink message.
1470 * Pointer to the priv structure.
1471 * @param[in, out] flow
1472 * Pointer to the sub flow.
1474 * Pointer to the flow attributes.
1476 * Pointer to the list of items.
1477 * @param[in] actions
1478 * Pointer to the list of actions.
1480 * Pointer to the error structure.
1483 * 0 on success, a negative errno value otherwise and rte_errno is set.
1486 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1487 const struct rte_flow_attr *attr,
1488 const struct rte_flow_item items[],
1489 const struct rte_flow_action actions[],
1490 struct rte_flow_error *error)
/* Typed aliases used to read the current item's spec/mask pointers. */
1493 const struct rte_flow_item_port_id *port_id;
1494 const struct rte_flow_item_eth *eth;
1495 const struct rte_flow_item_vlan *vlan;
1496 const struct rte_flow_item_ipv4 *ipv4;
1497 const struct rte_flow_item_ipv6 *ipv6;
1498 const struct rte_flow_item_tcp *tcp;
1499 const struct rte_flow_item_udp *udp;
/* Typed aliases used to read the current action's configuration. */
1502 const struct rte_flow_action_port_id *port_id;
1503 const struct rte_flow_action_jump *jump;
1504 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1505 const struct rte_flow_action_of_set_vlan_vid *
1507 const struct rte_flow_action_of_set_vlan_pcp *
1510 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1511 struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1512 struct tcmsg *tcm = dev_flow->tcf.tcm;
1513 uint32_t na_act_index_cur;
1514 bool eth_type_set = 0;
1515 bool vlan_present = 0;
1516 bool vlan_eth_type_set = 0;
1517 bool ip_proto_set = 0;
1518 struct nlattr *na_flower;
1519 struct nlattr *na_flower_act;
/* Remembered positions of VLAN ID/priority attributes so the SET_VLAN_*
 * actions can overwrite them in place later (see override_ labels). */
1520 struct nlattr *na_vlan_id = NULL;
1521 struct nlattr *na_vlan_priority = NULL;
1522 uint64_t item_flags = 0;
/* Port-id to ifindex mapping table for this device's E-Switch ports. */
1524 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1525 PTOI_TABLE_SZ_MAX(dev)));
1526 nlh = dev_flow->tcf.nlh;
1527 tcm = dev_flow->tcf.tcm;
1528 /* Prepare API must have been called beforehand. */
1529 assert(nlh != NULL && tcm != NULL);
1530 tcm->tcm_family = AF_UNSPEC;
1531 tcm->tcm_ifindex = ptoi[0].ifindex;
1532 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1534 * Priority cannot be zero to prevent the kernel from picking one
1537 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1538 RTE_BE16(ETH_P_ALL));
1539 if (attr->group > 0)
1540 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1541 mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1542 na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1543 mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
/* First pass: emit one flower match attribute group per flow item. */
1544 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1547 switch (items->type) {
1548 case RTE_FLOW_ITEM_TYPE_VOID:
1550 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1551 mask.port_id = flow_tcf_item_mask
1552 (items, &rte_flow_item_port_id_mask,
1553 &flow_tcf_mask_supported.port_id,
1554 &flow_tcf_mask_empty.port_id,
1555 sizeof(flow_tcf_mask_supported.port_id),
1557 assert(mask.port_id);
1558 if (mask.port_id == &flow_tcf_mask_empty.port_id)
1560 spec.port_id = items->spec;
1561 if (!mask.port_id->id)
/* Redirect the rule to the ifindex matching the given port id. */
1564 for (i = 0; ptoi[i].ifindex; ++i)
1565 if (ptoi[i].port_id == spec.port_id->id)
1567 assert(ptoi[i].ifindex);
1568 tcm->tcm_ifindex = ptoi[i].ifindex;
1570 case RTE_FLOW_ITEM_TYPE_ETH:
1571 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1572 mask.eth = flow_tcf_item_mask
1573 (items, &rte_flow_item_eth_mask,
1574 &flow_tcf_mask_supported.eth,
1575 &flow_tcf_mask_empty.eth,
1576 sizeof(flow_tcf_mask_supported.eth),
1579 if (mask.eth == &flow_tcf_mask_empty.eth)
1581 spec.eth = items->spec;
1582 if (mask.eth->type) {
1583 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
/* MAC keys are emitted only when their mask is non-zero. */
1587 if (!is_zero_ether_addr(&mask.eth->dst)) {
1588 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1590 spec.eth->dst.addr_bytes);
1591 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1593 mask.eth->dst.addr_bytes);
1595 if (!is_zero_ether_addr(&mask.eth->src)) {
1596 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1598 spec.eth->src.addr_bytes);
1599 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1601 mask.eth->src.addr_bytes);
1604 case RTE_FLOW_ITEM_TYPE_VLAN:
1605 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1606 mask.vlan = flow_tcf_item_mask
1607 (items, &rte_flow_item_vlan_mask,
1608 &flow_tcf_mask_supported.vlan,
1609 &flow_tcf_mask_empty.vlan,
1610 sizeof(flow_tcf_mask_supported.vlan),
/* The outer Ether type becomes 802.1Q when a VLAN item is present. */
1614 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1615 RTE_BE16(ETH_P_8021Q))
1618 if (mask.vlan == &flow_tcf_mask_empty.vlan)
1620 spec.vlan = items->spec;
1621 if (mask.vlan->inner_type) {
1622 mnl_attr_put_u16(nlh,
1623 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1624 spec.vlan->inner_type);
1625 vlan_eth_type_set = 1;
/* TCI layout: PCP in bits 13-15, VID in bits 0-11. */
1627 if (mask.vlan->tci & RTE_BE16(0xe000))
1628 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1630 (spec.vlan->tci) >> 13) & 0x7);
1631 if (mask.vlan->tci & RTE_BE16(0x0fff))
1632 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1637 case RTE_FLOW_ITEM_TYPE_IPV4:
1638 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1639 mask.ipv4 = flow_tcf_item_mask
1640 (items, &rte_flow_item_ipv4_mask,
1641 &flow_tcf_mask_supported.ipv4,
1642 &flow_tcf_mask_empty.ipv4,
1643 sizeof(flow_tcf_mask_supported.ipv4),
/* Fill in the (VLAN) Ether type if no earlier item pinned it. */
1646 if (!eth_type_set || !vlan_eth_type_set)
1647 mnl_attr_put_u16(nlh,
1649 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1650 TCA_FLOWER_KEY_ETH_TYPE,
1651 RTE_BE16(ETH_P_IP));
1653 vlan_eth_type_set = 1;
1654 if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1656 spec.ipv4 = items->spec;
1657 if (mask.ipv4->hdr.next_proto_id) {
1658 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1659 spec.ipv4->hdr.next_proto_id);
1662 if (mask.ipv4->hdr.src_addr) {
1663 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1664 spec.ipv4->hdr.src_addr);
1665 mnl_attr_put_u32(nlh,
1666 TCA_FLOWER_KEY_IPV4_SRC_MASK,
1667 mask.ipv4->hdr.src_addr);
1669 if (mask.ipv4->hdr.dst_addr) {
1670 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1671 spec.ipv4->hdr.dst_addr);
1672 mnl_attr_put_u32(nlh,
1673 TCA_FLOWER_KEY_IPV4_DST_MASK,
1674 mask.ipv4->hdr.dst_addr);
1677 case RTE_FLOW_ITEM_TYPE_IPV6:
1678 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1679 mask.ipv6 = flow_tcf_item_mask
1680 (items, &rte_flow_item_ipv6_mask,
1681 &flow_tcf_mask_supported.ipv6,
1682 &flow_tcf_mask_empty.ipv6,
1683 sizeof(flow_tcf_mask_supported.ipv6),
1686 if (!eth_type_set || !vlan_eth_type_set)
1687 mnl_attr_put_u16(nlh,
1689 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1690 TCA_FLOWER_KEY_ETH_TYPE,
1691 RTE_BE16(ETH_P_IPV6));
1693 vlan_eth_type_set = 1;
1694 if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1696 spec.ipv6 = items->spec;
1697 if (mask.ipv6->hdr.proto) {
1698 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1699 spec.ipv6->hdr.proto);
/* IPv6 addresses are 16-byte blobs, emitted spec then mask. */
1702 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1703 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1704 sizeof(spec.ipv6->hdr.src_addr),
1705 spec.ipv6->hdr.src_addr);
1706 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1707 sizeof(mask.ipv6->hdr.src_addr),
1708 mask.ipv6->hdr.src_addr);
1710 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1711 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1712 sizeof(spec.ipv6->hdr.dst_addr),
1713 spec.ipv6->hdr.dst_addr);
1714 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1715 sizeof(mask.ipv6->hdr.dst_addr),
1716 mask.ipv6->hdr.dst_addr);
1719 case RTE_FLOW_ITEM_TYPE_UDP:
1720 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1721 mask.udp = flow_tcf_item_mask
1722 (items, &rte_flow_item_udp_mask,
1723 &flow_tcf_mask_supported.udp,
1724 &flow_tcf_mask_empty.udp,
1725 sizeof(flow_tcf_mask_supported.udp),
1729 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1731 if (mask.udp == &flow_tcf_mask_empty.udp)
1733 spec.udp = items->spec;
1734 if (mask.udp->hdr.src_port) {
1735 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1736 spec.udp->hdr.src_port);
1737 mnl_attr_put_u16(nlh,
1738 TCA_FLOWER_KEY_UDP_SRC_MASK,
1739 mask.udp->hdr.src_port);
1741 if (mask.udp->hdr.dst_port) {
1742 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1743 spec.udp->hdr.dst_port);
1744 mnl_attr_put_u16(nlh,
1745 TCA_FLOWER_KEY_UDP_DST_MASK,
1746 mask.udp->hdr.dst_port);
1749 case RTE_FLOW_ITEM_TYPE_TCP:
1750 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1751 mask.tcp = flow_tcf_item_mask
1752 (items, &rte_flow_item_tcp_mask,
1753 &flow_tcf_mask_supported.tcp,
1754 &flow_tcf_mask_empty.tcp,
1755 sizeof(flow_tcf_mask_supported.tcp),
1759 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1761 if (mask.tcp == &flow_tcf_mask_empty.tcp)
1763 spec.tcp = items->spec;
1764 if (mask.tcp->hdr.src_port) {
1765 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
1766 spec.tcp->hdr.src_port);
1767 mnl_attr_put_u16(nlh,
1768 TCA_FLOWER_KEY_TCP_SRC_MASK,
1769 mask.tcp->hdr.src_port);
1771 if (mask.tcp->hdr.dst_port) {
1772 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
1773 spec.tcp->hdr.dst_port);
1774 mnl_attr_put_u16(nlh,
1775 TCA_FLOWER_KEY_TCP_DST_MASK,
1776 mask.tcp->hdr.dst_port);
1778 if (mask.tcp->hdr.tcp_flags) {
1781 TCA_FLOWER_KEY_TCP_FLAGS,
1783 (spec.tcp->hdr.tcp_flags));
1786 TCA_FLOWER_KEY_TCP_FLAGS_MASK,
1788 (mask.tcp->hdr.tcp_flags));
1792 return rte_flow_error_set(error, ENOTSUP,
1793 RTE_FLOW_ERROR_TYPE_ITEM,
1794 NULL, "item not supported");
/* Second pass: emit TC actions, each wrapped in its own 1-based
 * index nest under TCA_FLOWER_ACT. */
1797 na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
1798 na_act_index_cur = 1;
1799 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1800 struct nlattr *na_act_index;
1801 struct nlattr *na_act;
1802 unsigned int vlan_act;
1805 switch (actions->type) {
1806 case RTE_FLOW_ACTION_TYPE_VOID:
1808 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1809 conf.port_id = actions->conf;
1810 if (conf.port_id->original)
1813 for (i = 0; ptoi[i].ifindex; ++i)
1814 if (ptoi[i].port_id == conf.port_id->id)
1816 assert(ptoi[i].ifindex);
1818 mnl_attr_nest_start(nlh, na_act_index_cur++);
1819 assert(na_act_index);
1820 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
1821 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
/* mirred redirect: steal the packet and egress it on the target
 * ifindex resolved from the port id. */
1823 mnl_attr_put(nlh, TCA_MIRRED_PARMS,
1824 sizeof(struct tc_mirred),
1825 &(struct tc_mirred){
1826 .action = TC_ACT_STOLEN,
1827 .eaction = TCA_EGRESS_REDIR,
1828 .ifindex = ptoi[i].ifindex,
1830 mnl_attr_nest_end(nlh, na_act);
1831 mnl_attr_nest_end(nlh, na_act_index);
1833 case RTE_FLOW_ACTION_TYPE_JUMP:
1834 conf.jump = actions->conf;
1836 mnl_attr_nest_start(nlh, na_act_index_cur++);
1837 assert(na_act_index);
1838 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1839 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
/* gact goto-chain implements the rte_flow JUMP-to-group action. */
1841 mnl_attr_put(nlh, TCA_GACT_PARMS,
1842 sizeof(struct tc_gact),
1844 .action = TC_ACT_GOTO_CHAIN |
1847 mnl_attr_nest_end(nlh, na_act);
1848 mnl_attr_nest_end(nlh, na_act_index);
1850 case RTE_FLOW_ACTION_TYPE_DROP:
1852 mnl_attr_nest_start(nlh, na_act_index_cur++);
1853 assert(na_act_index);
1854 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1855 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1857 mnl_attr_put(nlh, TCA_GACT_PARMS,
1858 sizeof(struct tc_gact),
1860 .action = TC_ACT_SHOT,
1862 mnl_attr_nest_end(nlh, na_act);
1863 mnl_attr_nest_end(nlh, na_act_index);
/* VLAN actions share the common "vlan" TC action emission below;
 * SET_VLAN_VID/PCP patch an already-emitted vlan action in place
 * when one exists (override_ labels). */
1865 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1866 conf.of_push_vlan = NULL;
1867 vlan_act = TCA_VLAN_ACT_POP;
1868 goto action_of_vlan;
1869 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1870 conf.of_push_vlan = actions->conf;
1871 vlan_act = TCA_VLAN_ACT_PUSH;
1872 goto action_of_vlan;
1873 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1874 conf.of_set_vlan_vid = actions->conf;
1876 goto override_na_vlan_id;
1877 vlan_act = TCA_VLAN_ACT_MODIFY;
1878 goto action_of_vlan;
1879 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1880 conf.of_set_vlan_pcp = actions->conf;
1881 if (na_vlan_priority)
1882 goto override_na_vlan_priority;
1883 vlan_act = TCA_VLAN_ACT_MODIFY;
1884 goto action_of_vlan;
1887 mnl_attr_nest_start(nlh, na_act_index_cur++);
1888 assert(na_act_index);
1889 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
1890 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1892 mnl_attr_put(nlh, TCA_VLAN_PARMS,
1893 sizeof(struct tc_vlan),
1895 .action = TC_ACT_PIPE,
1896 .v_action = vlan_act,
1898 if (vlan_act == TCA_VLAN_ACT_POP) {
1899 mnl_attr_nest_end(nlh, na_act);
1900 mnl_attr_nest_end(nlh, na_act_index);
1903 if (vlan_act == TCA_VLAN_ACT_PUSH)
1904 mnl_attr_put_u16(nlh,
1905 TCA_VLAN_PUSH_VLAN_PROTOCOL,
1906 conf.of_push_vlan->ethertype);
/* Reserve ID/priority slots as TCA_VLAN_PAD placeholders; their
 * nla_type and payload are rewritten by the override_ labels. */
1907 na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
1908 mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
1909 na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
1910 mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
1911 mnl_attr_nest_end(nlh, na_act);
1912 mnl_attr_nest_end(nlh, na_act_index);
1913 if (actions->type ==
1914 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
1915 override_na_vlan_id:
1916 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
1917 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
1919 (conf.of_set_vlan_vid->vlan_vid);
1920 } else if (actions->type ==
1921 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
1922 override_na_vlan_priority:
1923 na_vlan_priority->nla_type =
1924 TCA_VLAN_PUSH_VLAN_PRIORITY;
1925 *(uint8_t *)mnl_attr_get_payload
1926 (na_vlan_priority) =
1927 conf.of_set_vlan_pcp->vlan_pcp;
/* Header-rewrite actions are translated by the pedit helper, which
 * consumes consecutive set-actions via the by-reference cursor. */
1930 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1931 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1932 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1933 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1934 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1935 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1937 mnl_attr_nest_start(nlh, na_act_index_cur++);
1938 flow_tcf_create_pedit_mnl_msg(nlh,
1939 &actions, item_flags);
1940 mnl_attr_nest_end(nlh, na_act_index);
1943 return rte_flow_error_set(error, ENOTSUP,
1944 RTE_FLOW_ERROR_TYPE_ACTION,
1946 "action not supported");
/* Close the action and options nests; the message is ready to send. */
1950 assert(na_flower_act);
1951 mnl_attr_nest_end(nlh, na_flower_act);
1952 mnl_attr_nest_end(nlh, na_flower);
1957 * Send Netlink message with acknowledgment.
1960 * Libmnl socket to use.
1962 * Message to send. This function always raises the NLM_F_ACK flag before
1966 * 0 on success, a negative errno value otherwise and rte_errno is set.
1969 flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
/* Reply buffer sized for an nlmsgerr plus the request payload the
 * kernel may echo back in the acknowledgment. */
1971 alignas(struct nlmsghdr)
1972 uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
1973 nlh->nlmsg_len - sizeof(*nlh)];
/* Random sequence number used to match the kernel's reply to this
 * request during the callback run below. */
1974 uint32_t seq = random();
1977 nlh->nlmsg_flags |= NLM_F_ACK;
1978 nlh->nlmsg_seq = seq;
1979 ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
1981 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
1984 (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
1992 * Apply flow to E-Switch by sending Netlink message.
1995 * Pointer to Ethernet device.
1996 * @param[in, out] flow
1997 * Pointer to the sub flow.
1999 * Pointer to the error structure.
2002 * 0 on success, a negative errno value otherwise and rte_errno is set.
2005 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2006 struct rte_flow_error *error)
2008 struct priv *priv = dev->data->dev_private;
2009 struct mnl_socket *nl = priv->mnl_socket;
2010 struct mlx5_flow *dev_flow;
2011 struct nlmsghdr *nlh;
2013 dev_flow = LIST_FIRST(&flow->dev_flows);
2014 /* E-Switch flow can't be expanded. */
2015 assert(!LIST_NEXT(dev_flow, next));
/* Reuse the message built by translate; NLM_F_EXCL makes the kernel
 * reject the request if an identical filter already exists. */
2016 nlh = dev_flow->tcf.nlh;
2017 nlh->nlmsg_type = RTM_NEWTFILTER;
2018 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2019 if (!flow_tcf_nl_ack(nl, nlh))
2021 return rte_flow_error_set(error, rte_errno,
2022 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2023 "netlink: failed to create TC flow rule");
2027 * Remove flow from E-Switch by sending Netlink message.
2030 * Pointer to Ethernet device.
2031 * @param[in, out] flow
2032 * Pointer to the sub flow.
2035 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2037 struct priv *priv = dev->data->dev_private;
2038 struct mnl_socket *nl = priv->mnl_socket;
2039 struct mlx5_flow *dev_flow;
2040 struct nlmsghdr *nlh;
2044 dev_flow = LIST_FIRST(&flow->dev_flows);
2047 /* E-Switch flow can't be expanded. */
2048 assert(!LIST_NEXT(dev_flow, next));
/* Same message buffer, retargeted as a delete request; the ack result
 * is deliberately ignored (best-effort removal). */
2049 nlh = dev_flow->tcf.nlh;
2050 nlh->nlmsg_type = RTM_DELTFILTER;
2051 nlh->nlmsg_flags = NLM_F_REQUEST;
2052 flow_tcf_nl_ack(nl, nlh);
2056 * Remove flow from E-Switch and release resources of the device flow.
2059 * Pointer to Ethernet device.
2060 * @param[in, out] flow
2061 * Pointer to the sub flow.
2064 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2066 struct mlx5_flow *dev_flow;
/* Remove the kernel rule first, then release the device flow object. */
2070 flow_tcf_remove(dev, flow);
2071 dev_flow = LIST_FIRST(&flow->dev_flows);
2074 /* E-Switch flow can't be expanded. */
2075 assert(!LIST_NEXT(dev_flow, next));
2076 LIST_REMOVE(dev_flow, next);
/* E-Switch (Linux TC flower) flow driver callback table. */
2080 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2081 .validate = flow_tcf_validate,
2082 .prepare = flow_tcf_prepare,
2083 .translate = flow_tcf_translate,
2084 .apply = flow_tcf_apply,
2085 .remove = flow_tcf_remove,
2086 .destroy = flow_tcf_destroy,
2090 * Initialize ingress qdisc of a given network interface.
2093 * Libmnl socket of the @p NETLINK_ROUTE kind.
2095 * Index of network interface to initialize.
2097 * Perform verbose error reporting if not NULL.
2100 * 0 on success, a negative errno value otherwise and rte_errno is set.
2103 mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
2104 struct rte_flow_error *error)
2106 struct nlmsghdr *nlh;
/* On-stack message buffer with headroom for the "ingress" attribute. */
2108 alignas(struct nlmsghdr)
2109 uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2111 /* Destroy existing ingress qdisc and everything attached to it. */
2112 nlh = mnl_nlmsg_put_header(buf);
2113 nlh->nlmsg_type = RTM_DELQDISC;
2114 nlh->nlmsg_flags = NLM_F_REQUEST;
2115 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2116 tcm->tcm_family = AF_UNSPEC;
2117 tcm->tcm_ifindex = ifindex;
2118 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2119 tcm->tcm_parent = TC_H_INGRESS;
2120 /* Ignore errors when qdisc is already absent. */
2121 if (flow_tcf_nl_ack(nl, nlh) &&
2122 rte_errno != EINVAL && rte_errno != ENOENT)
2123 return rte_flow_error_set(error, rte_errno,
2124 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2125 "netlink: failed to remove ingress"
2127 /* Create fresh ingress qdisc. */
2128 nlh = mnl_nlmsg_put_header(buf);
2129 nlh->nlmsg_type = RTM_NEWQDISC;
2130 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2131 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2132 tcm->tcm_family = AF_UNSPEC;
2133 tcm->tcm_ifindex = ifindex;
2134 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2135 tcm->tcm_parent = TC_H_INGRESS;
2136 mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2137 if (flow_tcf_nl_ack(nl, nlh))
2138 return rte_flow_error_set(error, rte_errno,
2139 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2140 "netlink: failed to create ingress"
2146 * Create and configure a libmnl socket for Netlink flow rules.
2149 * A valid libmnl socket object pointer on success, NULL otherwise and
2153 mlx5_flow_tcf_socket_create(void)
2155 struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
/* NOTE(review): NETLINK_CAP_ACK presumably requests capped (payload-free)
 * acknowledgments from the kernel — confirm against netlink(7). */
2158 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2160 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
/* Bind failed: release the socket before reporting the error. */
2165 mnl_socket_close(nl);
2170 * Destroy a libmnl socket.
2173 * Libmnl socket of the @p NETLINK_ROUTE kind.
2176 mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
2178 mnl_socket_close(nl);