1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
22 #include <sys/socket.h>
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
28 #include <rte_malloc.h>
31 #include "mlx5_flow.h"
32 #include "mlx5_autoconf.h"
34 #ifdef HAVE_TC_ACT_VLAN
36 #include <linux/tc_act/tc_vlan.h>
38 #else /* HAVE_TC_ACT_VLAN */
40 #define TCA_VLAN_ACT_POP 1
41 #define TCA_VLAN_ACT_PUSH 2
42 #define TCA_VLAN_ACT_MODIFY 3
43 #define TCA_VLAN_PARMS 2
44 #define TCA_VLAN_PUSH_VLAN_ID 3
45 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
46 #define TCA_VLAN_PAD 5
47 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
54 #endif /* HAVE_TC_ACT_VLAN */
56 #ifdef HAVE_TC_ACT_PEDIT
58 #include <linux/tc_act/tc_pedit.h>
60 #else /* HAVE_TC_ACT_VLAN */
74 TCA_PEDIT_KEY_EX_HTYPE = 1,
75 TCA_PEDIT_KEY_EX_CMD = 2,
76 __TCA_PEDIT_KEY_EX_MAX
79 enum pedit_header_type {
80 TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
81 TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
82 TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
83 TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
84 TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
85 TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
90 TCA_PEDIT_KEY_EX_CMD_SET = 0,
91 TCA_PEDIT_KEY_EX_CMD_ADD = 1,
98 __u32 off; /*offset */
104 struct tc_pedit_sel {
108 struct tc_pedit_key keys[0];
111 #endif /* HAVE_TC_ACT_VLAN */
113 /* Normally found in linux/netlink.h. */
114 #ifndef NETLINK_CAP_ACK
115 #define NETLINK_CAP_ACK 10
118 /* Normally found in linux/pkt_sched.h. */
119 #ifndef TC_H_MIN_INGRESS
120 #define TC_H_MIN_INGRESS 0xfff2u
123 /* Normally found in linux/pkt_cls.h. */
124 #ifndef TCA_CLS_FLAGS_SKIP_SW
125 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
127 #ifndef HAVE_TCA_CHAIN
130 #ifndef HAVE_TCA_FLOWER_ACT
131 #define TCA_FLOWER_ACT 3
133 #ifndef HAVE_TCA_FLOWER_FLAGS
134 #define TCA_FLOWER_FLAGS 22
136 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
137 #define TCA_FLOWER_KEY_ETH_TYPE 8
139 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
140 #define TCA_FLOWER_KEY_ETH_DST 4
142 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
143 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
145 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
146 #define TCA_FLOWER_KEY_ETH_SRC 6
148 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
149 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
151 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
152 #define TCA_FLOWER_KEY_IP_PROTO 9
154 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
155 #define TCA_FLOWER_KEY_IPV4_SRC 10
157 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
158 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
160 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
161 #define TCA_FLOWER_KEY_IPV4_DST 12
163 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
164 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
166 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
167 #define TCA_FLOWER_KEY_IPV6_SRC 14
169 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
170 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
172 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
173 #define TCA_FLOWER_KEY_IPV6_DST 16
175 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
176 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
178 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
179 #define TCA_FLOWER_KEY_TCP_SRC 18
181 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
182 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
184 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
185 #define TCA_FLOWER_KEY_TCP_DST 19
187 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
188 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
190 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
191 #define TCA_FLOWER_KEY_UDP_SRC 20
193 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
194 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
196 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
197 #define TCA_FLOWER_KEY_UDP_DST 21
199 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
200 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
202 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
203 #define TCA_FLOWER_KEY_VLAN_ID 23
205 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
206 #define TCA_FLOWER_KEY_VLAN_PRIO 24
208 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
209 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
211 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
212 #define TCA_FLOWER_KEY_TCP_FLAGS 71
214 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
215 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
217 #ifndef HAVE_TC_ACT_GOTO_CHAIN
218 #define TC_ACT_GOTO_CHAIN 0x20000000
221 #ifndef IPV6_ADDR_LEN
222 #define IPV6_ADDR_LEN 16
225 #ifndef IPV4_ADDR_LEN
226 #define IPV4_ADDR_LEN 4
230 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
233 /** Empty masks for known item types. */
235 struct rte_flow_item_port_id port_id;
236 struct rte_flow_item_eth eth;
237 struct rte_flow_item_vlan vlan;
238 struct rte_flow_item_ipv4 ipv4;
239 struct rte_flow_item_ipv6 ipv6;
240 struct rte_flow_item_tcp tcp;
241 struct rte_flow_item_udp udp;
242 } flow_tcf_mask_empty;
244 /** Supported masks for known item types. */
245 static const struct {
246 struct rte_flow_item_port_id port_id;
247 struct rte_flow_item_eth eth;
248 struct rte_flow_item_vlan vlan;
249 struct rte_flow_item_ipv4 ipv4;
250 struct rte_flow_item_ipv6 ipv6;
251 struct rte_flow_item_tcp tcp;
252 struct rte_flow_item_udp udp;
253 } flow_tcf_mask_supported = {
258 .type = RTE_BE16(0xffff),
259 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
260 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
263 /* PCP and VID only, no DEI. */
264 .tci = RTE_BE16(0xefff),
265 .inner_type = RTE_BE16(0xffff),
268 .next_proto_id = 0xff,
269 .src_addr = RTE_BE32(0xffffffff),
270 .dst_addr = RTE_BE32(0xffffffff),
275 "\xff\xff\xff\xff\xff\xff\xff\xff"
276 "\xff\xff\xff\xff\xff\xff\xff\xff",
278 "\xff\xff\xff\xff\xff\xff\xff\xff"
279 "\xff\xff\xff\xff\xff\xff\xff\xff",
282 .src_port = RTE_BE16(0xffff),
283 .dst_port = RTE_BE16(0xffff),
287 .src_port = RTE_BE16(0xffff),
288 .dst_port = RTE_BE16(0xffff),
292 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
293 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
294 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
295 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
296 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
298 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
300 /** DPDK port to network interface index (ifindex) conversion. */
301 struct flow_tcf_ptoi {
302 uint16_t port_id; /**< DPDK port ID. */
303 unsigned int ifindex; /**< Network interface index. */
306 /* Due to a limitation on driver/FW. */
307 #define MLX5_TCF_GROUP_ID_MAX 3
308 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
310 #define MLX5_TCF_FATE_ACTIONS \
311 (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
312 MLX5_FLOW_ACTION_JUMP)
314 #define MLX5_TCF_VLAN_ACTIONS \
315 (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
316 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
318 #define MLX5_TCF_PEDIT_ACTIONS \
319 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
320 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
321 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)
323 #define MLX5_TCF_CONFIG_ACTIONS \
324 (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
325 MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
326 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | MLX5_TCF_PEDIT_ACTIONS)
328 #define MAX_PEDIT_KEYS 128
329 #define SZ_PEDIT_KEY_VAL 4
331 #define NUM_OF_PEDIT_KEYS(sz) \
332 (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
334 struct pedit_key_ex {
335 enum pedit_header_type htype;
339 struct pedit_parser {
340 struct tc_pedit_sel sel;
341 struct tc_pedit_key keys[MAX_PEDIT_KEYS];
342 struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
347 * Set pedit key of transport (TCP/UDP) port value
350 * pointer to action specification
351 * @param[in,out] p_parser
352 * pointer to pedit_parser
353 * @param[in] item_flags
354 * flags of all items presented
357 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
358 struct pedit_parser *p_parser,
361 int idx = p_parser->sel.nkeys;
363 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
364 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
365 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
366 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
367 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
368 /* offset of src/dst port is same for TCP and UDP */
369 p_parser->keys[idx].off =
370 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
371 offsetof(struct tcp_hdr, src_port) :
372 offsetof(struct tcp_hdr, dst_port);
373 p_parser->keys[idx].mask = 0xFFFF0000;
374 p_parser->keys[idx].val =
375 (__u32)((const struct rte_flow_action_set_tp *)
376 actions->conf)->port;
377 p_parser->sel.nkeys = (++idx);
381 * Set pedit key of ipv6 address
384 * pointer to action specification
385 * @param[in,out] p_parser
386 * pointer to pedit_parser
389 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
390 struct pedit_parser *p_parser)
392 int idx = p_parser->sel.nkeys;
393 int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
395 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
396 offsetof(struct ipv6_hdr, src_addr) :
397 offsetof(struct ipv6_hdr, dst_addr);
398 const struct rte_flow_action_set_ipv6 *conf =
399 (const struct rte_flow_action_set_ipv6 *)actions->conf;
401 for (int i = 0; i < keys; i++, idx++) {
402 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
403 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
404 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
405 p_parser->keys[idx].mask = ~UINT32_MAX;
406 memcpy(&p_parser->keys[idx].val,
407 conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
410 p_parser->sel.nkeys += keys;
414 * Set pedit key of ipv4 address
417 * pointer to action specification
418 * @param[in,out] p_parser
419 * pointer to pedit_parser
422 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
423 struct pedit_parser *p_parser)
425 int idx = p_parser->sel.nkeys;
427 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
428 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
429 p_parser->keys[idx].off =
430 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
431 offsetof(struct ipv4_hdr, src_addr) :
432 offsetof(struct ipv4_hdr, dst_addr);
433 p_parser->keys[idx].mask = ~UINT32_MAX;
434 p_parser->keys[idx].val =
435 ((const struct rte_flow_action_set_ipv4 *)
436 actions->conf)->ipv4_addr;
437 p_parser->sel.nkeys = (++idx);
441 * Create the pedit's na attribute in netlink message
442 * on pre-allocate message buffer
445 * pointer to pre-allocated netlink message buffer
446 * @param[in,out] actions
447 * pointer to pointer of actions specification.
448 * @param[in,out] action_flags
449 * pointer to actions flags
450 * @param[in] item_flags
451 * flags of all item presented
454 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
455 const struct rte_flow_action **actions,
458 struct pedit_parser p_parser;
459 struct nlattr *na_act_options;
460 struct nlattr *na_pedit_keys;
462 memset(&p_parser, 0, sizeof(p_parser));
463 mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
464 na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
465 /* all modify header actions should be in one tc-pedit action */
466 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
467 switch ((*actions)->type) {
468 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
469 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
470 flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
472 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
473 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
474 flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
476 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
477 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
478 flow_tcf_pedit_key_set_tp_port(*actions,
479 &p_parser, item_flags);
482 goto pedit_mnl_msg_done;
486 p_parser.sel.action = TC_ACT_PIPE;
487 mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
488 sizeof(p_parser.sel) +
489 p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
492 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
493 for (int i = 0; i < p_parser.sel.nkeys; i++) {
494 struct nlattr *na_pedit_key =
495 mnl_attr_nest_start(nl,
496 TCA_PEDIT_KEY_EX | NLA_F_NESTED);
497 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
498 p_parser.keys_ex[i].htype);
499 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
500 p_parser.keys_ex[i].cmd);
501 mnl_attr_nest_end(nl, na_pedit_key);
503 mnl_attr_nest_end(nl, na_pedit_keys);
504 mnl_attr_nest_end(nl, na_act_options);
509 * Calculate max memory size of one TC-pedit actions.
510 * One TC-pedit action can contain set of keys each defining
511 * a rewrite element (rte_flow action)
513 * @param[in,out] actions
514 * actions specification.
515 * @param[in,out] action_flags
517 * @param[in,out] size
520 * Max memory size of one TC-pedit action
523 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
524 uint64_t *action_flags)
530 pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
531 SZ_NLATTR_STRZ_OF("pedit") +
532 SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
533 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
534 switch ((*actions)->type) {
535 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
536 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
537 flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
539 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
540 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
541 flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
543 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
544 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
545 flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
547 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
548 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
549 flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
551 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
552 /* TCP is as same as UDP */
553 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
554 flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
556 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
557 /* TCP is as same as UDP */
558 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
559 flags |= MLX5_FLOW_ACTION_SET_TP_DST;
562 goto get_pedit_action_size_done;
565 get_pedit_action_size_done:
566 /* TCA_PEDIT_PARAMS_EX */
568 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
569 keys * sizeof(struct tc_pedit_key));
570 pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
572 /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
573 (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
574 SZ_NLATTR_DATA_OF(2));
575 (*action_flags) |= flags;
581 * Retrieve mask for pattern item.
583 * This function does basic sanity checks on a pattern item in order to
584 * return the most appropriate mask for it.
587 * Item specification.
588 * @param[in] mask_default
589 * Default mask for pattern item as specified by the flow API.
590 * @param[in] mask_supported
591 * Mask fields supported by the implementation.
592 * @param[in] mask_empty
593 * Empty mask to return when there is no specification.
595 * Perform verbose error reporting if not NULL.
598 * Either @p item->mask or one of the mask parameters on success, NULL
599 * otherwise and rte_errno is set.
602 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
603 const void *mask_supported, const void *mask_empty,
604 size_t mask_size, struct rte_flow_error *error)
609 /* item->last and item->mask cannot exist without item->spec. */
610 if (!item->spec && (item->mask || item->last)) {
611 rte_flow_error_set(error, EINVAL,
612 RTE_FLOW_ERROR_TYPE_ITEM, item,
613 "\"mask\" or \"last\" field provided without"
614 " a corresponding \"spec\"");
617 /* No spec, no mask, no problem. */
620 mask = item->mask ? item->mask : mask_default;
623 * Single-pass check to make sure that:
624 * - Mask is supported, no bits are set outside mask_supported.
625 * - Both item->spec and item->last are included in mask.
627 for (i = 0; i != mask_size; ++i) {
630 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
631 ((const uint8_t *)mask_supported)[i]) {
632 rte_flow_error_set(error, ENOTSUP,
633 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
634 "unsupported field found"
639 (((const uint8_t *)item->spec)[i] & mask[i]) !=
640 (((const uint8_t *)item->last)[i] & mask[i])) {
641 rte_flow_error_set(error, EINVAL,
642 RTE_FLOW_ERROR_TYPE_ITEM_LAST,
644 "range between \"spec\" and \"last\""
645 " not comprised in \"mask\"");
653 * Build a conversion table between port ID and ifindex.
656 * Pointer to Ethernet device.
658 * Pointer to ptoi table.
660 * Size of ptoi table provided.
663 * Size of ptoi table filled.
666 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
669 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
670 uint16_t port_id[n + 1];
672 unsigned int own = 0;
674 /* At least one port is needed when no switch domain is present. */
677 port_id[0] = dev->data->port_id;
679 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
683 for (i = 0; i != n; ++i) {
684 struct rte_eth_dev_info dev_info;
686 rte_eth_dev_info_get(port_id[i], &dev_info);
687 if (port_id[i] == dev->data->port_id)
689 ptoi[i].port_id = port_id[i];
690 ptoi[i].ifindex = dev_info.if_index;
692 /* Ensure first entry of ptoi[] is the current device. */
698 /* An entry with zero ifindex terminates ptoi[]. */
705 * Verify the @p attr will be correctly understood by the E-switch.
708 * Pointer to flow attributes
710 * Pointer to error structure.
713 * 0 on success, a negative errno value otherwise and rte_errno is set.
716 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
717 struct rte_flow_error *error)
720 * Supported attributes: groups, some priorities and ingress only.
721 * group is supported only if kernel supports chain. Don't care about
722 * transfer as it is the caller's problem.
724 if (attr->group > MLX5_TCF_GROUP_ID_MAX)
725 return rte_flow_error_set(error, ENOTSUP,
726 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
727 "group ID larger than "
728 RTE_STR(MLX5_TCF_GROUP_ID_MAX)
730 else if (attr->group > 0 &&
731 attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
732 return rte_flow_error_set(error, ENOTSUP,
733 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
735 "lowest priority level is "
736 RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
737 " when group is configured");
738 else if (attr->priority > 0xfffe)
739 return rte_flow_error_set(error, ENOTSUP,
740 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
742 "lowest priority level is 0xfffe");
744 return rte_flow_error_set(error, EINVAL,
745 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
746 attr, "only ingress is supported");
748 return rte_flow_error_set(error, ENOTSUP,
749 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
750 attr, "egress is not supported");
755 * Validate flow for E-Switch.
758 * Pointer to the priv structure.
760 * Pointer to the flow attributes.
762 * Pointer to the list of items.
764 * Pointer to the list of actions.
766 * Pointer to the error structure.
769 * 0 on success, a negative errno value otherwise and rte_ernno is set.
772 flow_tcf_validate(struct rte_eth_dev *dev,
773 const struct rte_flow_attr *attr,
774 const struct rte_flow_item items[],
775 const struct rte_flow_action actions[],
776 struct rte_flow_error *error)
779 const struct rte_flow_item_port_id *port_id;
780 const struct rte_flow_item_eth *eth;
781 const struct rte_flow_item_vlan *vlan;
782 const struct rte_flow_item_ipv4 *ipv4;
783 const struct rte_flow_item_ipv6 *ipv6;
784 const struct rte_flow_item_tcp *tcp;
785 const struct rte_flow_item_udp *udp;
788 const struct rte_flow_action_port_id *port_id;
789 const struct rte_flow_action_jump *jump;
790 const struct rte_flow_action_of_push_vlan *of_push_vlan;
791 const struct rte_flow_action_of_set_vlan_vid *
793 const struct rte_flow_action_of_set_vlan_pcp *
795 const struct rte_flow_action_set_ipv4 *set_ipv4;
796 const struct rte_flow_action_set_ipv6 *set_ipv6;
798 uint32_t item_flags = 0;
799 uint32_t action_flags = 0;
800 uint8_t next_protocol = -1;
801 unsigned int tcm_ifindex = 0;
802 uint8_t pedit_validated = 0;
803 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
804 struct rte_eth_dev *port_id_dev = NULL;
808 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
809 PTOI_TABLE_SZ_MAX(dev)));
810 ret = flow_tcf_validate_attributes(attr, error);
813 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
816 switch (items->type) {
817 case RTE_FLOW_ITEM_TYPE_VOID:
819 case RTE_FLOW_ITEM_TYPE_PORT_ID:
820 mask.port_id = flow_tcf_item_mask
821 (items, &rte_flow_item_port_id_mask,
822 &flow_tcf_mask_supported.port_id,
823 &flow_tcf_mask_empty.port_id,
824 sizeof(flow_tcf_mask_supported.port_id),
828 if (mask.port_id == &flow_tcf_mask_empty.port_id) {
832 spec.port_id = items->spec;
833 if (mask.port_id->id && mask.port_id->id != 0xffffffff)
834 return rte_flow_error_set
836 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
838 "no support for partial mask on"
840 if (!mask.port_id->id)
843 for (i = 0; ptoi[i].ifindex; ++i)
844 if (ptoi[i].port_id == spec.port_id->id)
846 if (!ptoi[i].ifindex)
847 return rte_flow_error_set
849 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
851 "missing data to convert port ID to"
853 if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
854 return rte_flow_error_set
856 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
858 "cannot match traffic for"
859 " several port IDs through"
860 " a single flow rule");
861 tcm_ifindex = ptoi[i].ifindex;
864 case RTE_FLOW_ITEM_TYPE_ETH:
865 ret = mlx5_flow_validate_item_eth(items, item_flags,
869 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
871 * Redundant check due to different supported mask.
872 * Same for the rest of items.
874 mask.eth = flow_tcf_item_mask
875 (items, &rte_flow_item_eth_mask,
876 &flow_tcf_mask_supported.eth,
877 &flow_tcf_mask_empty.eth,
878 sizeof(flow_tcf_mask_supported.eth),
882 if (mask.eth->type && mask.eth->type !=
884 return rte_flow_error_set
886 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
888 "no support for partial mask on"
891 case RTE_FLOW_ITEM_TYPE_VLAN:
892 ret = mlx5_flow_validate_item_vlan(items, item_flags,
896 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
897 mask.vlan = flow_tcf_item_mask
898 (items, &rte_flow_item_vlan_mask,
899 &flow_tcf_mask_supported.vlan,
900 &flow_tcf_mask_empty.vlan,
901 sizeof(flow_tcf_mask_supported.vlan),
905 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
906 (mask.vlan->tci & RTE_BE16(0xe000)) !=
908 (mask.vlan->tci & RTE_BE16(0x0fff) &&
909 (mask.vlan->tci & RTE_BE16(0x0fff)) !=
911 (mask.vlan->inner_type &&
912 mask.vlan->inner_type != RTE_BE16(0xffff)))
913 return rte_flow_error_set
915 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
917 "no support for partial masks on"
918 " \"tci\" (PCP and VID parts) and"
919 " \"inner_type\" fields");
921 case RTE_FLOW_ITEM_TYPE_IPV4:
922 ret = mlx5_flow_validate_item_ipv4(items, item_flags,
926 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
927 mask.ipv4 = flow_tcf_item_mask
928 (items, &rte_flow_item_ipv4_mask,
929 &flow_tcf_mask_supported.ipv4,
930 &flow_tcf_mask_empty.ipv4,
931 sizeof(flow_tcf_mask_supported.ipv4),
935 if (mask.ipv4->hdr.next_proto_id &&
936 mask.ipv4->hdr.next_proto_id != 0xff)
937 return rte_flow_error_set
939 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
941 "no support for partial mask on"
942 " \"hdr.next_proto_id\" field");
943 else if (mask.ipv4->hdr.next_proto_id)
945 ((const struct rte_flow_item_ipv4 *)
946 (items->spec))->hdr.next_proto_id;
948 case RTE_FLOW_ITEM_TYPE_IPV6:
949 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
953 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
954 mask.ipv6 = flow_tcf_item_mask
955 (items, &rte_flow_item_ipv6_mask,
956 &flow_tcf_mask_supported.ipv6,
957 &flow_tcf_mask_empty.ipv6,
958 sizeof(flow_tcf_mask_supported.ipv6),
962 if (mask.ipv6->hdr.proto &&
963 mask.ipv6->hdr.proto != 0xff)
964 return rte_flow_error_set
966 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
968 "no support for partial mask on"
969 " \"hdr.proto\" field");
970 else if (mask.ipv6->hdr.proto)
972 ((const struct rte_flow_item_ipv6 *)
973 (items->spec))->hdr.proto;
975 case RTE_FLOW_ITEM_TYPE_UDP:
976 ret = mlx5_flow_validate_item_udp(items, item_flags,
977 next_protocol, error);
980 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
981 mask.udp = flow_tcf_item_mask
982 (items, &rte_flow_item_udp_mask,
983 &flow_tcf_mask_supported.udp,
984 &flow_tcf_mask_empty.udp,
985 sizeof(flow_tcf_mask_supported.udp),
990 case RTE_FLOW_ITEM_TYPE_TCP:
991 ret = mlx5_flow_validate_item_tcp
994 &flow_tcf_mask_supported.tcp,
998 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
999 mask.tcp = flow_tcf_item_mask
1000 (items, &rte_flow_item_tcp_mask,
1001 &flow_tcf_mask_supported.tcp,
1002 &flow_tcf_mask_empty.tcp,
1003 sizeof(flow_tcf_mask_supported.tcp),
1009 return rte_flow_error_set(error, ENOTSUP,
1010 RTE_FLOW_ERROR_TYPE_ITEM,
1011 NULL, "item not supported");
1014 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1016 uint32_t current_action_flag = 0;
1018 switch (actions->type) {
1019 case RTE_FLOW_ACTION_TYPE_VOID:
1021 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1022 current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1025 conf.port_id = actions->conf;
1026 if (conf.port_id->original)
1029 for (i = 0; ptoi[i].ifindex; ++i)
1030 if (ptoi[i].port_id == conf.port_id->id)
1032 if (!ptoi[i].ifindex)
1033 return rte_flow_error_set
1035 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1037 "missing data to convert port ID to"
1039 port_id_dev = &rte_eth_devices[conf.port_id->id];
1041 case RTE_FLOW_ACTION_TYPE_JUMP:
1042 current_action_flag = MLX5_FLOW_ACTION_JUMP;
1045 conf.jump = actions->conf;
1046 if (attr->group >= conf.jump->group)
1047 return rte_flow_error_set
1049 RTE_FLOW_ERROR_TYPE_ACTION,
1051 "can jump only to a group forward");
1053 case RTE_FLOW_ACTION_TYPE_DROP:
1054 current_action_flag = MLX5_FLOW_ACTION_DROP;
1056 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1057 current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1059 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1060 current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1062 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1063 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1064 return rte_flow_error_set
1066 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1067 "vlan modify is not supported,"
1068 " set action must follow push action");
1069 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1071 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1072 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1073 return rte_flow_error_set
1075 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1076 "vlan modify is not supported,"
1077 " set action must follow push action");
1078 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1080 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1081 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1083 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1084 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1086 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1087 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1089 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1090 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1092 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1093 current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1095 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1096 current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1099 return rte_flow_error_set(error, ENOTSUP,
1100 RTE_FLOW_ERROR_TYPE_ACTION,
1102 "action not supported");
1104 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1106 return rte_flow_error_set(error, EINVAL,
1107 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1109 "action configuration not set");
1111 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1113 return rte_flow_error_set(error, ENOTSUP,
1114 RTE_FLOW_ERROR_TYPE_ACTION,
1116 "set actions should be "
1117 "listed successively");
1118 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1119 (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1120 pedit_validated = 1;
1121 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1122 (action_flags & MLX5_TCF_FATE_ACTIONS))
1123 return rte_flow_error_set(error, EINVAL,
1124 RTE_FLOW_ERROR_TYPE_ACTION,
1126 "can't have multiple fate"
1128 action_flags |= current_action_flag;
1130 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1131 (action_flags & MLX5_FLOW_ACTION_DROP))
1132 return rte_flow_error_set(error, ENOTSUP,
1133 RTE_FLOW_ERROR_TYPE_ACTION,
1135 "set action is not compatible with "
1137 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1138 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1139 return rte_flow_error_set(error, ENOTSUP,
1140 RTE_FLOW_ERROR_TYPE_ACTION,
1142 "set action must be followed by "
1145 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1146 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1147 return rte_flow_error_set(error, EINVAL,
1148 RTE_FLOW_ERROR_TYPE_ACTION,
1150 "no ipv4 item found in"
1154 (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1155 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1156 return rte_flow_error_set(error, EINVAL,
1157 RTE_FLOW_ERROR_TYPE_ACTION,
1159 "no ipv6 item found in"
1163 (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1165 (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1166 MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1167 return rte_flow_error_set(error, EINVAL,
1168 RTE_FLOW_ERROR_TYPE_ACTION,
1170 "no TCP/UDP item found in"
1174 * FW syndrome (0xA9C090):
1175 * set_flow_table_entry: push vlan action fte in fdb can ONLY be
1176 * forward to the uplink.
1178 if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1179 (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1180 ((struct priv *)port_id_dev->data->dev_private)->representor)
1181 return rte_flow_error_set(error, ENOTSUP,
1182 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1183 "vlan push can only be applied"
1184 " when forwarding to uplink port");
1186 * FW syndrome (0x294609):
1187 * set_flow_table_entry: modify/pop/push actions in fdb flow table
1188 * are supported only while forwarding to vport.
1190 if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1191 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1192 return rte_flow_error_set(error, ENOTSUP,
1193 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1194 "vlan actions are supported"
1195 " only with port_id action");
1196 if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1197 return rte_flow_error_set(error, EINVAL,
1198 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1199 "no fate action is found");
1204 * Calculate maximum size of memory for flow items of Linux TC flower and
1205 * extract specified items.
1208 * Pointer to the list of items.
1209 * @param[out] item_flags
1210 * Pointer to the detected items.
1213 * Maximum size of memory for items.
1216 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1217 const struct rte_flow_item items[],
1218 uint64_t *item_flags)
1223 size += SZ_NLATTR_STRZ_OF("flower") +
1224 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1225 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
1226 if (attr->group > 0)
1227 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
1228 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1229 switch (items->type) {
1230 case RTE_FLOW_ITEM_TYPE_VOID:
1232 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1234 case RTE_FLOW_ITEM_TYPE_ETH:
1235 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1236 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1237 /* dst/src MAC addr and mask. */
1238 flags |= MLX5_FLOW_LAYER_OUTER_L2;
1240 case RTE_FLOW_ITEM_TYPE_VLAN:
1241 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1242 SZ_NLATTR_TYPE_OF(uint16_t) +
1243 /* VLAN Ether type. */
1244 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1245 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1246 flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1248 case RTE_FLOW_ITEM_TYPE_IPV4:
1249 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1250 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1251 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1252 /* dst/src IP addr and mask. */
1253 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1255 case RTE_FLOW_ITEM_TYPE_IPV6:
1256 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1257 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1258 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1259 /* dst/src IP addr and mask. */
1260 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1262 case RTE_FLOW_ITEM_TYPE_UDP:
1263 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1264 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1265 /* dst/src port and mask. */
1266 flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1268 case RTE_FLOW_ITEM_TYPE_TCP:
1269 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1270 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1271 /* dst/src port and mask. */
1272 flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1276 "unsupported item %p type %d,"
1277 " items must be validated before flow creation",
1278 (const void *)items, items->type);
1282 *item_flags = flags;
1287 * Calculate maximum size of memory for flow actions of Linux TC flower and
1288 * extract specified actions.
1290 * @param[in] actions
1291 * Pointer to the list of actions.
1292 * @param[out] action_flags
1293 * Pointer to the detected actions.
1296 * Maximum size of memory for actions.
1299 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1300 uint64_t *action_flags)
1305 size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1306 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1307 switch (actions->type) {
1308 case RTE_FLOW_ACTION_TYPE_VOID:
1310 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1311 size += SZ_NLATTR_NEST + /* na_act_index. */
1312 SZ_NLATTR_STRZ_OF("mirred") +
1313 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1314 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1315 flags |= MLX5_FLOW_ACTION_PORT_ID;
1317 case RTE_FLOW_ACTION_TYPE_JUMP:
1318 size += SZ_NLATTR_NEST + /* na_act_index. */
1319 SZ_NLATTR_STRZ_OF("gact") +
1320 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1321 SZ_NLATTR_TYPE_OF(struct tc_gact);
1322 flags |= MLX5_FLOW_ACTION_JUMP;
1324 case RTE_FLOW_ACTION_TYPE_DROP:
1325 size += SZ_NLATTR_NEST + /* na_act_index. */
1326 SZ_NLATTR_STRZ_OF("gact") +
1327 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1328 SZ_NLATTR_TYPE_OF(struct tc_gact);
1329 flags |= MLX5_FLOW_ACTION_DROP;
1331 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1332 flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1333 goto action_of_vlan;
1334 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1335 flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1336 goto action_of_vlan;
1337 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1338 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1339 goto action_of_vlan;
1340 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1341 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1342 goto action_of_vlan;
1344 size += SZ_NLATTR_NEST + /* na_act_index. */
1345 SZ_NLATTR_STRZ_OF("vlan") +
1346 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1347 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1348 SZ_NLATTR_TYPE_OF(uint16_t) +
1349 /* VLAN protocol. */
1350 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1351 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
1353 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1354 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1355 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1356 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1357 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1358 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1359 size += flow_tcf_get_pedit_actions_size(&actions,
1364 "unsupported action %p type %d,"
1365 " items must be validated before flow creation",
1366 (const void *)actions, actions->type);
1370 *action_flags = flags;
1375 * Brand rtnetlink buffer with unique handle.
1377 * This handle should be unique for a given network interface to avoid
1381 * Pointer to Netlink message.
1383 * Unique 32-bit handle to use.
1386 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1388 struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1390 tcm->tcm_handle = handle;
1391 DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1392 (void *)nlh, handle);
1396 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1397 * memory required, allocates the memory, initializes Netlink message headers
1398 * and set unique TC message handle.
1401 * Pointer to the flow attributes.
1403 * Pointer to the list of items.
1404 * @param[in] actions
1405 * Pointer to the list of actions.
1406 * @param[out] item_flags
1407 * Pointer to bit mask of all items detected.
1408 * @param[out] action_flags
1409 * Pointer to bit mask of all actions detected.
1411 * Pointer to the error structure.
1414 * Pointer to mlx5_flow object on success,
1415 * otherwise NULL and rte_ernno is set.
1417 static struct mlx5_flow *
1418 flow_tcf_prepare(const struct rte_flow_attr *attr,
1419 const struct rte_flow_item items[],
1420 const struct rte_flow_action actions[],
1421 uint64_t *item_flags, uint64_t *action_flags,
1422 struct rte_flow_error *error)
1424 size_t size = sizeof(struct mlx5_flow) +
1425 MNL_ALIGN(sizeof(struct nlmsghdr)) +
1426 MNL_ALIGN(sizeof(struct tcmsg));
1427 struct mlx5_flow *dev_flow;
1428 struct nlmsghdr *nlh;
1431 size += flow_tcf_get_items_and_size(attr, items, item_flags);
1432 size += flow_tcf_get_actions_and_size(actions, action_flags);
1433 dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1435 rte_flow_error_set(error, ENOMEM,
1436 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1437 "not enough memory to create E-Switch flow");
1440 nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1441 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1442 *dev_flow = (struct mlx5_flow){
1443 .tcf = (struct mlx5_flow_tcf){
1449 * Generate a reasonably unique handle based on the address of the
1452 * This is straightforward on 32-bit systems where the flow pointer can
1453 * be used directly. Otherwise, its least significant part is taken
1454 * after shifting it by the previous power of two of the pointed buffer
1457 if (sizeof(dev_flow) <= 4)
1458 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1460 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1461 rte_log2_u32(rte_align32prevpow2(size)));
1466 * Translate flow for Linux TC flower and construct Netlink message.
1469 * Pointer to the priv structure.
1470 * @param[in, out] flow
1471 * Pointer to the sub flow.
1473 * Pointer to the flow attributes.
1475 * Pointer to the list of items.
1476 * @param[in] actions
1477 * Pointer to the list of actions.
1479 * Pointer to the error structure.
1482 * 0 on success, a negative errno value otherwise and rte_ernno is set.
1485 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1486 const struct rte_flow_attr *attr,
1487 const struct rte_flow_item items[],
1488 const struct rte_flow_action actions[],
1489 struct rte_flow_error *error)
1492 const struct rte_flow_item_port_id *port_id;
1493 const struct rte_flow_item_eth *eth;
1494 const struct rte_flow_item_vlan *vlan;
1495 const struct rte_flow_item_ipv4 *ipv4;
1496 const struct rte_flow_item_ipv6 *ipv6;
1497 const struct rte_flow_item_tcp *tcp;
1498 const struct rte_flow_item_udp *udp;
1501 const struct rte_flow_action_port_id *port_id;
1502 const struct rte_flow_action_jump *jump;
1503 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1504 const struct rte_flow_action_of_set_vlan_vid *
1506 const struct rte_flow_action_of_set_vlan_pcp *
1509 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1510 struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1511 struct tcmsg *tcm = dev_flow->tcf.tcm;
1512 uint32_t na_act_index_cur;
1513 bool eth_type_set = 0;
1514 bool vlan_present = 0;
1515 bool vlan_eth_type_set = 0;
1516 bool ip_proto_set = 0;
1517 struct nlattr *na_flower;
1518 struct nlattr *na_flower_act;
1519 struct nlattr *na_vlan_id = NULL;
1520 struct nlattr *na_vlan_priority = NULL;
1521 uint64_t item_flags = 0;
1523 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1524 PTOI_TABLE_SZ_MAX(dev)));
1525 nlh = dev_flow->tcf.nlh;
1526 tcm = dev_flow->tcf.tcm;
1527 /* Prepare API must have been called beforehand. */
1528 assert(nlh != NULL && tcm != NULL);
1529 tcm->tcm_family = AF_UNSPEC;
1530 tcm->tcm_ifindex = ptoi[0].ifindex;
1531 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1533 * Priority cannot be zero to prevent the kernel from picking one
1536 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1537 RTE_BE16(ETH_P_ALL));
1538 if (attr->group > 0)
1539 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1540 mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1541 na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1542 mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
1543 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1546 switch (items->type) {
1547 case RTE_FLOW_ITEM_TYPE_VOID:
1549 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1550 mask.port_id = flow_tcf_item_mask
1551 (items, &rte_flow_item_port_id_mask,
1552 &flow_tcf_mask_supported.port_id,
1553 &flow_tcf_mask_empty.port_id,
1554 sizeof(flow_tcf_mask_supported.port_id),
1556 assert(mask.port_id);
1557 if (mask.port_id == &flow_tcf_mask_empty.port_id)
1559 spec.port_id = items->spec;
1560 if (!mask.port_id->id)
1563 for (i = 0; ptoi[i].ifindex; ++i)
1564 if (ptoi[i].port_id == spec.port_id->id)
1566 assert(ptoi[i].ifindex);
1567 tcm->tcm_ifindex = ptoi[i].ifindex;
1569 case RTE_FLOW_ITEM_TYPE_ETH:
1570 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1571 mask.eth = flow_tcf_item_mask
1572 (items, &rte_flow_item_eth_mask,
1573 &flow_tcf_mask_supported.eth,
1574 &flow_tcf_mask_empty.eth,
1575 sizeof(flow_tcf_mask_supported.eth),
1578 if (mask.eth == &flow_tcf_mask_empty.eth)
1580 spec.eth = items->spec;
1581 if (mask.eth->type) {
1582 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1586 if (!is_zero_ether_addr(&mask.eth->dst)) {
1587 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1589 spec.eth->dst.addr_bytes);
1590 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1592 mask.eth->dst.addr_bytes);
1594 if (!is_zero_ether_addr(&mask.eth->src)) {
1595 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1597 spec.eth->src.addr_bytes);
1598 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1600 mask.eth->src.addr_bytes);
1603 case RTE_FLOW_ITEM_TYPE_VLAN:
1604 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1605 mask.vlan = flow_tcf_item_mask
1606 (items, &rte_flow_item_vlan_mask,
1607 &flow_tcf_mask_supported.vlan,
1608 &flow_tcf_mask_empty.vlan,
1609 sizeof(flow_tcf_mask_supported.vlan),
1613 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1614 RTE_BE16(ETH_P_8021Q));
1617 if (mask.vlan == &flow_tcf_mask_empty.vlan)
1619 spec.vlan = items->spec;
1620 if (mask.vlan->inner_type) {
1621 mnl_attr_put_u16(nlh,
1622 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1623 spec.vlan->inner_type);
1624 vlan_eth_type_set = 1;
1626 if (mask.vlan->tci & RTE_BE16(0xe000))
1627 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1629 (spec.vlan->tci) >> 13) & 0x7);
1630 if (mask.vlan->tci & RTE_BE16(0x0fff))
1631 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1636 case RTE_FLOW_ITEM_TYPE_IPV4:
1637 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1638 mask.ipv4 = flow_tcf_item_mask
1639 (items, &rte_flow_item_ipv4_mask,
1640 &flow_tcf_mask_supported.ipv4,
1641 &flow_tcf_mask_empty.ipv4,
1642 sizeof(flow_tcf_mask_supported.ipv4),
1645 if (!eth_type_set || !vlan_eth_type_set)
1646 mnl_attr_put_u16(nlh,
1648 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1649 TCA_FLOWER_KEY_ETH_TYPE,
1650 RTE_BE16(ETH_P_IP));
1652 vlan_eth_type_set = 1;
1653 if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1655 spec.ipv4 = items->spec;
1656 if (mask.ipv4->hdr.next_proto_id) {
1657 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1658 spec.ipv4->hdr.next_proto_id);
1661 if (mask.ipv4->hdr.src_addr) {
1662 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1663 spec.ipv4->hdr.src_addr);
1664 mnl_attr_put_u32(nlh,
1665 TCA_FLOWER_KEY_IPV4_SRC_MASK,
1666 mask.ipv4->hdr.src_addr);
1668 if (mask.ipv4->hdr.dst_addr) {
1669 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1670 spec.ipv4->hdr.dst_addr);
1671 mnl_attr_put_u32(nlh,
1672 TCA_FLOWER_KEY_IPV4_DST_MASK,
1673 mask.ipv4->hdr.dst_addr);
1676 case RTE_FLOW_ITEM_TYPE_IPV6:
1677 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1678 mask.ipv6 = flow_tcf_item_mask
1679 (items, &rte_flow_item_ipv6_mask,
1680 &flow_tcf_mask_supported.ipv6,
1681 &flow_tcf_mask_empty.ipv6,
1682 sizeof(flow_tcf_mask_supported.ipv6),
1685 if (!eth_type_set || !vlan_eth_type_set)
1686 mnl_attr_put_u16(nlh,
1688 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1689 TCA_FLOWER_KEY_ETH_TYPE,
1690 RTE_BE16(ETH_P_IPV6));
1692 vlan_eth_type_set = 1;
1693 if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1695 spec.ipv6 = items->spec;
1696 if (mask.ipv6->hdr.proto) {
1697 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1698 spec.ipv6->hdr.proto);
1701 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1702 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1703 sizeof(spec.ipv6->hdr.src_addr),
1704 spec.ipv6->hdr.src_addr);
1705 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1706 sizeof(mask.ipv6->hdr.src_addr),
1707 mask.ipv6->hdr.src_addr);
1709 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1710 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1711 sizeof(spec.ipv6->hdr.dst_addr),
1712 spec.ipv6->hdr.dst_addr);
1713 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1714 sizeof(mask.ipv6->hdr.dst_addr),
1715 mask.ipv6->hdr.dst_addr);
1718 case RTE_FLOW_ITEM_TYPE_UDP:
1719 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1720 mask.udp = flow_tcf_item_mask
1721 (items, &rte_flow_item_udp_mask,
1722 &flow_tcf_mask_supported.udp,
1723 &flow_tcf_mask_empty.udp,
1724 sizeof(flow_tcf_mask_supported.udp),
1728 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1730 if (mask.udp == &flow_tcf_mask_empty.udp)
1732 spec.udp = items->spec;
1733 if (mask.udp->hdr.src_port) {
1734 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1735 spec.udp->hdr.src_port);
1736 mnl_attr_put_u16(nlh,
1737 TCA_FLOWER_KEY_UDP_SRC_MASK,
1738 mask.udp->hdr.src_port);
1740 if (mask.udp->hdr.dst_port) {
1741 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1742 spec.udp->hdr.dst_port);
1743 mnl_attr_put_u16(nlh,
1744 TCA_FLOWER_KEY_UDP_DST_MASK,
1745 mask.udp->hdr.dst_port);
1748 case RTE_FLOW_ITEM_TYPE_TCP:
1749 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1750 mask.tcp = flow_tcf_item_mask
1751 (items, &rte_flow_item_tcp_mask,
1752 &flow_tcf_mask_supported.tcp,
1753 &flow_tcf_mask_empty.tcp,
1754 sizeof(flow_tcf_mask_supported.tcp),
1758 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1760 if (mask.tcp == &flow_tcf_mask_empty.tcp)
1762 spec.tcp = items->spec;
1763 if (mask.tcp->hdr.src_port) {
1764 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
1765 spec.tcp->hdr.src_port);
1766 mnl_attr_put_u16(nlh,
1767 TCA_FLOWER_KEY_TCP_SRC_MASK,
1768 mask.tcp->hdr.src_port);
1770 if (mask.tcp->hdr.dst_port) {
1771 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
1772 spec.tcp->hdr.dst_port);
1773 mnl_attr_put_u16(nlh,
1774 TCA_FLOWER_KEY_TCP_DST_MASK,
1775 mask.tcp->hdr.dst_port);
1777 if (mask.tcp->hdr.tcp_flags) {
1780 TCA_FLOWER_KEY_TCP_FLAGS,
1782 (spec.tcp->hdr.tcp_flags));
1785 TCA_FLOWER_KEY_TCP_FLAGS_MASK,
1787 (mask.tcp->hdr.tcp_flags));
1791 return rte_flow_error_set(error, ENOTSUP,
1792 RTE_FLOW_ERROR_TYPE_ITEM,
1793 NULL, "item not supported");
1796 na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
1797 na_act_index_cur = 1;
1798 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1799 struct nlattr *na_act_index;
1800 struct nlattr *na_act;
1801 unsigned int vlan_act;
1804 switch (actions->type) {
1805 case RTE_FLOW_ACTION_TYPE_VOID:
1807 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1808 conf.port_id = actions->conf;
1809 if (conf.port_id->original)
1812 for (i = 0; ptoi[i].ifindex; ++i)
1813 if (ptoi[i].port_id == conf.port_id->id)
1815 assert(ptoi[i].ifindex);
1817 mnl_attr_nest_start(nlh, na_act_index_cur++);
1818 assert(na_act_index);
1819 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
1820 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1822 mnl_attr_put(nlh, TCA_MIRRED_PARMS,
1823 sizeof(struct tc_mirred),
1824 &(struct tc_mirred){
1825 .action = TC_ACT_STOLEN,
1826 .eaction = TCA_EGRESS_REDIR,
1827 .ifindex = ptoi[i].ifindex,
1829 mnl_attr_nest_end(nlh, na_act);
1830 mnl_attr_nest_end(nlh, na_act_index);
1832 case RTE_FLOW_ACTION_TYPE_JUMP:
1833 conf.jump = actions->conf;
1835 mnl_attr_nest_start(nlh, na_act_index_cur++);
1836 assert(na_act_index);
1837 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1838 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1840 mnl_attr_put(nlh, TCA_GACT_PARMS,
1841 sizeof(struct tc_gact),
1843 .action = TC_ACT_GOTO_CHAIN |
1846 mnl_attr_nest_end(nlh, na_act);
1847 mnl_attr_nest_end(nlh, na_act_index);
1849 case RTE_FLOW_ACTION_TYPE_DROP:
1851 mnl_attr_nest_start(nlh, na_act_index_cur++);
1852 assert(na_act_index);
1853 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1854 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1856 mnl_attr_put(nlh, TCA_GACT_PARMS,
1857 sizeof(struct tc_gact),
1859 .action = TC_ACT_SHOT,
1861 mnl_attr_nest_end(nlh, na_act);
1862 mnl_attr_nest_end(nlh, na_act_index);
1864 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1865 conf.of_push_vlan = NULL;
1866 vlan_act = TCA_VLAN_ACT_POP;
1867 goto action_of_vlan;
1868 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1869 conf.of_push_vlan = actions->conf;
1870 vlan_act = TCA_VLAN_ACT_PUSH;
1871 goto action_of_vlan;
1872 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1873 conf.of_set_vlan_vid = actions->conf;
1875 goto override_na_vlan_id;
1876 vlan_act = TCA_VLAN_ACT_MODIFY;
1877 goto action_of_vlan;
1878 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1879 conf.of_set_vlan_pcp = actions->conf;
1880 if (na_vlan_priority)
1881 goto override_na_vlan_priority;
1882 vlan_act = TCA_VLAN_ACT_MODIFY;
1883 goto action_of_vlan;
1886 mnl_attr_nest_start(nlh, na_act_index_cur++);
1887 assert(na_act_index);
1888 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
1889 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1891 mnl_attr_put(nlh, TCA_VLAN_PARMS,
1892 sizeof(struct tc_vlan),
1894 .action = TC_ACT_PIPE,
1895 .v_action = vlan_act,
1897 if (vlan_act == TCA_VLAN_ACT_POP) {
1898 mnl_attr_nest_end(nlh, na_act);
1899 mnl_attr_nest_end(nlh, na_act_index);
1902 if (vlan_act == TCA_VLAN_ACT_PUSH)
1903 mnl_attr_put_u16(nlh,
1904 TCA_VLAN_PUSH_VLAN_PROTOCOL,
1905 conf.of_push_vlan->ethertype);
1906 na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
1907 mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
1908 na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
1909 mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
1910 mnl_attr_nest_end(nlh, na_act);
1911 mnl_attr_nest_end(nlh, na_act_index);
1912 if (actions->type ==
1913 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
1914 override_na_vlan_id:
1915 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
1916 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
1918 (conf.of_set_vlan_vid->vlan_vid);
1919 } else if (actions->type ==
1920 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
1921 override_na_vlan_priority:
1922 na_vlan_priority->nla_type =
1923 TCA_VLAN_PUSH_VLAN_PRIORITY;
1924 *(uint8_t *)mnl_attr_get_payload
1925 (na_vlan_priority) =
1926 conf.of_set_vlan_pcp->vlan_pcp;
1929 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1930 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1931 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1932 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1933 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1934 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1936 mnl_attr_nest_start(nlh, na_act_index_cur++);
1937 flow_tcf_create_pedit_mnl_msg(nlh,
1938 &actions, item_flags);
1939 mnl_attr_nest_end(nlh, na_act_index);
1942 return rte_flow_error_set(error, ENOTSUP,
1943 RTE_FLOW_ERROR_TYPE_ACTION,
1945 "action not supported");
1949 assert(na_flower_act);
1950 mnl_attr_nest_end(nlh, na_flower_act);
1951 mnl_attr_nest_end(nlh, na_flower);
1956 * Send Netlink message with acknowledgment.
1959 * Libmnl socket to use.
1961 * Message to send. This function always raises the NLM_F_ACK flag before
1965 * 0 on success, a negative errno value otherwise and rte_errno is set.
1968 flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
1970 alignas(struct nlmsghdr)
1971 uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
1972 nlh->nlmsg_len - sizeof(*nlh)];
1973 uint32_t seq = random();
1976 nlh->nlmsg_flags |= NLM_F_ACK;
1977 nlh->nlmsg_seq = seq;
1978 ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
1980 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
1983 (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
1991 * Apply flow to E-Switch by sending Netlink message.
1994 * Pointer to Ethernet device.
1995 * @param[in, out] flow
1996 * Pointer to the sub flow.
1998 * Pointer to the error structure.
2001 * 0 on success, a negative errno value otherwise and rte_ernno is set.
2004 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2005 struct rte_flow_error *error)
2007 struct priv *priv = dev->data->dev_private;
2008 struct mnl_socket *nl = priv->mnl_socket;
2009 struct mlx5_flow *dev_flow;
2010 struct nlmsghdr *nlh;
2012 dev_flow = LIST_FIRST(&flow->dev_flows);
2013 /* E-Switch flow can't be expanded. */
2014 assert(!LIST_NEXT(dev_flow, next));
2015 nlh = dev_flow->tcf.nlh;
2016 nlh->nlmsg_type = RTM_NEWTFILTER;
2017 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2018 if (!flow_tcf_nl_ack(nl, nlh))
2020 return rte_flow_error_set(error, rte_errno,
2021 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2022 "netlink: failed to create TC flow rule");
2026 * Remove flow from E-Switch by sending Netlink message.
2029 * Pointer to Ethernet device.
2030 * @param[in, out] flow
2031 * Pointer to the sub flow.
2034 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2036 struct priv *priv = dev->data->dev_private;
2037 struct mnl_socket *nl = priv->mnl_socket;
2038 struct mlx5_flow *dev_flow;
2039 struct nlmsghdr *nlh;
2043 dev_flow = LIST_FIRST(&flow->dev_flows);
2046 /* E-Switch flow can't be expanded. */
2047 assert(!LIST_NEXT(dev_flow, next));
2048 nlh = dev_flow->tcf.nlh;
2049 nlh->nlmsg_type = RTM_DELTFILTER;
2050 nlh->nlmsg_flags = NLM_F_REQUEST;
2051 flow_tcf_nl_ack(nl, nlh);
2055 * Remove flow from E-Switch and release resources of the device flow.
2058 * Pointer to Ethernet device.
2059 * @param[in, out] flow
2060 * Pointer to the sub flow.
2063 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2065 struct mlx5_flow *dev_flow;
2069 flow_tcf_remove(dev, flow);
2070 dev_flow = LIST_FIRST(&flow->dev_flows);
2073 /* E-Switch flow can't be expanded. */
2074 assert(!LIST_NEXT(dev_flow, next));
2075 LIST_REMOVE(dev_flow, next);
2079 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2080 .validate = flow_tcf_validate,
2081 .prepare = flow_tcf_prepare,
2082 .translate = flow_tcf_translate,
2083 .apply = flow_tcf_apply,
2084 .remove = flow_tcf_remove,
2085 .destroy = flow_tcf_destroy,
2089 * Initialize ingress qdisc of a given network interface.
2092 * Libmnl socket of the @p NETLINK_ROUTE kind.
2094 * Index of network interface to initialize.
2096 * Perform verbose error reporting if not NULL.
2099 * 0 on success, a negative errno value otherwise and rte_errno is set.
2102 mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
2103 struct rte_flow_error *error)
2105 struct nlmsghdr *nlh;
2107 alignas(struct nlmsghdr)
2108 uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2110 /* Destroy existing ingress qdisc and everything attached to it. */
2111 nlh = mnl_nlmsg_put_header(buf);
2112 nlh->nlmsg_type = RTM_DELQDISC;
2113 nlh->nlmsg_flags = NLM_F_REQUEST;
2114 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2115 tcm->tcm_family = AF_UNSPEC;
2116 tcm->tcm_ifindex = ifindex;
2117 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2118 tcm->tcm_parent = TC_H_INGRESS;
2119 /* Ignore errors when qdisc is already absent. */
2120 if (flow_tcf_nl_ack(nl, nlh) &&
2121 rte_errno != EINVAL && rte_errno != ENOENT)
2122 return rte_flow_error_set(error, rte_errno,
2123 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2124 "netlink: failed to remove ingress"
2126 /* Create fresh ingress qdisc. */
2127 nlh = mnl_nlmsg_put_header(buf);
2128 nlh->nlmsg_type = RTM_NEWQDISC;
2129 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2130 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2131 tcm->tcm_family = AF_UNSPEC;
2132 tcm->tcm_ifindex = ifindex;
2133 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2134 tcm->tcm_parent = TC_H_INGRESS;
2135 mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2136 if (flow_tcf_nl_ack(nl, nlh))
2137 return rte_flow_error_set(error, rte_errno,
2138 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2139 "netlink: failed to create ingress"
2145 * Create and configure a libmnl socket for Netlink flow rules.
2148 * A valid libmnl socket object pointer on success, NULL otherwise and
2152 mlx5_flow_tcf_socket_create(void)
2154 struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
2157 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2159 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2164 mnl_socket_close(nl);
2169 * Destroy a libmnl socket.
2172 * Libmnl socket of the @p NETLINK_ROUTE kind.
2175 mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
2177 mnl_socket_close(nl);