1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
22 #include <sys/socket.h>
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
28 #include <rte_malloc.h>
31 #include "mlx5_flow.h"
32 #include "mlx5_autoconf.h"
34 #ifdef HAVE_TC_ACT_VLAN
36 #include <linux/tc_act/tc_vlan.h>
38 #else /* HAVE_TC_ACT_VLAN */
40 #define TCA_VLAN_ACT_POP 1
41 #define TCA_VLAN_ACT_PUSH 2
42 #define TCA_VLAN_ACT_MODIFY 3
43 #define TCA_VLAN_PARMS 2
44 #define TCA_VLAN_PUSH_VLAN_ID 3
45 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
46 #define TCA_VLAN_PAD 5
47 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
54 #endif /* HAVE_TC_ACT_VLAN */
56 #ifdef HAVE_TC_ACT_PEDIT
58 #include <linux/tc_act/tc_pedit.h>
60 #else /* HAVE_TC_ACT_VLAN */
74 TCA_PEDIT_KEY_EX_HTYPE = 1,
75 TCA_PEDIT_KEY_EX_CMD = 2,
76 __TCA_PEDIT_KEY_EX_MAX
79 enum pedit_header_type {
80 TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
81 TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
82 TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
83 TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
84 TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
85 TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
90 TCA_PEDIT_KEY_EX_CMD_SET = 0,
91 TCA_PEDIT_KEY_EX_CMD_ADD = 1,
98 __u32 off; /*offset */
104 struct tc_pedit_sel {
108 struct tc_pedit_key keys[0];
111 #endif /* HAVE_TC_ACT_VLAN */
113 /* Normally found in linux/netlink.h. */
114 #ifndef NETLINK_CAP_ACK
115 #define NETLINK_CAP_ACK 10
118 /* Normally found in linux/pkt_sched.h. */
119 #ifndef TC_H_MIN_INGRESS
120 #define TC_H_MIN_INGRESS 0xfff2u
123 /* Normally found in linux/pkt_cls.h. */
124 #ifndef TCA_CLS_FLAGS_SKIP_SW
125 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
127 #ifndef HAVE_TCA_FLOWER_ACT
128 #define TCA_FLOWER_ACT 3
130 #ifndef HAVE_TCA_FLOWER_FLAGS
131 #define TCA_FLOWER_FLAGS 22
133 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
134 #define TCA_FLOWER_KEY_ETH_TYPE 8
136 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
137 #define TCA_FLOWER_KEY_ETH_DST 4
139 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
140 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
142 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
143 #define TCA_FLOWER_KEY_ETH_SRC 6
145 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
146 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
148 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
149 #define TCA_FLOWER_KEY_IP_PROTO 9
151 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
152 #define TCA_FLOWER_KEY_IPV4_SRC 10
154 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
155 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
157 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
158 #define TCA_FLOWER_KEY_IPV4_DST 12
160 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
161 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
163 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
164 #define TCA_FLOWER_KEY_IPV6_SRC 14
166 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
167 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
169 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
170 #define TCA_FLOWER_KEY_IPV6_DST 16
172 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
173 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
175 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
176 #define TCA_FLOWER_KEY_TCP_SRC 18
178 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
179 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
181 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
182 #define TCA_FLOWER_KEY_TCP_DST 19
184 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
185 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
187 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
188 #define TCA_FLOWER_KEY_UDP_SRC 20
190 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
191 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
193 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
194 #define TCA_FLOWER_KEY_UDP_DST 21
196 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
197 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
199 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
200 #define TCA_FLOWER_KEY_VLAN_ID 23
202 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
203 #define TCA_FLOWER_KEY_VLAN_PRIO 24
205 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
206 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
208 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
209 #define TCA_FLOWER_KEY_TCP_FLAGS 71
211 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
212 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
215 #ifndef IPV6_ADDR_LEN
216 #define IPV6_ADDR_LEN 16
219 #ifndef IPV4_ADDR_LEN
220 #define IPV4_ADDR_LEN 4
224 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
227 /** Empty masks for known item types. */
229 struct rte_flow_item_port_id port_id;
230 struct rte_flow_item_eth eth;
231 struct rte_flow_item_vlan vlan;
232 struct rte_flow_item_ipv4 ipv4;
233 struct rte_flow_item_ipv6 ipv6;
234 struct rte_flow_item_tcp tcp;
235 struct rte_flow_item_udp udp;
236 } flow_tcf_mask_empty;
238 /** Supported masks for known item types. */
239 static const struct {
240 struct rte_flow_item_port_id port_id;
241 struct rte_flow_item_eth eth;
242 struct rte_flow_item_vlan vlan;
243 struct rte_flow_item_ipv4 ipv4;
244 struct rte_flow_item_ipv6 ipv6;
245 struct rte_flow_item_tcp tcp;
246 struct rte_flow_item_udp udp;
247 } flow_tcf_mask_supported = {
252 .type = RTE_BE16(0xffff),
253 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
254 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
257 /* PCP and VID only, no DEI. */
258 .tci = RTE_BE16(0xefff),
259 .inner_type = RTE_BE16(0xffff),
262 .next_proto_id = 0xff,
263 .src_addr = RTE_BE32(0xffffffff),
264 .dst_addr = RTE_BE32(0xffffffff),
269 "\xff\xff\xff\xff\xff\xff\xff\xff"
270 "\xff\xff\xff\xff\xff\xff\xff\xff",
272 "\xff\xff\xff\xff\xff\xff\xff\xff"
273 "\xff\xff\xff\xff\xff\xff\xff\xff",
276 .src_port = RTE_BE16(0xffff),
277 .dst_port = RTE_BE16(0xffff),
281 .src_port = RTE_BE16(0xffff),
282 .dst_port = RTE_BE16(0xffff),
286 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
287 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
288 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
289 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
290 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
292 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
294 /** DPDK port to network interface index (ifindex) conversion. */
295 struct flow_tcf_ptoi {
296 uint16_t port_id; /**< DPDK port ID. */
297 unsigned int ifindex; /**< Network interface index. */
300 #define MLX5_TCF_FATE_ACTIONS (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID)
301 #define MLX5_TCF_VLAN_ACTIONS \
302 (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
303 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
305 #define MLX5_TCF_PEDIT_ACTIONS \
306 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
307 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
308 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)
310 #define MLX5_TCF_CONFIG_ACTIONS \
311 (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
312 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
313 MLX5_TCF_PEDIT_ACTIONS)
315 #define MAX_PEDIT_KEYS 128
316 #define SZ_PEDIT_KEY_VAL 4
318 #define NUM_OF_PEDIT_KEYS(sz) \
319 (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
321 struct pedit_key_ex {
322 enum pedit_header_type htype;
326 struct pedit_parser {
327 struct tc_pedit_sel sel;
328 struct tc_pedit_key keys[MAX_PEDIT_KEYS];
329 struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
334 * Set pedit key of transport (TCP/UDP) port value
337 * pointer to action specification
338 * @param[in,out] p_parser
339 * pointer to pedit_parser
340 * @param[in] item_flags
341 * flags of all items presented
344 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
345 struct pedit_parser *p_parser,
348 int idx = p_parser->sel.nkeys;
350 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
351 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
352 if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
353 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
354 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
355 /* offset of src/dst port is same for TCP and UDP */
356 p_parser->keys[idx].off =
357 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
358 offsetof(struct tcp_hdr, src_port) :
359 offsetof(struct tcp_hdr, dst_port);
360 p_parser->keys[idx].mask = 0xFFFF0000;
361 p_parser->keys[idx].val =
362 (__u32)((const struct rte_flow_action_set_tp *)
363 actions->conf)->port;
364 p_parser->sel.nkeys = (++idx);
368 * Set pedit key of ipv6 address
371 * pointer to action specification
372 * @param[in,out] p_parser
373 * pointer to pedit_parser
376 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
377 struct pedit_parser *p_parser)
379 int idx = p_parser->sel.nkeys;
380 int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
382 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
383 offsetof(struct ipv6_hdr, src_addr) :
384 offsetof(struct ipv6_hdr, dst_addr);
385 const struct rte_flow_action_set_ipv6 *conf =
386 (const struct rte_flow_action_set_ipv6 *)actions->conf;
388 for (int i = 0; i < keys; i++, idx++) {
389 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
390 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
391 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
392 p_parser->keys[idx].mask = ~UINT32_MAX;
393 memcpy(&p_parser->keys[idx].val,
394 conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
397 p_parser->sel.nkeys += keys;
401 * Set pedit key of ipv4 address
404 * pointer to action specification
405 * @param[in,out] p_parser
406 * pointer to pedit_parser
409 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
410 struct pedit_parser *p_parser)
412 int idx = p_parser->sel.nkeys;
414 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
415 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
416 p_parser->keys[idx].off =
417 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
418 offsetof(struct ipv4_hdr, src_addr) :
419 offsetof(struct ipv4_hdr, dst_addr);
420 p_parser->keys[idx].mask = ~UINT32_MAX;
421 p_parser->keys[idx].val =
422 ((const struct rte_flow_action_set_ipv4 *)
423 actions->conf)->ipv4_addr;
424 p_parser->sel.nkeys = (++idx);
428 * Create the pedit's na attribute in netlink message
429 * on pre-allocate message buffer
432 * pointer to pre-allocated netlink message buffer
433 * @param[in,out] actions
434 * pointer to pointer of actions specification.
435 * @param[in,out] action_flags
436 * pointer to actions flags
437 * @param[in] item_flags
438 * flags of all item presented
441 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
442 const struct rte_flow_action **actions,
445 struct pedit_parser p_parser;
446 struct nlattr *na_act_options;
447 struct nlattr *na_pedit_keys;
449 memset(&p_parser, 0, sizeof(p_parser));
450 mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
451 na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
452 /* all modify header actions should be in one tc-pedit action */
453 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
454 switch ((*actions)->type) {
455 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
456 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
457 flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
459 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
460 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
461 flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
463 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
464 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
465 flow_tcf_pedit_key_set_tp_port(*actions,
466 &p_parser, item_flags);
469 goto pedit_mnl_msg_done;
473 p_parser.sel.action = TC_ACT_PIPE;
474 mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
475 sizeof(p_parser.sel) +
476 p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
479 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
480 for (int i = 0; i < p_parser.sel.nkeys; i++) {
481 struct nlattr *na_pedit_key =
482 mnl_attr_nest_start(nl,
483 TCA_PEDIT_KEY_EX | NLA_F_NESTED);
484 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
485 p_parser.keys_ex[i].htype);
486 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
487 p_parser.keys_ex[i].cmd);
488 mnl_attr_nest_end(nl, na_pedit_key);
490 mnl_attr_nest_end(nl, na_pedit_keys);
491 mnl_attr_nest_end(nl, na_act_options);
496 * Calculate max memory size of one TC-pedit actions.
497 * One TC-pedit action can contain set of keys each defining
498 * a rewrite element (rte_flow action)
500 * @param[in,out] actions
501 * actions specification.
502 * @param[in,out] action_flags
504 * @param[in,out] size
507 * Max memory size of one TC-pedit action
510 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
511 uint64_t *action_flags)
517 pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
518 SZ_NLATTR_STRZ_OF("pedit") +
519 SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
520 for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
521 switch ((*actions)->type) {
522 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
523 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
524 flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
526 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
527 keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
528 flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
530 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
531 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
532 flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
534 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
535 keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
536 flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
538 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
539 /* TCP is as same as UDP */
540 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
541 flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
543 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
544 /* TCP is as same as UDP */
545 keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
546 flags |= MLX5_FLOW_ACTION_SET_TP_DST;
549 goto get_pedit_action_size_done;
552 get_pedit_action_size_done:
553 /* TCA_PEDIT_PARAMS_EX */
555 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
556 keys * sizeof(struct tc_pedit_key));
557 pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
559 /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
560 (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
561 SZ_NLATTR_DATA_OF(2));
562 (*action_flags) |= flags;
568 * Retrieve mask for pattern item.
570 * This function does basic sanity checks on a pattern item in order to
571 * return the most appropriate mask for it.
574 * Item specification.
575 * @param[in] mask_default
576 * Default mask for pattern item as specified by the flow API.
577 * @param[in] mask_supported
578 * Mask fields supported by the implementation.
579 * @param[in] mask_empty
580 * Empty mask to return when there is no specification.
582 * Perform verbose error reporting if not NULL.
585 * Either @p item->mask or one of the mask parameters on success, NULL
586 * otherwise and rte_errno is set.
589 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
590 const void *mask_supported, const void *mask_empty,
591 size_t mask_size, struct rte_flow_error *error)
596 /* item->last and item->mask cannot exist without item->spec. */
597 if (!item->spec && (item->mask || item->last)) {
598 rte_flow_error_set(error, EINVAL,
599 RTE_FLOW_ERROR_TYPE_ITEM, item,
600 "\"mask\" or \"last\" field provided without"
601 " a corresponding \"spec\"");
604 /* No spec, no mask, no problem. */
607 mask = item->mask ? item->mask : mask_default;
610 * Single-pass check to make sure that:
611 * - Mask is supported, no bits are set outside mask_supported.
612 * - Both item->spec and item->last are included in mask.
614 for (i = 0; i != mask_size; ++i) {
617 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
618 ((const uint8_t *)mask_supported)[i]) {
619 rte_flow_error_set(error, ENOTSUP,
620 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
621 "unsupported field found"
626 (((const uint8_t *)item->spec)[i] & mask[i]) !=
627 (((const uint8_t *)item->last)[i] & mask[i])) {
628 rte_flow_error_set(error, EINVAL,
629 RTE_FLOW_ERROR_TYPE_ITEM_LAST,
631 "range between \"spec\" and \"last\""
632 " not comprised in \"mask\"");
640 * Build a conversion table between port ID and ifindex.
643 * Pointer to Ethernet device.
645 * Pointer to ptoi table.
647 * Size of ptoi table provided.
650 * Size of ptoi table filled.
653 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
656 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
657 uint16_t port_id[n + 1];
659 unsigned int own = 0;
661 /* At least one port is needed when no switch domain is present. */
664 port_id[0] = dev->data->port_id;
666 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
670 for (i = 0; i != n; ++i) {
671 struct rte_eth_dev_info dev_info;
673 rte_eth_dev_info_get(port_id[i], &dev_info);
674 if (port_id[i] == dev->data->port_id)
676 ptoi[i].port_id = port_id[i];
677 ptoi[i].ifindex = dev_info.if_index;
679 /* Ensure first entry of ptoi[] is the current device. */
685 /* An entry with zero ifindex terminates ptoi[]. */
692 * Verify the @p attr will be correctly understood by the E-switch.
695 * Pointer to flow attributes
697 * Pointer to error structure.
700 * 0 on success, a negative errno value otherwise and rte_errno is set.
703 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
704 struct rte_flow_error *error)
707 * Supported attributes: no groups, some priorities and ingress only.
708 * Don't care about transfer as it is the caller's problem.
711 return rte_flow_error_set(error, ENOTSUP,
712 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
713 "groups are not supported");
714 if (attr->priority > 0xfffe)
715 return rte_flow_error_set(error, ENOTSUP,
716 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
718 "lowest priority level is 0xfffe");
720 return rte_flow_error_set(error, EINVAL,
721 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
722 attr, "only ingress is supported");
724 return rte_flow_error_set(error, ENOTSUP,
725 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
726 attr, "egress is not supported");
731 * Validate flow for E-Switch.
734 * Pointer to the priv structure.
736 * Pointer to the flow attributes.
738 * Pointer to the list of items.
740 * Pointer to the list of actions.
742 * Pointer to the error structure.
745 * 0 on success, a negative errno value otherwise and rte_ernno is set.
748 flow_tcf_validate(struct rte_eth_dev *dev,
749 const struct rte_flow_attr *attr,
750 const struct rte_flow_item items[],
751 const struct rte_flow_action actions[],
752 struct rte_flow_error *error)
755 const struct rte_flow_item_port_id *port_id;
756 const struct rte_flow_item_eth *eth;
757 const struct rte_flow_item_vlan *vlan;
758 const struct rte_flow_item_ipv4 *ipv4;
759 const struct rte_flow_item_ipv6 *ipv6;
760 const struct rte_flow_item_tcp *tcp;
761 const struct rte_flow_item_udp *udp;
764 const struct rte_flow_action_port_id *port_id;
765 const struct rte_flow_action_of_push_vlan *of_push_vlan;
766 const struct rte_flow_action_of_set_vlan_vid *
768 const struct rte_flow_action_of_set_vlan_pcp *
770 const struct rte_flow_action_set_ipv4 *set_ipv4;
771 const struct rte_flow_action_set_ipv6 *set_ipv6;
773 uint32_t item_flags = 0;
774 uint32_t action_flags = 0;
775 uint8_t next_protocol = -1;
776 unsigned int tcm_ifindex = 0;
777 uint8_t pedit_validated = 0;
778 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
779 struct rte_eth_dev *port_id_dev = NULL;
783 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
784 PTOI_TABLE_SZ_MAX(dev)));
785 ret = flow_tcf_validate_attributes(attr, error);
788 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
791 switch (items->type) {
792 case RTE_FLOW_ITEM_TYPE_VOID:
794 case RTE_FLOW_ITEM_TYPE_PORT_ID:
795 mask.port_id = flow_tcf_item_mask
796 (items, &rte_flow_item_port_id_mask,
797 &flow_tcf_mask_supported.port_id,
798 &flow_tcf_mask_empty.port_id,
799 sizeof(flow_tcf_mask_supported.port_id),
803 if (mask.port_id == &flow_tcf_mask_empty.port_id) {
807 spec.port_id = items->spec;
808 if (mask.port_id->id && mask.port_id->id != 0xffffffff)
809 return rte_flow_error_set
811 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
813 "no support for partial mask on"
815 if (!mask.port_id->id)
818 for (i = 0; ptoi[i].ifindex; ++i)
819 if (ptoi[i].port_id == spec.port_id->id)
821 if (!ptoi[i].ifindex)
822 return rte_flow_error_set
824 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
826 "missing data to convert port ID to"
828 if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
829 return rte_flow_error_set
831 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
833 "cannot match traffic for"
834 " several port IDs through"
835 " a single flow rule");
836 tcm_ifindex = ptoi[i].ifindex;
839 case RTE_FLOW_ITEM_TYPE_ETH:
840 ret = mlx5_flow_validate_item_eth(items, item_flags,
844 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
846 * Redundant check due to different supported mask.
847 * Same for the rest of items.
849 mask.eth = flow_tcf_item_mask
850 (items, &rte_flow_item_eth_mask,
851 &flow_tcf_mask_supported.eth,
852 &flow_tcf_mask_empty.eth,
853 sizeof(flow_tcf_mask_supported.eth),
857 if (mask.eth->type && mask.eth->type !=
859 return rte_flow_error_set
861 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
863 "no support for partial mask on"
866 case RTE_FLOW_ITEM_TYPE_VLAN:
867 ret = mlx5_flow_validate_item_vlan(items, item_flags,
871 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
872 mask.vlan = flow_tcf_item_mask
873 (items, &rte_flow_item_vlan_mask,
874 &flow_tcf_mask_supported.vlan,
875 &flow_tcf_mask_empty.vlan,
876 sizeof(flow_tcf_mask_supported.vlan),
880 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
881 (mask.vlan->tci & RTE_BE16(0xe000)) !=
883 (mask.vlan->tci & RTE_BE16(0x0fff) &&
884 (mask.vlan->tci & RTE_BE16(0x0fff)) !=
886 (mask.vlan->inner_type &&
887 mask.vlan->inner_type != RTE_BE16(0xffff)))
888 return rte_flow_error_set
890 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
892 "no support for partial masks on"
893 " \"tci\" (PCP and VID parts) and"
894 " \"inner_type\" fields");
896 case RTE_FLOW_ITEM_TYPE_IPV4:
897 ret = mlx5_flow_validate_item_ipv4(items, item_flags,
901 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
902 mask.ipv4 = flow_tcf_item_mask
903 (items, &rte_flow_item_ipv4_mask,
904 &flow_tcf_mask_supported.ipv4,
905 &flow_tcf_mask_empty.ipv4,
906 sizeof(flow_tcf_mask_supported.ipv4),
910 if (mask.ipv4->hdr.next_proto_id &&
911 mask.ipv4->hdr.next_proto_id != 0xff)
912 return rte_flow_error_set
914 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
916 "no support for partial mask on"
917 " \"hdr.next_proto_id\" field");
918 else if (mask.ipv4->hdr.next_proto_id)
920 ((const struct rte_flow_item_ipv4 *)
921 (items->spec))->hdr.next_proto_id;
923 case RTE_FLOW_ITEM_TYPE_IPV6:
924 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
928 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
929 mask.ipv6 = flow_tcf_item_mask
930 (items, &rte_flow_item_ipv6_mask,
931 &flow_tcf_mask_supported.ipv6,
932 &flow_tcf_mask_empty.ipv6,
933 sizeof(flow_tcf_mask_supported.ipv6),
937 if (mask.ipv6->hdr.proto &&
938 mask.ipv6->hdr.proto != 0xff)
939 return rte_flow_error_set
941 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
943 "no support for partial mask on"
944 " \"hdr.proto\" field");
945 else if (mask.ipv6->hdr.proto)
947 ((const struct rte_flow_item_ipv6 *)
948 (items->spec))->hdr.proto;
950 case RTE_FLOW_ITEM_TYPE_UDP:
951 ret = mlx5_flow_validate_item_udp(items, item_flags,
952 next_protocol, error);
955 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
956 mask.udp = flow_tcf_item_mask
957 (items, &rte_flow_item_udp_mask,
958 &flow_tcf_mask_supported.udp,
959 &flow_tcf_mask_empty.udp,
960 sizeof(flow_tcf_mask_supported.udp),
965 case RTE_FLOW_ITEM_TYPE_TCP:
966 ret = mlx5_flow_validate_item_tcp
969 &flow_tcf_mask_supported.tcp,
973 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
974 mask.tcp = flow_tcf_item_mask
975 (items, &rte_flow_item_tcp_mask,
976 &flow_tcf_mask_supported.tcp,
977 &flow_tcf_mask_empty.tcp,
978 sizeof(flow_tcf_mask_supported.tcp),
984 return rte_flow_error_set(error, ENOTSUP,
985 RTE_FLOW_ERROR_TYPE_ITEM,
986 NULL, "item not supported");
989 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
991 uint32_t current_action_flag = 0;
993 switch (actions->type) {
994 case RTE_FLOW_ACTION_TYPE_VOID:
996 case RTE_FLOW_ACTION_TYPE_PORT_ID:
997 current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
998 if (action_flags & MLX5_TCF_FATE_ACTIONS)
999 return rte_flow_error_set
1001 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1002 "can't have multiple fate actions");
1005 conf.port_id = actions->conf;
1006 if (conf.port_id->original)
1009 for (i = 0; ptoi[i].ifindex; ++i)
1010 if (ptoi[i].port_id == conf.port_id->id)
1012 if (!ptoi[i].ifindex)
1013 return rte_flow_error_set
1015 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1017 "missing data to convert port ID to"
1019 port_id_dev = &rte_eth_devices[conf.port_id->id];
1021 case RTE_FLOW_ACTION_TYPE_DROP:
1022 if (action_flags & MLX5_TCF_FATE_ACTIONS)
1023 return rte_flow_error_set
1025 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1026 "can't have multiple fate actions");
1027 current_action_flag = MLX5_FLOW_ACTION_DROP;
1029 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1030 current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1032 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1033 current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1035 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1036 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1037 return rte_flow_error_set
1039 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1040 "vlan modify is not supported,"
1041 " set action must follow push action");
1042 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1044 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1045 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1046 return rte_flow_error_set
1048 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1049 "vlan modify is not supported,"
1050 " set action must follow push action");
1051 current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1053 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1054 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1056 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1057 current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1059 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1060 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1062 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1063 current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1065 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1066 current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1068 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1069 current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1072 return rte_flow_error_set(error, ENOTSUP,
1073 RTE_FLOW_ERROR_TYPE_ACTION,
1075 "action not supported");
1077 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1079 return rte_flow_error_set(error, EINVAL,
1080 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1082 "action configuration not set");
1084 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1086 return rte_flow_error_set(error, ENOTSUP,
1087 RTE_FLOW_ERROR_TYPE_ACTION,
1089 "set actions should be "
1090 "listed successively");
1091 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1092 (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1093 pedit_validated = 1;
1094 action_flags |= current_action_flag;
1096 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1097 (action_flags & MLX5_FLOW_ACTION_DROP))
1098 return rte_flow_error_set(error, ENOTSUP,
1099 RTE_FLOW_ERROR_TYPE_ACTION,
1101 "set action is not compatible with "
1103 if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1104 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1105 return rte_flow_error_set(error, ENOTSUP,
1106 RTE_FLOW_ERROR_TYPE_ACTION,
1108 "set action must be followed by "
1111 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1112 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1113 return rte_flow_error_set(error, EINVAL,
1114 RTE_FLOW_ERROR_TYPE_ACTION,
1116 "no ipv4 item found in"
1120 (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1121 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1122 return rte_flow_error_set(error, EINVAL,
1123 RTE_FLOW_ERROR_TYPE_ACTION,
1125 "no ipv6 item found in"
1129 (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1131 (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1132 MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1133 return rte_flow_error_set(error, EINVAL,
1134 RTE_FLOW_ERROR_TYPE_ACTION,
1136 "no TCP/UDP item found in"
1140 * FW syndrome (0xA9C090):
1141 * set_flow_table_entry: push vlan action fte in fdb can ONLY be
1142 * forward to the uplink.
1144 if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1145 (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1146 ((struct priv *)port_id_dev->data->dev_private)->representor)
1147 return rte_flow_error_set(error, ENOTSUP,
1148 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1149 "vlan push can only be applied"
1150 " when forwarding to uplink port");
1152 * FW syndrome (0x294609):
1153 * set_flow_table_entry: modify/pop/push actions in fdb flow table
1154 * are supported only while forwarding to vport.
1156 if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1157 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1158 return rte_flow_error_set(error, ENOTSUP,
1159 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1160 "vlan actions are supported"
1161 " only with port_id action");
1162 if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1163 return rte_flow_error_set(error, EINVAL,
1164 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1165 "no fate action is found");
1170 * Calculate maximum size of memory for flow items of Linux TC flower and
1171 * extract specified items.
1174 * Pointer to the list of items.
1175 * @param[out] item_flags
1176 * Pointer to the detected items.
1179 * Maximum size of memory for items.
1182 flow_tcf_get_items_and_size(const struct rte_flow_item items[],
1183 uint64_t *item_flags)
1188 size += SZ_NLATTR_STRZ_OF("flower") +
1189 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1190 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
1191 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1192 switch (items->type) {
1193 case RTE_FLOW_ITEM_TYPE_VOID:
1195 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1197 case RTE_FLOW_ITEM_TYPE_ETH:
1198 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1199 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1200 /* dst/src MAC addr and mask. */
1201 flags |= MLX5_FLOW_LAYER_OUTER_L2;
1203 case RTE_FLOW_ITEM_TYPE_VLAN:
1204 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1205 SZ_NLATTR_TYPE_OF(uint16_t) +
1206 /* VLAN Ether type. */
1207 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1208 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1209 flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1211 case RTE_FLOW_ITEM_TYPE_IPV4:
1212 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1213 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1214 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1215 /* dst/src IP addr and mask. */
1216 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1218 case RTE_FLOW_ITEM_TYPE_IPV6:
1219 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1220 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1221 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1222 /* dst/src IP addr and mask. */
1223 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1225 case RTE_FLOW_ITEM_TYPE_UDP:
1226 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1227 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1228 /* dst/src port and mask. */
1229 flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1231 case RTE_FLOW_ITEM_TYPE_TCP:
1232 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1233 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1234 /* dst/src port and mask. */
1235 flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1239 "unsupported item %p type %d,"
1240 " items must be validated before flow creation",
1241 (const void *)items, items->type);
1245 *item_flags = flags;
/**
 * Calculate maximum size of memory for flow actions of Linux TC flower and
 * extract specified actions.
 *
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] action_flags
 *   Pointer to the detected actions (bit mask of MLX5_FLOW_ACTION_* values).
 *
 * @return
 *   Maximum size of memory for actions.
 */
flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
			      uint64_t *action_flags)
	/* Room for the enclosing TCA_FLOWER_ACT nest attribute. */
	size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
		case RTE_FLOW_ACTION_TYPE_PORT_ID:
			/* "mirred" TC action redirecting to another port. */
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("mirred") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(struct tc_mirred);
			flags |= MLX5_FLOW_ACTION_PORT_ID;
		case RTE_FLOW_ACTION_TYPE_DROP:
			/* "gact" TC action used here to drop packets. */
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("gact") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(struct tc_gact);
			flags |= MLX5_FLOW_ACTION_DROP;
		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
			flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
			flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
			flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
			flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
			goto action_of_vlan;
			/* Shared sizing for every "vlan" TC action variant. */
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("vlan") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(struct tc_vlan) +
				SZ_NLATTR_TYPE_OF(uint16_t) +
				/* VLAN protocol. */
				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
			/* All header-modify actions are sized by the pedit
			 * helper, which may consume several actions at once.
			 */
			size += flow_tcf_get_pedit_actions_size(&actions,
			"unsupported action %p type %d,"
			" items must be validated before flow creation",
			(const void *)actions, actions->type);
	*action_flags = flags;
/**
 * Brand rtnetlink buffer with unique handle.
 *
 * This handle should be unique for a given network interface to avoid
 * collisions.
 *
 * @param nlh
 *   Pointer to Netlink message.
 * @param handle
 *   Unique 32-bit handle to use.
 */
flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
	struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);

	/* Stamp the handle into the TC header so the rule is identifiable. */
	tcm->tcm_handle = handle;
	DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
		(void *)nlh, handle);
/**
 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
 * memory required, allocates the memory, initializes Netlink message headers
 * and set unique TC message handle.
 *
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] item_flags
 *   Pointer to bit mask of all items detected.
 * @param[out] action_flags
 *   Pointer to bit mask of all actions detected.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   Pointer to mlx5_flow object on success,
 *   otherwise NULL and rte_errno is set.
 */
static struct mlx5_flow *
flow_tcf_prepare(const struct rte_flow_attr *attr __rte_unused,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 uint64_t *item_flags, uint64_t *action_flags,
		 struct rte_flow_error *error)
	/* Base size: flow object + aligned Netlink and TC message headers. */
	size_t size = sizeof(struct mlx5_flow) +
		      MNL_ALIGN(sizeof(struct nlmsghdr)) +
		      MNL_ALIGN(sizeof(struct tcmsg));
	struct mlx5_flow *dev_flow;
	struct nlmsghdr *nlh;

	/* Upper-bound the buffer by sizing all items and actions. */
	size += flow_tcf_get_items_and_size(items, item_flags);
	size += flow_tcf_get_actions_and_size(actions, action_flags);
	dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
				   "not enough memory to create E-Switch flow");
	/* Netlink message is laid out right after the flow object. */
	nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	*dev_flow = (struct mlx5_flow){
		.tcf = (struct mlx5_flow_tcf){
	/*
	 * Generate a reasonably unique handle based on the address of the
	 * target buffer.
	 *
	 * This is straightforward on 32-bit systems where the flow pointer can
	 * be used directly. Otherwise, its least significant part is taken
	 * after shifting it by the previous power of two of the pointed buffer
	 * size.
	 */
	if (sizeof(dev_flow) <= 4)
		flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
		flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
				  rte_log2_u32(rte_align32prevpow2(size)));
/**
 * Translate flow for Linux TC flower and construct Netlink message.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device (provides the priv structure).
 * @param[in, out] dev_flow
 *   Pointer to the sub flow.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
	/* Typed views over the current item's spec/mask. */
	const struct rte_flow_item_port_id *port_id;
	const struct rte_flow_item_eth *eth;
	const struct rte_flow_item_vlan *vlan;
	const struct rte_flow_item_ipv4 *ipv4;
	const struct rte_flow_item_ipv6 *ipv6;
	const struct rte_flow_item_tcp *tcp;
	const struct rte_flow_item_udp *udp;
	/* Typed views over the current action's configuration. */
	const struct rte_flow_action_port_id *port_id;
	const struct rte_flow_action_of_push_vlan *of_push_vlan;
	const struct rte_flow_action_of_set_vlan_vid *
	const struct rte_flow_action_of_set_vlan_pcp *
	/* Port-ID to network interface index translation table. */
	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
	struct nlmsghdr *nlh = dev_flow->tcf.nlh;
	struct tcmsg *tcm = dev_flow->tcf.tcm;
	uint32_t na_act_index_cur;
	bool eth_type_set = 0;
	bool vlan_present = 0;
	bool vlan_eth_type_set = 0;
	bool ip_proto_set = 0;
	struct nlattr *na_flower;
	struct nlattr *na_flower_act;
	/* Tails of the VLAN ID/priority placeholder attributes (see below). */
	struct nlattr *na_vlan_id = NULL;
	struct nlattr *na_vlan_priority = NULL;
	uint64_t item_flags = 0;

	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
						PTOI_TABLE_SZ_MAX(dev)));
	nlh = dev_flow->tcf.nlh;
	tcm = dev_flow->tcf.tcm;
	/* Prepare API must have been called beforehand. */
	assert(nlh != NULL && tcm != NULL);
	tcm->tcm_family = AF_UNSPEC;
	/* Default to the first (own) interface; PORT_ID item may override. */
	tcm->tcm_ifindex = ptoi[0].ifindex;
	tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
	/*
	 * Priority cannot be zero to prevent the kernel from picking one
	 * automatically.
	 */
	tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
				  RTE_BE16(ETH_P_ALL));
	mnl_attr_put_strz(nlh, TCA_KIND, "flower");
	na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
	/* Request hardware offload only; skip the software TC datapath. */
	mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
		case RTE_FLOW_ITEM_TYPE_PORT_ID:
			mask.port_id = flow_tcf_item_mask
				(items, &rte_flow_item_port_id_mask,
				 &flow_tcf_mask_supported.port_id,
				 &flow_tcf_mask_empty.port_id,
				 sizeof(flow_tcf_mask_supported.port_id),
			assert(mask.port_id);
			if (mask.port_id == &flow_tcf_mask_empty.port_id)
			spec.port_id = items->spec;
			if (!mask.port_id->id)
			/* Find the ifindex matching the requested port ID. */
			for (i = 0; ptoi[i].ifindex; ++i)
				if (ptoi[i].port_id == spec.port_id->id)
			assert(ptoi[i].ifindex);
			tcm->tcm_ifindex = ptoi[i].ifindex;
		case RTE_FLOW_ITEM_TYPE_ETH:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
			mask.eth = flow_tcf_item_mask
				(items, &rte_flow_item_eth_mask,
				 &flow_tcf_mask_supported.eth,
				 &flow_tcf_mask_empty.eth,
				 sizeof(flow_tcf_mask_supported.eth),
			if (mask.eth == &flow_tcf_mask_empty.eth)
			spec.eth = items->spec;
			if (mask.eth->type) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
			if (!is_zero_ether_addr(&mask.eth->dst)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
					     spec.eth->dst.addr_bytes);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
					     mask.eth->dst.addr_bytes);
			if (!is_zero_ether_addr(&mask.eth->src)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
					     spec.eth->src.addr_bytes);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
					     mask.eth->src.addr_bytes);
		case RTE_FLOW_ITEM_TYPE_VLAN:
			item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
			mask.vlan = flow_tcf_item_mask
				(items, &rte_flow_item_vlan_mask,
				 &flow_tcf_mask_supported.vlan,
				 &flow_tcf_mask_empty.vlan,
				 sizeof(flow_tcf_mask_supported.vlan),
				/* VLAN presence implies 802.1Q ether type. */
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
						 RTE_BE16(ETH_P_8021Q));
			if (mask.vlan == &flow_tcf_mask_empty.vlan)
			spec.vlan = items->spec;
			if (mask.vlan->inner_type) {
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
						 spec.vlan->inner_type);
				vlan_eth_type_set = 1;
			/* Upper 3 bits of TCI carry the VLAN priority (PCP). */
			if (mask.vlan->tci & RTE_BE16(0xe000))
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
						(spec.vlan->tci) >> 13) & 0x7);
			/* Lower 12 bits of TCI carry the VLAN ID. */
			if (mask.vlan->tci & RTE_BE16(0x0fff))
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
		case RTE_FLOW_ITEM_TYPE_IPV4:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			mask.ipv4 = flow_tcf_item_mask
				(items, &rte_flow_item_ipv4_mask,
				 &flow_tcf_mask_supported.ipv4,
				 &flow_tcf_mask_empty.ipv4,
				 sizeof(flow_tcf_mask_supported.ipv4),
			/* Emit ether type only if not set by ETH/VLAN item. */
			if (!eth_type_set || !vlan_eth_type_set)
				mnl_attr_put_u16(nlh,
					TCA_FLOWER_KEY_VLAN_ETH_TYPE :
					TCA_FLOWER_KEY_ETH_TYPE,
					RTE_BE16(ETH_P_IP));
			vlan_eth_type_set = 1;
			if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
			spec.ipv4 = items->spec;
			if (mask.ipv4->hdr.next_proto_id) {
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
						spec.ipv4->hdr.next_proto_id);
			if (mask.ipv4->hdr.src_addr) {
				mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
						 spec.ipv4->hdr.src_addr);
				mnl_attr_put_u32(nlh,
						 TCA_FLOWER_KEY_IPV4_SRC_MASK,
						 mask.ipv4->hdr.src_addr);
			if (mask.ipv4->hdr.dst_addr) {
				mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
						 spec.ipv4->hdr.dst_addr);
				mnl_attr_put_u32(nlh,
						 TCA_FLOWER_KEY_IPV4_DST_MASK,
						 mask.ipv4->hdr.dst_addr);
		case RTE_FLOW_ITEM_TYPE_IPV6:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			mask.ipv6 = flow_tcf_item_mask
				(items, &rte_flow_item_ipv6_mask,
				 &flow_tcf_mask_supported.ipv6,
				 &flow_tcf_mask_empty.ipv6,
				 sizeof(flow_tcf_mask_supported.ipv6),
			/* Emit ether type only if not set by ETH/VLAN item. */
			if (!eth_type_set || !vlan_eth_type_set)
				mnl_attr_put_u16(nlh,
					TCA_FLOWER_KEY_VLAN_ETH_TYPE :
					TCA_FLOWER_KEY_ETH_TYPE,
					RTE_BE16(ETH_P_IPV6));
			vlan_eth_type_set = 1;
			if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
			spec.ipv6 = items->spec;
			if (mask.ipv6->hdr.proto) {
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
						spec.ipv6->hdr.proto);
			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
					     sizeof(spec.ipv6->hdr.src_addr),
					     spec.ipv6->hdr.src_addr);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
					     sizeof(mask.ipv6->hdr.src_addr),
					     mask.ipv6->hdr.src_addr);
			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
					     sizeof(spec.ipv6->hdr.dst_addr),
					     spec.ipv6->hdr.dst_addr);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
					     sizeof(mask.ipv6->hdr.dst_addr),
					     mask.ipv6->hdr.dst_addr);
		case RTE_FLOW_ITEM_TYPE_UDP:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
			mask.udp = flow_tcf_item_mask
				(items, &rte_flow_item_udp_mask,
				 &flow_tcf_mask_supported.udp,
				 &flow_tcf_mask_empty.udp,
				 sizeof(flow_tcf_mask_supported.udp),
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
			if (mask.udp == &flow_tcf_mask_empty.udp)
			spec.udp = items->spec;
			if (mask.udp->hdr.src_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
						 spec.udp->hdr.src_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_UDP_SRC_MASK,
						 mask.udp->hdr.src_port);
			if (mask.udp->hdr.dst_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
						 spec.udp->hdr.dst_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_UDP_DST_MASK,
						 mask.udp->hdr.dst_port);
		case RTE_FLOW_ITEM_TYPE_TCP:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
			mask.tcp = flow_tcf_item_mask
				(items, &rte_flow_item_tcp_mask,
				 &flow_tcf_mask_supported.tcp,
				 &flow_tcf_mask_empty.tcp,
				 sizeof(flow_tcf_mask_supported.tcp),
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
			if (mask.tcp == &flow_tcf_mask_empty.tcp)
			spec.tcp = items->spec;
			if (mask.tcp->hdr.src_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
						 spec.tcp->hdr.src_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_TCP_SRC_MASK,
						 mask.tcp->hdr.src_port);
			if (mask.tcp->hdr.dst_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
						 spec.tcp->hdr.dst_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_TCP_DST_MASK,
						 mask.tcp->hdr.dst_port);
			if (mask.tcp->hdr.tcp_flags) {
					TCA_FLOWER_KEY_TCP_FLAGS,
					(spec.tcp->hdr.tcp_flags));
					TCA_FLOWER_KEY_TCP_FLAGS_MASK,
					(mask.tcp->hdr.tcp_flags));
			/* Validation should have rejected this earlier. */
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "item not supported");
	na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
	/* TC action indices are 1-based. */
	na_act_index_cur = 1;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		struct nlattr *na_act_index;
		struct nlattr *na_act;
		unsigned int vlan_act;

		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
		case RTE_FLOW_ACTION_TYPE_PORT_ID:
			conf.port_id = actions->conf;
			if (conf.port_id->original)
			for (i = 0; ptoi[i].ifindex; ++i)
				if (ptoi[i].port_id == conf.port_id->id)
			assert(ptoi[i].ifindex);
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			/* "mirred" redirect to the resolved interface. */
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			mnl_attr_put(nlh, TCA_MIRRED_PARMS,
				     sizeof(struct tc_mirred),
				     &(struct tc_mirred){
					.action = TC_ACT_STOLEN,
					.eaction = TCA_EGRESS_REDIR,
					.ifindex = ptoi[i].ifindex,
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
		case RTE_FLOW_ACTION_TYPE_DROP:
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			/* "gact" with TC_ACT_SHOT drops the packet. */
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			mnl_attr_put(nlh, TCA_GACT_PARMS,
				     sizeof(struct tc_gact),
					.action = TC_ACT_SHOT,
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
			conf.of_push_vlan = NULL;
			vlan_act = TCA_VLAN_ACT_POP;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
			conf.of_push_vlan = actions->conf;
			vlan_act = TCA_VLAN_ACT_PUSH;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
			conf.of_set_vlan_vid = actions->conf;
				goto override_na_vlan_id;
			vlan_act = TCA_VLAN_ACT_MODIFY;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
			conf.of_set_vlan_pcp = actions->conf;
			/* Reuse a pending VLAN action's priority slot. */
			if (na_vlan_priority)
				goto override_na_vlan_priority;
			vlan_act = TCA_VLAN_ACT_MODIFY;
			goto action_of_vlan;
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			mnl_attr_put(nlh, TCA_VLAN_PARMS,
				     sizeof(struct tc_vlan),
					.action = TC_ACT_PIPE,
					.v_action = vlan_act,
			if (vlan_act == TCA_VLAN_ACT_POP) {
				mnl_attr_nest_end(nlh, na_act);
				mnl_attr_nest_end(nlh, na_act_index);
			if (vlan_act == TCA_VLAN_ACT_PUSH)
				mnl_attr_put_u16(nlh,
						 TCA_VLAN_PUSH_VLAN_PROTOCOL,
						 conf.of_push_vlan->ethertype);
			/*
			 * Reserve VLAN ID/priority attributes as TCA_VLAN_PAD
			 * placeholders; a subsequent SET_VLAN_VID/PCP action
			 * retargets them at the override labels below.
			 */
			na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
			mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
			na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
			mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			if (actions->type ==
			    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
override_na_vlan_id:
				na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
				*(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
					(conf.of_set_vlan_vid->vlan_vid);
			} else if (actions->type ==
				   RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
override_na_vlan_priority:
				na_vlan_priority->nla_type =
					TCA_VLAN_PUSH_VLAN_PRIORITY;
				*(uint8_t *)mnl_attr_get_payload
					(na_vlan_priority) =
					conf.of_set_vlan_pcp->vlan_pcp;
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			/* Pedit helper may consume several actions at once. */
			flow_tcf_create_pedit_mnl_msg(nlh,
						      &actions, item_flags);
			mnl_attr_nest_end(nlh, na_act_index);
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  "action not supported");
	assert(na_flower_act);
	mnl_attr_nest_end(nlh, na_flower_act);
	mnl_attr_nest_end(nlh, na_flower);
/**
 * Send Netlink message with acknowledgment.
 *
 * @param nl
 *   Libmnl socket to use.
 * @param nlh
 *   Message to send. This function always raises the NLM_F_ACK flag before
 *   sending.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
	/* Answer buffer: nlmsgerr header plus echoed request payload. */
	alignas(struct nlmsghdr)
	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
		    nlh->nlmsg_len - sizeof(*nlh)];
	/* Random sequence number to pair the reply with this request. */
	uint32_t seq = random();

	nlh->nlmsg_flags |= NLM_F_ACK;
	nlh->nlmsg_seq = seq;
	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
			(ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
/**
 * Apply flow to E-Switch by sending Netlink message.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to the sub flow.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
	       struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;
	struct mnl_socket *nl = priv->mnl_socket;
	struct mlx5_flow *dev_flow;
	struct nlmsghdr *nlh;

	dev_flow = LIST_FIRST(&flow->dev_flows);
	/* E-Switch flow can't be expanded. */
	assert(!LIST_NEXT(dev_flow, next));
	nlh = dev_flow->tcf.nlh;
	/* Create a new TC filter; fail if an identical one already exists. */
	nlh->nlmsg_type = RTM_NEWTFILTER;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	if (!flow_tcf_nl_ack(nl, nlh))
	return rte_flow_error_set(error, rte_errno,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
				  "netlink: failed to create TC flow rule");
/**
 * Remove flow from E-Switch by sending Netlink message.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to the sub flow.
 */
flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
	struct priv *priv = dev->data->dev_private;
	struct mnl_socket *nl = priv->mnl_socket;
	struct mlx5_flow *dev_flow;
	struct nlmsghdr *nlh;

	dev_flow = LIST_FIRST(&flow->dev_flows);
	/* E-Switch flow can't be expanded. */
	assert(!LIST_NEXT(dev_flow, next));
	nlh = dev_flow->tcf.nlh;
	nlh->nlmsg_type = RTM_DELTFILTER;
	nlh->nlmsg_flags = NLM_F_REQUEST;
	/* Best effort: deletion errors are deliberately ignored here. */
	flow_tcf_nl_ack(nl, nlh);
/**
 * Remove flow from E-Switch and release resources of the device flow.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to the sub flow.
 */
flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
	struct mlx5_flow *dev_flow;

	/* Delete the TC rule from the kernel first, then free the object. */
	flow_tcf_remove(dev, flow);
	dev_flow = LIST_FIRST(&flow->dev_flows);
	/* E-Switch flow can't be expanded. */
	assert(!LIST_NEXT(dev_flow, next));
	LIST_REMOVE(dev_flow, next);
/* E-Switch (Linux TC flower) flow driver callbacks. */
const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
	.validate = flow_tcf_validate,
	.prepare = flow_tcf_prepare,
	.translate = flow_tcf_translate,
	.apply = flow_tcf_apply,
	.remove = flow_tcf_remove,
	.destroy = flow_tcf_destroy,
/**
 * Initialize ingress qdisc of a given network interface.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind.
 * @param ifindex
 *   Index of network interface to initialize.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
		   struct rte_flow_error *error)
	struct nlmsghdr *nlh;
	/* Message buffer: TC header plus room for attributes. */
	alignas(struct nlmsghdr)
	uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];

	/* Destroy existing ingress qdisc and everything attached to it. */
	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_DELQDISC;
	nlh->nlmsg_flags = NLM_F_REQUEST;
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ifindex;
	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
	tcm->tcm_parent = TC_H_INGRESS;
	/* Ignore errors when qdisc is already absent. */
	if (flow_tcf_nl_ack(nl, nlh) &&
	    rte_errno != EINVAL && rte_errno != ENOENT)
		return rte_flow_error_set(error, rte_errno,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					  "netlink: failed to remove ingress"
	/* Create fresh ingress qdisc. */
	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_NEWQDISC;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ifindex;
	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
	tcm->tcm_parent = TC_H_INGRESS;
	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
	if (flow_tcf_nl_ack(nl, nlh))
		return rte_flow_error_set(error, rte_errno,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					  "netlink: failed to create ingress"
/**
 * Create and configure a libmnl socket for Netlink flow rules.
 *
 * @return
 *   A valid libmnl socket object pointer on success, NULL otherwise and
 *   rte_errno is set.
 */
mlx5_flow_tcf_socket_create(void)
	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);

	/* NETLINK_CAP_ACK: best effort, trims ACK payload when supported. */
	mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
	if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
	mnl_socket_close(nl);
2105 * Destroy a libmnl socket.
2108 * Libmnl socket of the @p NETLINK_ROUTE kind.
2111 mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
2113 mnl_socket_close(nl);