1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
22 #include <sys/socket.h>
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
30 #include "mlx5_autoconf.h"
/*
 * Kernel-dependent compatibility definitions: supply TC "vlan" action and
 * flower classifier attribute values when the system headers are too old to
 * provide them. Values mirror upstream Linux uapi headers.
 * NOTE(review): the matching #endif lines for the #ifndef guards below appear
 * elided by extraction — confirm against the original file.
 */
32 #ifdef HAVE_TC_ACT_VLAN
34 #include <linux/tc_act/tc_vlan.h>
36 #else /* HAVE_TC_ACT_VLAN */
38 #define TCA_VLAN_ACT_POP 1
39 #define TCA_VLAN_ACT_PUSH 2
40 #define TCA_VLAN_ACT_MODIFY 3
41 #define TCA_VLAN_PARMS 2
42 #define TCA_VLAN_PUSH_VLAN_ID 3
43 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
44 #define TCA_VLAN_PAD 5
45 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
52 #endif /* HAVE_TC_ACT_VLAN */
54 /* Normally found in linux/netlink.h. */
55 #ifndef NETLINK_CAP_ACK
56 #define NETLINK_CAP_ACK 10
59 /* Normally found in linux/pkt_sched.h. */
60 #ifndef TC_H_MIN_INGRESS
61 #define TC_H_MIN_INGRESS 0xfff2u
64 /* Normally found in linux/pkt_cls.h. */
65 #ifndef TCA_CLS_FLAGS_SKIP_SW
66 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
68 #ifndef HAVE_TCA_FLOWER_ACT
69 #define TCA_FLOWER_ACT 3
71 #ifndef HAVE_TCA_FLOWER_FLAGS
72 #define TCA_FLOWER_FLAGS 22
74 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
75 #define TCA_FLOWER_KEY_ETH_TYPE 8
77 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
78 #define TCA_FLOWER_KEY_ETH_DST 4
80 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
81 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
83 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
84 #define TCA_FLOWER_KEY_ETH_SRC 6
86 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
87 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
89 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
90 #define TCA_FLOWER_KEY_IP_PROTO 9
92 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
93 #define TCA_FLOWER_KEY_IPV4_SRC 10
95 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
96 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
98 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
99 #define TCA_FLOWER_KEY_IPV4_DST 12
101 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
102 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
104 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
105 #define TCA_FLOWER_KEY_IPV6_SRC 14
107 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
108 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
110 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
111 #define TCA_FLOWER_KEY_IPV6_DST 16
113 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
114 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
116 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
117 #define TCA_FLOWER_KEY_TCP_SRC 18
119 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
120 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
122 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
123 #define TCA_FLOWER_KEY_TCP_DST 19
125 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
126 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
128 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
129 #define TCA_FLOWER_KEY_UDP_SRC 20
131 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
132 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
134 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
135 #define TCA_FLOWER_KEY_UDP_DST 21
137 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
138 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
140 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
141 #define TCA_FLOWER_KEY_VLAN_ID 23
143 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
144 #define TCA_FLOWER_KEY_VLAN_PRIO 24
146 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
147 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
150 /** Parser state definitions for mlx5_nl_flow_trans[]. */
/*
 * NOTE(review): most enumerators (e.g. INVALID, BACK, ATTR, PATTERN,
 * ITEM_* and the other ACTION_* values referenced by the table below)
 * appear elided by extraction.
 */
151 enum mlx5_nl_flow_trans {
169 ACTION_OF_SET_VLAN_VID,
170 ACTION_OF_SET_VLAN_PCP,
/* Terminate every transition list with INVALID as a sentinel. */
174 #define TRANS(...) (const enum mlx5_nl_flow_trans []){ __VA_ARGS__, INVALID, }
176 #define PATTERN_COMMON \
178 #define ACTIONS_COMMON \
179 ACTION_VOID, ACTION_OF_POP_VLAN, ACTION_OF_PUSH_VLAN, \
180 ACTION_OF_SET_VLAN_VID, ACTION_OF_SET_VLAN_PCP
181 #define ACTIONS_FATE \
182 ACTION_PORT_ID, ACTION_DROP
184 /** Parser state transitions used by mlx5_nl_flow_transpose(). */
/* Indexed by current state; each entry lists the valid next states. */
185 static const enum mlx5_nl_flow_trans *const mlx5_nl_flow_trans[] = {
188 [ATTR] = TRANS(PATTERN),
189 [PATTERN] = TRANS(ITEM_ETH, PATTERN_COMMON),
190 [ITEM_VOID] = TRANS(BACK),
191 [ITEM_ETH] = TRANS(ITEM_IPV4, ITEM_IPV6, ITEM_VLAN, PATTERN_COMMON),
192 [ITEM_VLAN] = TRANS(ITEM_IPV4, ITEM_IPV6, PATTERN_COMMON),
193 [ITEM_IPV4] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
194 [ITEM_IPV6] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
195 [ITEM_TCP] = TRANS(PATTERN_COMMON),
196 [ITEM_UDP] = TRANS(PATTERN_COMMON),
197 [ACTIONS] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
198 [ACTION_VOID] = TRANS(BACK),
199 [ACTION_PORT_ID] = TRANS(ACTION_VOID, END),
200 [ACTION_DROP] = TRANS(ACTION_VOID, END),
201 [ACTION_OF_POP_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
202 [ACTION_OF_PUSH_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
203 [ACTION_OF_SET_VLAN_VID] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
204 [ACTION_OF_SET_VLAN_PCP] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
208 /** Empty masks for known item types. */
/*
 * All-zero masks compared against by mlx5_nl_flow_transpose() to detect
 * "no specification" for a given pattern item.
 * NOTE(review): the "static const struct {" opener appears elided by
 * extraction.
 */
210 struct rte_flow_item_eth eth;
211 struct rte_flow_item_vlan vlan;
212 struct rte_flow_item_ipv4 ipv4;
213 struct rte_flow_item_ipv6 ipv6;
214 struct rte_flow_item_tcp tcp;
215 struct rte_flow_item_udp udp;
216 } mlx5_nl_flow_mask_empty;
218 /** Supported masks for known item types. */
/*
 * Widest masks this implementation can translate to TC flower keys; used by
 * mlx5_nl_flow_item_mask() to reject unsupported mask bits.
 * NOTE(review): the per-item designators (.eth = {, .vlan = {, ...) and the
 * closing "};" appear elided by extraction.
 */
219 static const struct {
220 struct rte_flow_item_eth eth;
221 struct rte_flow_item_vlan vlan;
222 struct rte_flow_item_ipv4 ipv4;
223 struct rte_flow_item_ipv6 ipv6;
224 struct rte_flow_item_tcp tcp;
225 struct rte_flow_item_udp udp;
226 } mlx5_nl_flow_mask_supported = {
228 .type = RTE_BE16(0xffff),
229 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
230 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
233 /* PCP and VID only, no DEI. */
234 .tci = RTE_BE16(0xefff),
235 .inner_type = RTE_BE16(0xffff),
238 .next_proto_id = 0xff,
239 .src_addr = RTE_BE32(0xffffffff),
240 .dst_addr = RTE_BE32(0xffffffff),
/* Full 128-bit IPv6 source and destination address masks. */
245 "\xff\xff\xff\xff\xff\xff\xff\xff"
246 "\xff\xff\xff\xff\xff\xff\xff\xff",
248 "\xff\xff\xff\xff\xff\xff\xff\xff"
249 "\xff\xff\xff\xff\xff\xff\xff\xff",
/* TCP then UDP port masks. */
252 .src_port = RTE_BE16(0xffff),
253 .dst_port = RTE_BE16(0xffff),
256 .src_port = RTE_BE16(0xffff),
257 .dst_port = RTE_BE16(0xffff),
262 * Retrieve mask for pattern item.
264 * This function does basic sanity checks on a pattern item in order to
265 * return the most appropriate mask for it.
268 * Item specification.
269 * @param[in] mask_default
270 * Default mask for pattern item as specified by the flow API.
271 * @param[in] mask_supported
272 * Mask fields supported by the implementation.
273 * @param[in] mask_empty
274 * Empty mask to return when there is no specification.
276 * Perform verbose error reporting if not NULL.
279 * Either @p item->mask or one of the mask parameters on success, NULL
280 * otherwise and rte_errno is set.
/*
 * NOTE(review): extraction elided several lines of this function (return
 * statements, local declarations such as mask/mask_size/i); remaining lines
 * are kept byte-identical.
 */
283 mlx5_nl_flow_item_mask(const struct rte_flow_item *item,
284 const void *mask_default,
285 const void *mask_supported,
286 const void *mask_empty,
288 struct rte_flow_error *error)
293 /* item->last and item->mask cannot exist without item->spec. */
294 if (!item->spec && (item->mask || item->last)) {
296 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
297 "\"mask\" or \"last\" field provided without a"
298 " corresponding \"spec\"");
301 /* No spec, no mask, no problem. */
/* Fall back to the flow API default mask when the item has none. */
304 mask = item->mask ? item->mask : mask_default;
307 * Single-pass check to make sure that:
308 * - Mask is supported, no bits are set outside mask_supported.
309 * - Both item->spec and item->last are included in mask.
311 for (i = 0; i != mask_size; ++i) {
/* mask OR supported must not add new bits over supported. */
314 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
315 ((const uint8_t *)mask_supported)[i]) {
317 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
318 mask, "unsupported field found in \"mask\"");
/* spec and last must agree on every masked byte. */
322 (((const uint8_t *)item->spec)[i] & mask[i]) !=
323 (((const uint8_t *)item->last)[i] & mask[i])) {
325 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_LAST,
327 "range between \"spec\" and \"last\" not"
328 " comprised in \"mask\"");
336 * Transpose flow rule description to rtnetlink message.
338 * This function transposes a flow rule description to a traffic control
339 * (TC) filter creation message ready to be sent over Netlink.
341 * Target interface is specified as the first entry of the @p ptoi table.
342 * Subsequent entries enable this function to resolve other DPDK port IDs
343 * found in the flow rule.
346 * Output message buffer. May be NULL when @p size is 0.
348 * Size of @p buf. Message may be truncated if not large enough.
350 * DPDK port ID to network interface index translation table. This table
351 * is terminated by an entry with a zero ifindex value.
353 * Flow rule attributes.
355 * Pattern specification.
357 * Associated actions.
359 * Perform verbose error reporting if not NULL.
362 * A positive value representing the exact size of the message in bytes
363 * regardless of the @p size parameter on success, a negative errno value
364 * otherwise and rte_errno is set.
/*
 * NOTE(review): extraction elided many original lines of this function
 * (case labels, braces, gotos, some declarations); the remaining lines are
 * kept byte-identical and only comments have been added.
 */
367 mlx5_nl_flow_transpose(void *buf,
369 const struct mlx5_nl_flow_ptoi *ptoi,
370 const struct rte_flow_attr *attr,
371 const struct rte_flow_item *pattern,
372 const struct rte_flow_action *actions,
373 struct rte_flow_error *error)
/* Scratch buffer used to measure the message when @p buf is too small. */
375 alignas(struct nlmsghdr)
376 uint8_t buf_tmp[mnl_nlmsg_size(sizeof(struct tcmsg) + 1024)];
377 const struct rte_flow_item *item;
378 const struct rte_flow_action *action;
380 uint32_t act_index_cur;
383 bool vlan_eth_type_set;
385 struct nlattr *na_flower;
386 struct nlattr *na_flower_act;
387 struct nlattr *na_vlan_id;
388 struct nlattr *na_vlan_priority;
389 const enum mlx5_nl_flow_trans *trans;
390 const enum mlx5_nl_flow_trans *back;
/* Reset per-pass state (the function can restart with buf_tmp below). */
399 eth_type_set = false;
400 vlan_present = false;
401 vlan_eth_type_set = false;
402 ip_proto_set = false;
404 na_flower_act = NULL;
406 na_vlan_priority = NULL;
/* Walk the state machine defined by mlx5_nl_flow_trans[]. */
410 switch (trans[n++]) {
/* Per-state convenience pointers over item spec/mask and action conf. */
412 const struct rte_flow_item_eth *eth;
413 const struct rte_flow_item_vlan *vlan;
414 const struct rte_flow_item_ipv4 *ipv4;
415 const struct rte_flow_item_ipv6 *ipv6;
416 const struct rte_flow_item_tcp *tcp;
417 const struct rte_flow_item_udp *udp;
420 const struct rte_flow_action_port_id *port_id;
421 const struct rte_flow_action_of_push_vlan *of_push_vlan;
422 const struct rte_flow_action_of_set_vlan_vid *
424 const struct rte_flow_action_of_set_vlan_pcp *
427 struct nlmsghdr *nlh;
429 struct nlattr *act_index;
/* No valid transition: report the offending item or action. */
435 return rte_flow_error_set
436 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
437 item, "unsupported pattern item combination");
438 else if (action->type)
439 return rte_flow_error_set
440 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
441 action, "unsupported action combination");
442 return rte_flow_error_set
443 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
444 "flow rule lacks some kind of fate action");
/* Flow attribute validation: groups, priority range, ingress only. */
451 * Supported attributes: no groups, some priorities and
452 * ingress only. Don't care about transfer as it is the
456 return rte_flow_error_set
458 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
459 attr, "groups are not supported");
460 if (attr->priority > 0xfffe)
461 return rte_flow_error_set
463 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
464 attr, "lowest priority level is 0xfffe");
466 return rte_flow_error_set
468 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
469 attr, "only ingress is supported");
471 return rte_flow_error_set
473 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
474 attr, "egress is not supported");
/* Build the Netlink/TC message header for the ingress qdisc. */
475 if (size < mnl_nlmsg_size(sizeof(*tcm)))
477 nlh = mnl_nlmsg_put_header(buf);
479 nlh->nlmsg_flags = 0;
481 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
482 tcm->tcm_family = AF_UNSPEC;
483 tcm->tcm_ifindex = ptoi[0].ifindex;
485 * Let kernel pick a handle by default. A predictable handle
486 * can be set by the caller on the resulting buffer through
487 * mlx5_nl_flow_brand().
490 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
492 * Priority cannot be zero to prevent the kernel from
493 * picking one automatically.
495 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
496 RTE_BE16(ETH_P_ALL));
/* Classifier is "flower" with software fallback disabled. */
499 if (!mnl_attr_put_strz_check(buf, size, TCA_KIND, "flower"))
501 na_flower = mnl_attr_nest_start_check(buf, size, TCA_OPTIONS);
504 if (!mnl_attr_put_u32_check(buf, size, TCA_FLOWER_FLAGS,
505 TCA_CLS_FLAGS_SKIP_SW))
/* Pattern item: VOID. */
509 if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
/* Pattern item: ETH (MAC addresses and EtherType). */
514 if (item->type != RTE_FLOW_ITEM_TYPE_ETH)
516 mask.eth = mlx5_nl_flow_item_mask
517 (item, &rte_flow_item_eth_mask,
518 &mlx5_nl_flow_mask_supported.eth,
519 &mlx5_nl_flow_mask_empty.eth,
520 sizeof(mlx5_nl_flow_mask_supported.eth), error);
523 if (mask.eth == &mlx5_nl_flow_mask_empty.eth) {
527 spec.eth = item->spec;
528 if (mask.eth->type && mask.eth->type != RTE_BE16(0xffff))
529 return rte_flow_error_set
530 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
532 "no support for partial mask on"
534 if (mask.eth->type) {
535 if (!mnl_attr_put_u16_check(buf, size,
536 TCA_FLOWER_KEY_ETH_TYPE,
541 if ((!is_zero_ether_addr(&mask.eth->dst) &&
542 (!mnl_attr_put_check(buf, size,
543 TCA_FLOWER_KEY_ETH_DST,
545 spec.eth->dst.addr_bytes) ||
546 !mnl_attr_put_check(buf, size,
547 TCA_FLOWER_KEY_ETH_DST_MASK,
549 mask.eth->dst.addr_bytes))) ||
550 (!is_zero_ether_addr(&mask.eth->src) &&
551 (!mnl_attr_put_check(buf, size,
552 TCA_FLOWER_KEY_ETH_SRC,
554 spec.eth->src.addr_bytes) ||
555 !mnl_attr_put_check(buf, size,
556 TCA_FLOWER_KEY_ETH_SRC_MASK,
558 mask.eth->src.addr_bytes))))
/* Pattern item: VLAN (TCI split into PCP/VID, inner EtherType). */
563 if (item->type != RTE_FLOW_ITEM_TYPE_VLAN)
565 mask.vlan = mlx5_nl_flow_item_mask
566 (item, &rte_flow_item_vlan_mask,
567 &mlx5_nl_flow_mask_supported.vlan,
568 &mlx5_nl_flow_mask_empty.vlan,
569 sizeof(mlx5_nl_flow_mask_supported.vlan), error);
573 !mnl_attr_put_u16_check(buf, size,
574 TCA_FLOWER_KEY_ETH_TYPE,
575 RTE_BE16(ETH_P_8021Q)))
579 if (mask.vlan == &mlx5_nl_flow_mask_empty.vlan) {
583 spec.vlan = item->spec;
584 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
585 (mask.vlan->tci & RTE_BE16(0xe000)) != RTE_BE16(0xe000)) ||
586 (mask.vlan->tci & RTE_BE16(0x0fff) &&
587 (mask.vlan->tci & RTE_BE16(0x0fff)) != RTE_BE16(0x0fff)) ||
588 (mask.vlan->inner_type &&
589 mask.vlan->inner_type != RTE_BE16(0xffff)))
590 return rte_flow_error_set
591 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
593 "no support for partial masks on"
594 " \"tci\" (PCP and VID parts) and"
595 " \"inner_type\" fields");
596 if (mask.vlan->inner_type) {
597 if (!mnl_attr_put_u16_check
598 (buf, size, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
599 spec.vlan->inner_type))
601 vlan_eth_type_set = 1;
603 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
604 !mnl_attr_put_u8_check
605 (buf, size, TCA_FLOWER_KEY_VLAN_PRIO,
606 (rte_be_to_cpu_16(spec.vlan->tci) >> 13) & 0x7)) ||
607 (mask.vlan->tci & RTE_BE16(0x0fff) &&
608 !mnl_attr_put_u16_check
609 (buf, size, TCA_FLOWER_KEY_VLAN_ID,
610 spec.vlan->tci & RTE_BE16(0x0fff))))
/* Pattern item: IPV4 (protocol and addresses). */
615 if (item->type != RTE_FLOW_ITEM_TYPE_IPV4)
617 mask.ipv4 = mlx5_nl_flow_item_mask
618 (item, &rte_flow_item_ipv4_mask,
619 &mlx5_nl_flow_mask_supported.ipv4,
620 &mlx5_nl_flow_mask_empty.ipv4,
621 sizeof(mlx5_nl_flow_mask_supported.ipv4), error);
624 if ((!eth_type_set || !vlan_eth_type_set) &&
625 !mnl_attr_put_u16_check(buf, size,
627 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
628 TCA_FLOWER_KEY_ETH_TYPE,
632 vlan_eth_type_set = 1;
633 if (mask.ipv4 == &mlx5_nl_flow_mask_empty.ipv4) {
637 spec.ipv4 = item->spec;
638 if (mask.ipv4->hdr.next_proto_id &&
639 mask.ipv4->hdr.next_proto_id != 0xff)
640 return rte_flow_error_set
641 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
643 "no support for partial mask on"
644 " \"hdr.next_proto_id\" field");
645 if (mask.ipv4->hdr.next_proto_id) {
646 if (!mnl_attr_put_u8_check
647 (buf, size, TCA_FLOWER_KEY_IP_PROTO,
648 spec.ipv4->hdr.next_proto_id))
652 if ((mask.ipv4->hdr.src_addr &&
653 (!mnl_attr_put_u32_check(buf, size,
654 TCA_FLOWER_KEY_IPV4_SRC,
655 spec.ipv4->hdr.src_addr) ||
656 !mnl_attr_put_u32_check(buf, size,
657 TCA_FLOWER_KEY_IPV4_SRC_MASK,
658 mask.ipv4->hdr.src_addr))) ||
659 (mask.ipv4->hdr.dst_addr &&
660 (!mnl_attr_put_u32_check(buf, size,
661 TCA_FLOWER_KEY_IPV4_DST,
662 spec.ipv4->hdr.dst_addr) ||
663 !mnl_attr_put_u32_check(buf, size,
664 TCA_FLOWER_KEY_IPV4_DST_MASK,
665 mask.ipv4->hdr.dst_addr))))
/* Pattern item: IPV6 (protocol and addresses). */
670 if (item->type != RTE_FLOW_ITEM_TYPE_IPV6)
672 mask.ipv6 = mlx5_nl_flow_item_mask
673 (item, &rte_flow_item_ipv6_mask,
674 &mlx5_nl_flow_mask_supported.ipv6,
675 &mlx5_nl_flow_mask_empty.ipv6,
676 sizeof(mlx5_nl_flow_mask_supported.ipv6), error);
679 if ((!eth_type_set || !vlan_eth_type_set) &&
680 !mnl_attr_put_u16_check(buf, size,
682 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
683 TCA_FLOWER_KEY_ETH_TYPE,
684 RTE_BE16(ETH_P_IPV6)))
687 vlan_eth_type_set = 1;
688 if (mask.ipv6 == &mlx5_nl_flow_mask_empty.ipv6) {
692 spec.ipv6 = item->spec;
693 if (mask.ipv6->hdr.proto && mask.ipv6->hdr.proto != 0xff)
694 return rte_flow_error_set
695 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
697 "no support for partial mask on"
698 " \"hdr.proto\" field");
699 if (mask.ipv6->hdr.proto) {
700 if (!mnl_attr_put_u8_check
701 (buf, size, TCA_FLOWER_KEY_IP_PROTO,
702 spec.ipv6->hdr.proto))
706 if ((!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr) &&
707 (!mnl_attr_put_check(buf, size,
708 TCA_FLOWER_KEY_IPV6_SRC,
709 sizeof(spec.ipv6->hdr.src_addr),
710 spec.ipv6->hdr.src_addr) ||
711 !mnl_attr_put_check(buf, size,
712 TCA_FLOWER_KEY_IPV6_SRC_MASK,
713 sizeof(mask.ipv6->hdr.src_addr),
714 mask.ipv6->hdr.src_addr))) ||
715 (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr) &&
716 (!mnl_attr_put_check(buf, size,
717 TCA_FLOWER_KEY_IPV6_DST,
718 sizeof(spec.ipv6->hdr.dst_addr),
719 spec.ipv6->hdr.dst_addr) ||
720 !mnl_attr_put_check(buf, size,
721 TCA_FLOWER_KEY_IPV6_DST_MASK,
722 sizeof(mask.ipv6->hdr.dst_addr),
723 mask.ipv6->hdr.dst_addr))))
/* Pattern item: TCP (source/destination ports). */
728 if (item->type != RTE_FLOW_ITEM_TYPE_TCP)
730 mask.tcp = mlx5_nl_flow_item_mask
731 (item, &rte_flow_item_tcp_mask,
732 &mlx5_nl_flow_mask_supported.tcp,
733 &mlx5_nl_flow_mask_empty.tcp,
734 sizeof(mlx5_nl_flow_mask_supported.tcp), error);
738 !mnl_attr_put_u8_check(buf, size,
739 TCA_FLOWER_KEY_IP_PROTO,
742 if (mask.tcp == &mlx5_nl_flow_mask_empty.tcp) {
746 spec.tcp = item->spec;
747 if ((mask.tcp->hdr.src_port &&
748 mask.tcp->hdr.src_port != RTE_BE16(0xffff)) ||
749 (mask.tcp->hdr.dst_port &&
750 mask.tcp->hdr.dst_port != RTE_BE16(0xffff)))
751 return rte_flow_error_set
752 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
754 "no support for partial masks on"
755 " \"hdr.src_port\" and \"hdr.dst_port\""
757 if ((mask.tcp->hdr.src_port &&
758 (!mnl_attr_put_u16_check(buf, size,
759 TCA_FLOWER_KEY_TCP_SRC,
760 spec.tcp->hdr.src_port) ||
761 !mnl_attr_put_u16_check(buf, size,
762 TCA_FLOWER_KEY_TCP_SRC_MASK,
763 mask.tcp->hdr.src_port))) ||
764 (mask.tcp->hdr.dst_port &&
765 (!mnl_attr_put_u16_check(buf, size,
766 TCA_FLOWER_KEY_TCP_DST,
767 spec.tcp->hdr.dst_port) ||
768 !mnl_attr_put_u16_check(buf, size,
769 TCA_FLOWER_KEY_TCP_DST_MASK,
770 mask.tcp->hdr.dst_port))))
/* Pattern item: UDP (source/destination ports). */
775 if (item->type != RTE_FLOW_ITEM_TYPE_UDP)
777 mask.udp = mlx5_nl_flow_item_mask
778 (item, &rte_flow_item_udp_mask,
779 &mlx5_nl_flow_mask_supported.udp,
780 &mlx5_nl_flow_mask_empty.udp,
781 sizeof(mlx5_nl_flow_mask_supported.udp), error);
785 !mnl_attr_put_u8_check(buf, size,
786 TCA_FLOWER_KEY_IP_PROTO,
789 if (mask.udp == &mlx5_nl_flow_mask_empty.udp) {
793 spec.udp = item->spec;
794 if ((mask.udp->hdr.src_port &&
795 mask.udp->hdr.src_port != RTE_BE16(0xffff)) ||
796 (mask.udp->hdr.dst_port &&
797 mask.udp->hdr.dst_port != RTE_BE16(0xffff)))
798 return rte_flow_error_set
799 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
801 "no support for partial masks on"
802 " \"hdr.src_port\" and \"hdr.dst_port\""
804 if ((mask.udp->hdr.src_port &&
805 (!mnl_attr_put_u16_check(buf, size,
806 TCA_FLOWER_KEY_UDP_SRC,
807 spec.udp->hdr.src_port) ||
808 !mnl_attr_put_u16_check(buf, size,
809 TCA_FLOWER_KEY_UDP_SRC_MASK,
810 mask.udp->hdr.src_port))) ||
811 (mask.udp->hdr.dst_port &&
812 (!mnl_attr_put_u16_check(buf, size,
813 TCA_FLOWER_KEY_UDP_DST,
814 spec.udp->hdr.dst_port) ||
815 !mnl_attr_put_u16_check(buf, size,
816 TCA_FLOWER_KEY_UDP_DST_MASK,
817 mask.udp->hdr.dst_port))))
/* End of pattern: open the flower action list nest. */
822 if (item->type != RTE_FLOW_ITEM_TYPE_END)
825 assert(!na_flower_act);
827 mnl_attr_nest_start_check(buf, size, TCA_FLOWER_ACT);
/* Action: VOID. */
833 if (action->type != RTE_FLOW_ACTION_TYPE_VOID)
/* Action: PORT_ID, emitted as a TC "mirred" egress redirection. */
838 if (action->type != RTE_FLOW_ACTION_TYPE_PORT_ID)
840 conf.port_id = action->conf;
841 if (conf.port_id->original)
/* Resolve the DPDK port ID through the ptoi translation table. */
844 for (i = 0; ptoi[i].ifindex; ++i)
845 if (ptoi[i].port_id == conf.port_id->id)
847 if (!ptoi[i].ifindex)
848 return rte_flow_error_set
849 (error, ENODEV, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
851 "missing data to convert port ID to ifindex");
853 mnl_attr_nest_start_check(buf, size, act_index_cur++);
855 !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "mirred"))
857 act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
860 if (!mnl_attr_put_check(buf, size, TCA_MIRRED_PARMS,
861 sizeof(struct tc_mirred),
863 .action = TC_ACT_STOLEN,
864 .eaction = TCA_EGRESS_REDIR,
865 .ifindex = ptoi[i].ifindex,
868 mnl_attr_nest_end(buf, act);
869 mnl_attr_nest_end(buf, act_index);
/* Action: DROP, emitted as a TC "gact" with TC_ACT_SHOT. */
873 if (action->type != RTE_FLOW_ACTION_TYPE_DROP)
876 mnl_attr_nest_start_check(buf, size, act_index_cur++);
878 !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "gact"))
880 act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
883 if (!mnl_attr_put_check(buf, size, TCA_GACT_PARMS,
884 sizeof(struct tc_gact),
886 .action = TC_ACT_SHOT,
889 mnl_attr_nest_end(buf, act);
890 mnl_attr_nest_end(buf, act_index);
/* VLAN actions: select the TC "vlan" sub-operation, then fall through. */
893 case ACTION_OF_POP_VLAN:
894 if (action->type != RTE_FLOW_ACTION_TYPE_OF_POP_VLAN)
896 conf.of_push_vlan = NULL;
897 i = TCA_VLAN_ACT_POP;
899 case ACTION_OF_PUSH_VLAN:
900 if (action->type != RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN)
902 conf.of_push_vlan = action->conf;
903 i = TCA_VLAN_ACT_PUSH;
905 case ACTION_OF_SET_VLAN_VID:
906 if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID)
908 conf.of_set_vlan_vid = action->conf;
910 goto override_na_vlan_id;
911 i = TCA_VLAN_ACT_MODIFY;
913 case ACTION_OF_SET_VLAN_PCP:
914 if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP)
916 conf.of_set_vlan_pcp = action->conf;
917 if (na_vlan_priority)
918 goto override_na_vlan_priority;
919 i = TCA_VLAN_ACT_MODIFY;
/* Common tail for all VLAN actions: emit a TC "vlan" action. */
923 mnl_attr_nest_start_check(buf, size, act_index_cur++);
925 !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "vlan"))
927 act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
930 if (!mnl_attr_put_check(buf, size, TCA_VLAN_PARMS,
931 sizeof(struct tc_vlan),
933 .action = TC_ACT_PIPE,
937 if (i == TCA_VLAN_ACT_POP) {
938 mnl_attr_nest_end(buf, act);
942 if (i == TCA_VLAN_ACT_PUSH &&
943 !mnl_attr_put_u16_check(buf, size,
944 TCA_VLAN_PUSH_VLAN_PROTOCOL,
945 conf.of_push_vlan->ethertype))
/* Padding attributes whose nla_type is rewritten by the labels below. */
947 na_vlan_id = mnl_nlmsg_get_payload_tail(buf);
948 if (!mnl_attr_put_u16_check(buf, size, TCA_VLAN_PAD, 0))
950 na_vlan_priority = mnl_nlmsg_get_payload_tail(buf);
951 if (!mnl_attr_put_u8_check(buf, size, TCA_VLAN_PAD, 0))
953 mnl_attr_nest_end(buf, act);
954 mnl_attr_nest_end(buf, act_index);
955 if (action->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
957 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
958 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
960 (conf.of_set_vlan_vid->vlan_vid);
961 } else if (action->type ==
962 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
963 override_na_vlan_priority:
964 na_vlan_priority->nla_type =
965 TCA_VLAN_PUSH_VLAN_PRIORITY;
966 *(uint8_t *)mnl_attr_get_payload(na_vlan_priority) =
967 conf.of_set_vlan_pcp->vlan_pcp;
/* END: both the pattern and action lists must be exhausted. */
972 if (item->type != RTE_FLOW_ITEM_TYPE_END ||
973 action->type != RTE_FLOW_ACTION_TYPE_END)
976 mnl_attr_nest_end(buf, na_flower_act);
978 mnl_attr_nest_end(buf, na_flower);
980 return nlh->nlmsg_len;
/* Follow the transition table from the last handled state. */
983 trans = mlx5_nl_flow_trans[trans[n - 1]];
/*
 * Output buffer too small: redo the pass on buf_tmp to compute the
 * exact required size. NOTE(review): part of the retry logic appears
 * elided by extraction.
 */
987 if (buf != buf_tmp) {
989 size = sizeof(buf_tmp);
992 return rte_flow_error_set
993 (error, ENOBUFS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
994 "generated TC message is too large");
998 * Brand rtnetlink buffer with unique handle.
1000 * This handle should be unique for a given network interface to avoid
1004 * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
1006 * Unique 32-bit handle to use.
1009 mlx5_nl_flow_brand(void *buf, uint32_t handle)
1011 struct tcmsg *tcm = mnl_nlmsg_get_payload(buf);
1013 tcm->tcm_handle = handle;
1017 * Send Netlink message with acknowledgment.
1020 * Libmnl socket to use.
1022 * Message to send. This function always raises the NLM_F_ACK flag before
1026 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * NOTE(review): extraction elided several lines here (return type, the
 * "int ret;" declaration, the intermediate error checks between the three
 * socket calls and the final return); remaining lines kept byte-identical.
 */
1029 mlx5_nl_flow_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
/* Answer buffer: error header plus room for a copy of the request body. */
1031 alignas(struct nlmsghdr)
1032 uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
1033 nlh->nlmsg_len - sizeof(*nlh)];
1034 uint32_t seq = random();
/* Request an explicit ACK, tagged with a random sequence number. */
1037 nlh->nlmsg_flags |= NLM_F_ACK;
1038 nlh->nlmsg_seq = seq;
/* Send the request, read the reply and validate it against @p seq. */
1039 ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
1041 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
1044 (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
1052 * Create a Netlink flow rule.
1055 * Libmnl socket to use.
1057 * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
1059 * Perform verbose error reporting if not NULL.
1062 * 0 on success, a negative errno value otherwise and rte_errno is set.
1065 mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
1066 struct rte_flow_error *error)
1068 struct nlmsghdr *nlh = buf;
1070 nlh->nlmsg_type = RTM_NEWTFILTER;
1071 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1072 if (!mlx5_nl_flow_nl_ack(nl, nlh))
1074 return rte_flow_error_set
1075 (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1076 "netlink: failed to create TC flow rule");
1080 * Destroy a Netlink flow rule.
1083 * Libmnl socket to use.
1085 * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
1087 * Perform verbose error reporting if not NULL.
1090 * 0 on success, a negative errno value otherwise and rte_errno is set.
1093 mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
1094 struct rte_flow_error *error)
1096 struct nlmsghdr *nlh = buf;
1098 nlh->nlmsg_type = RTM_DELTFILTER;
1099 nlh->nlmsg_flags = NLM_F_REQUEST;
1100 if (!mlx5_nl_flow_nl_ack(nl, nlh))
1102 return rte_flow_error_set
1103 (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1104 "netlink: failed to destroy TC flow rule");
1108 * Initialize ingress qdisc of a given network interface.
1111 * Libmnl socket of the @p NETLINK_ROUTE kind.
1113 * Index of network interface to initialize.
1115 * Perform verbose error reporting if not NULL.
1118 * 0 on success, a negative errno value otherwise and rte_errno is set.
1121 mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
1122 struct rte_flow_error *error)
1124 struct nlmsghdr *nlh;
1126 alignas(struct nlmsghdr)
1127 uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
1129 /* Destroy existing ingress qdisc and everything attached to it. */
1130 nlh = mnl_nlmsg_put_header(buf);
1131 nlh->nlmsg_type = RTM_DELQDISC;
1132 nlh->nlmsg_flags = NLM_F_REQUEST;
1133 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1134 tcm->tcm_family = AF_UNSPEC;
1135 tcm->tcm_ifindex = ifindex;
1136 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
1137 tcm->tcm_parent = TC_H_INGRESS;
1138 /* Ignore errors when qdisc is already absent. */
1139 if (mlx5_nl_flow_nl_ack(nl, nlh) &&
1140 rte_errno != EINVAL && rte_errno != ENOENT)
1141 return rte_flow_error_set
1142 (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1143 NULL, "netlink: failed to remove ingress qdisc");
1144 /* Create fresh ingress qdisc. */
1145 nlh = mnl_nlmsg_put_header(buf);
1146 nlh->nlmsg_type = RTM_NEWQDISC;
1147 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1148 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1149 tcm->tcm_family = AF_UNSPEC;
1150 tcm->tcm_ifindex = ifindex;
1151 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
1152 tcm->tcm_parent = TC_H_INGRESS;
1153 mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
1154 if (mlx5_nl_flow_nl_ack(nl, nlh))
1155 return rte_flow_error_set
1156 (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1157 NULL, "netlink: failed to create ingress qdisc");
1162 * Create and configure a libmnl socket for Netlink flow rules.
1165 * A valid libmnl socket object pointer on success, NULL otherwise and
1169 mlx5_nl_flow_socket_create(void)
1171 struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
1174 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
1176 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
1181 mnl_socket_close(nl);
/**
 * Destroy a libmnl socket.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind.
 */
void
mlx5_nl_flow_socket_destroy(struct mnl_socket *nl)
{
	mnl_socket_close(nl);
}