1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
22 #include <sys/socket.h>
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
30 #include "mlx5_autoconf.h"
/*
 * Compatibility shims: TC/flower definitions normally provided by kernel
 * UAPI headers. When mlx5_autoconf.h reports a definition missing on the
 * build system, the value from the upstream Linux headers is supplied here
 * so this file builds against older kernels.
 */
32 #ifdef HAVE_TC_ACT_VLAN
34 #include <linux/tc_act/tc_vlan.h>
36 #else /* HAVE_TC_ACT_VLAN */
38 #define TCA_VLAN_ACT_POP 1
39 #define TCA_VLAN_ACT_PUSH 2
40 #define TCA_VLAN_ACT_MODIFY 3
41 #define TCA_VLAN_PARMS 2
42 #define TCA_VLAN_PUSH_VLAN_ID 3
43 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
44 #define TCA_VLAN_PAD 5
45 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
52 #endif /* HAVE_TC_ACT_VLAN */
54 /* Normally found in linux/netlink.h. */
55 #ifndef NETLINK_CAP_ACK
56 #define NETLINK_CAP_ACK 10
59 /* Normally found in linux/pkt_sched.h. */
60 #ifndef TC_H_MIN_INGRESS
61 #define TC_H_MIN_INGRESS 0xfff2u
64 /* Normally found in linux/pkt_cls.h. */
65 #ifndef TCA_CLS_FLAGS_SKIP_SW
66 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
68 #ifndef HAVE_TCA_FLOWER_ACT
69 #define TCA_FLOWER_ACT 3
71 #ifndef HAVE_TCA_FLOWER_FLAGS
72 #define TCA_FLOWER_FLAGS 22
74 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
75 #define TCA_FLOWER_KEY_ETH_TYPE 8
77 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
78 #define TCA_FLOWER_KEY_ETH_DST 4
80 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
81 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
83 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
84 #define TCA_FLOWER_KEY_ETH_SRC 6
86 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
87 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
89 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
90 #define TCA_FLOWER_KEY_IP_PROTO 9
92 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
93 #define TCA_FLOWER_KEY_IPV4_SRC 10
95 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
96 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
98 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
99 #define TCA_FLOWER_KEY_IPV4_DST 12
101 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
102 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
104 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
105 #define TCA_FLOWER_KEY_IPV6_SRC 14
107 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
108 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
110 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
111 #define TCA_FLOWER_KEY_IPV6_DST 16
113 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
114 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
116 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
117 #define TCA_FLOWER_KEY_TCP_SRC 18
119 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
120 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
122 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
123 #define TCA_FLOWER_KEY_TCP_DST 19
125 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
126 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
128 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
129 #define TCA_FLOWER_KEY_UDP_SRC 20
131 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
132 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
134 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
135 #define TCA_FLOWER_KEY_UDP_DST 21
137 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
138 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
140 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
141 #define TCA_FLOWER_KEY_VLAN_ID 23
143 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
144 #define TCA_FLOWER_KEY_VLAN_PRIO 24
146 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
147 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
150 /** Parser state definitions for mlx5_nl_flow_trans[]. */
151 enum mlx5_nl_flow_trans {
170 ACTION_OF_SET_VLAN_VID,
171 ACTION_OF_SET_VLAN_PCP,
/*
 * TRANS() builds an unnamed, INVALID-terminated array of states reachable
 * from a given parser state; mlx5_nl_flow_transpose() walks these lists.
 */
175 #define TRANS(...) (const enum mlx5_nl_flow_trans []){ __VA_ARGS__, INVALID, }
/* States that may follow any pattern item, and the transition to actions. */
177 #define PATTERN_COMMON \
178 ITEM_VOID, ITEM_PORT_ID, ACTIONS
/* Non-terminal actions allowed at any point in the action list. */
179 #define ACTIONS_COMMON \
180 ACTION_VOID, ACTION_OF_POP_VLAN, ACTION_OF_PUSH_VLAN, \
181 ACTION_OF_SET_VLAN_VID, ACTION_OF_SET_VLAN_PCP
/* Terminal ("fate") actions; a rule must end with one of these. */
182 #define ACTIONS_FATE \
183 ACTION_PORT_ID, ACTION_DROP
185 /** Parser state transitions used by mlx5_nl_flow_transpose(). */
/*
 * Each entry, indexed by current state, lists the states that may legally
 * follow it; BACK returns to the previous state's list, END terminates.
 */
186 static const enum mlx5_nl_flow_trans *const mlx5_nl_flow_trans[] = {
189 [ATTR] = TRANS(PATTERN),
190 [PATTERN] = TRANS(ITEM_ETH, PATTERN_COMMON),
191 [ITEM_VOID] = TRANS(BACK),
192 [ITEM_PORT_ID] = TRANS(BACK),
193 [ITEM_ETH] = TRANS(ITEM_IPV4, ITEM_IPV6, ITEM_VLAN, PATTERN_COMMON),
194 [ITEM_VLAN] = TRANS(ITEM_IPV4, ITEM_IPV6, PATTERN_COMMON),
195 [ITEM_IPV4] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
196 [ITEM_IPV6] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
197 [ITEM_TCP] = TRANS(PATTERN_COMMON),
198 [ITEM_UDP] = TRANS(PATTERN_COMMON),
199 [ACTIONS] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
200 [ACTION_VOID] = TRANS(BACK),
201 [ACTION_PORT_ID] = TRANS(ACTION_VOID, END),
202 [ACTION_DROP] = TRANS(ACTION_VOID, END),
203 [ACTION_OF_POP_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
204 [ACTION_OF_PUSH_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
205 [ACTION_OF_SET_VLAN_VID] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
206 [ACTION_OF_SET_VLAN_PCP] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
210 /** Empty masks for known item types. */
/*
 * Zero-filled sentinel masks. mlx5_nl_flow_item_mask() returns the address
 * of one of these fields when an item carries no specification, and
 * mlx5_nl_flow_transpose() detects that case by pointer comparison.
 */
212 struct rte_flow_item_port_id port_id;
213 struct rte_flow_item_eth eth;
214 struct rte_flow_item_vlan vlan;
215 struct rte_flow_item_ipv4 ipv4;
216 struct rte_flow_item_ipv6 ipv6;
217 struct rte_flow_item_tcp tcp;
218 struct rte_flow_item_udp udp;
219 } mlx5_nl_flow_mask_empty;
221 /** Supported masks for known item types. */
/*
 * Widest mask this implementation can offload for each item type; any bit
 * set outside these is rejected by mlx5_nl_flow_item_mask().
 * NOTE(review): the per-field initializer headers (.eth = {...}, etc.) are
 * not visible in this excerpt — grouping below inferred from field names,
 * confirm against the full file.
 */
222 static const struct {
223 struct rte_flow_item_port_id port_id;
224 struct rte_flow_item_eth eth;
225 struct rte_flow_item_vlan vlan;
226 struct rte_flow_item_ipv4 ipv4;
227 struct rte_flow_item_ipv6 ipv6;
228 struct rte_flow_item_tcp tcp;
229 struct rte_flow_item_udp udp;
230 } mlx5_nl_flow_mask_supported = {
235 .type = RTE_BE16(0xffff),
236 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
237 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
240 /* PCP and VID only, no DEI. */
241 .tci = RTE_BE16(0xefff),
242 .inner_type = RTE_BE16(0xffff),
245 .next_proto_id = 0xff,
246 .src_addr = RTE_BE32(0xffffffff),
247 .dst_addr = RTE_BE32(0xffffffff),
252 "\xff\xff\xff\xff\xff\xff\xff\xff"
253 "\xff\xff\xff\xff\xff\xff\xff\xff",
255 "\xff\xff\xff\xff\xff\xff\xff\xff"
256 "\xff\xff\xff\xff\xff\xff\xff\xff",
259 .src_port = RTE_BE16(0xffff),
260 .dst_port = RTE_BE16(0xffff),
263 .src_port = RTE_BE16(0xffff),
264 .dst_port = RTE_BE16(0xffff),
269 * Retrieve mask for pattern item.
271 * This function does basic sanity checks on a pattern item in order to
272 * return the most appropriate mask for it.
275 * Item specification.
276 * @param[in] mask_default
277 * Default mask for pattern item as specified by the flow API.
278 * @param[in] mask_supported
279 * Mask fields supported by the implementation.
280 * @param[in] mask_empty
281 * Empty mask to return when there is no specification.
283 * Perform verbose error reporting if not NULL.
286 * Either @p item->mask or one of the mask parameters on success, NULL
287 * otherwise and rte_errno is set.
290 mlx5_nl_flow_item_mask(const struct rte_flow_item *item,
291 const void *mask_default,
292 const void *mask_supported,
293 const void *mask_empty,
295 struct rte_flow_error *error)
300 /* item->last and item->mask cannot exist without item->spec. */
301 if (!item->spec && (item->mask || item->last)) {
303 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
304 "\"mask\" or \"last\" field provided without a"
305 " corresponding \"spec\"");
308 /* No spec, no mask, no problem. */
/* Fall back to the flow API default mask when the item carries none. */
311 mask = item->mask ? item->mask : mask_default;
314 * Single-pass check to make sure that:
315 * - Mask is supported, no bits are set outside mask_supported.
316 * - Both item->spec and item->last are included in mask.
/* Byte-wise validation: masks are compared as raw uint8_t arrays. */
318 for (i = 0; i != mask_size; ++i) {
321 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
322 ((const uint8_t *)mask_supported)[i]) {
324 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
325 mask, "unsupported field found in \"mask\"");
329 (((const uint8_t *)item->spec)[i] & mask[i]) !=
330 (((const uint8_t *)item->last)[i] & mask[i])) {
332 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_LAST,
334 "range between \"spec\" and \"last\" not"
335 " comprised in \"mask\"");
343 * Transpose flow rule description to rtnetlink message.
345 * This function transposes a flow rule description to a traffic control
346 * (TC) filter creation message ready to be sent over Netlink.
348 * Target interface is specified as the first entry of the @p ptoi table.
349 * Subsequent entries enable this function to resolve other DPDK port IDs
350 * found in the flow rule.
353 * Output message buffer. May be NULL when @p size is 0.
355 * Size of @p buf. Message may be truncated if not large enough.
357 * DPDK port ID to network interface index translation table. This table
358 * is terminated by an entry with a zero ifindex value.
360 * Flow rule attributes.
362 * Pattern specification.
364 * Associated actions.
366 * Perform verbose error reporting if not NULL.
369 * A positive value representing the exact size of the message in bytes
370 * regardless of the @p size parameter on success, a negative errno value
371 * otherwise and rte_errno is set.
374 mlx5_nl_flow_transpose(void *buf,
376 const struct mlx5_nl_flow_ptoi *ptoi,
377 const struct rte_flow_attr *attr,
378 const struct rte_flow_item *pattern,
379 const struct rte_flow_action *actions,
380 struct rte_flow_error *error)
382 alignas(struct nlmsghdr)
383 uint8_t buf_tmp[mnl_nlmsg_size(sizeof(struct tcmsg) + 1024)];
384 const struct rte_flow_item *item;
385 const struct rte_flow_action *action;
387 uint32_t act_index_cur;
391 bool vlan_eth_type_set;
393 struct nlattr *na_flower;
394 struct nlattr *na_flower_act;
395 struct nlattr *na_vlan_id;
396 struct nlattr *na_vlan_priority;
397 const enum mlx5_nl_flow_trans *trans;
398 const enum mlx5_nl_flow_trans *back;
/* Reset parser state before walking the pattern/action lists. */
407 in_port_id_set = false;
408 eth_type_set = false;
409 vlan_present = false;
410 vlan_eth_type_set = false;
411 ip_proto_set = false;
413 na_flower_act = NULL;
415 na_vlan_priority = NULL;
/*
 * State machine driven by mlx5_nl_flow_trans[]: each iteration tries the
 * next candidate state for the current item/action until one accepts it.
 */
419 switch (trans[n++]) {
421 const struct rte_flow_item_port_id *port_id;
422 const struct rte_flow_item_eth *eth;
423 const struct rte_flow_item_vlan *vlan;
424 const struct rte_flow_item_ipv4 *ipv4;
425 const struct rte_flow_item_ipv6 *ipv6;
426 const struct rte_flow_item_tcp *tcp;
427 const struct rte_flow_item_udp *udp;
430 const struct rte_flow_action_port_id *port_id;
431 const struct rte_flow_action_of_push_vlan *of_push_vlan;
432 const struct rte_flow_action_of_set_vlan_vid *
434 const struct rte_flow_action_of_set_vlan_pcp *
437 struct nlmsghdr *nlh;
439 struct nlattr *act_index;
445 return rte_flow_error_set
446 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
447 item, "unsupported pattern item combination");
448 else if (action->type)
449 return rte_flow_error_set
450 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
451 action, "unsupported action combination");
452 return rte_flow_error_set
453 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
454 "flow rule lacks some kind of fate action");
461 * Supported attributes: no groups, some priorities and
462 * ingress only. Don't care about transfer as it is the
466 return rte_flow_error_set
468 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
469 attr, "groups are not supported");
470 if (attr->priority > 0xfffe)
471 return rte_flow_error_set
473 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
474 attr, "lowest priority level is 0xfffe");
476 return rte_flow_error_set
478 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
479 attr, "only ingress is supported");
481 return rte_flow_error_set
483 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
484 attr, "egress is not supported");
485 if (size < mnl_nlmsg_size(sizeof(*tcm)))
487 nlh = mnl_nlmsg_put_header(buf);
489 nlh->nlmsg_flags = 0;
491 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
492 tcm->tcm_family = AF_UNSPEC;
493 tcm->tcm_ifindex = ptoi[0].ifindex;
495 * Let kernel pick a handle by default. A predictable handle
496 * can be set by the caller on the resulting buffer through
497 * mlx5_nl_flow_brand().
500 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
502 * Priority cannot be zero to prevent the kernel from
503 * picking one automatically.
505 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
506 RTE_BE16(ETH_P_ALL));
509 if (!mnl_attr_put_strz_check(buf, size, TCA_KIND, "flower"))
511 na_flower = mnl_attr_nest_start_check(buf, size, TCA_OPTIONS);
514 if (!mnl_attr_put_u32_check(buf, size, TCA_FLOWER_FLAGS,
515 TCA_CLS_FLAGS_SKIP_SW))
519 if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
524 if (item->type != RTE_FLOW_ITEM_TYPE_PORT_ID)
526 mask.port_id = mlx5_nl_flow_item_mask
527 (item, &rte_flow_item_port_id_mask,
528 &mlx5_nl_flow_mask_supported.port_id,
529 &mlx5_nl_flow_mask_empty.port_id,
530 sizeof(mlx5_nl_flow_mask_supported.port_id), error);
533 if (mask.port_id == &mlx5_nl_flow_mask_empty.port_id) {
538 spec.port_id = item->spec;
539 if (mask.port_id->id && mask.port_id->id != 0xffffffff)
540 return rte_flow_error_set
541 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
543 "no support for partial mask on"
545 if (!mask.port_id->id)
548 for (i = 0; ptoi[i].ifindex; ++i)
549 if (ptoi[i].port_id == spec.port_id->id)
551 if (!ptoi[i].ifindex)
552 return rte_flow_error_set
553 (error, ENODEV, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
555 "missing data to convert port ID to ifindex");
556 tcm = mnl_nlmsg_get_payload(buf);
557 if (in_port_id_set &&
558 ptoi[i].ifindex != (unsigned int)tcm->tcm_ifindex)
559 return rte_flow_error_set
560 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
562 "cannot match traffic for several port IDs"
563 " through a single flow rule");
564 tcm->tcm_ifindex = ptoi[i].ifindex;
569 if (item->type != RTE_FLOW_ITEM_TYPE_ETH)
571 mask.eth = mlx5_nl_flow_item_mask
572 (item, &rte_flow_item_eth_mask,
573 &mlx5_nl_flow_mask_supported.eth,
574 &mlx5_nl_flow_mask_empty.eth,
575 sizeof(mlx5_nl_flow_mask_supported.eth), error);
578 if (mask.eth == &mlx5_nl_flow_mask_empty.eth) {
582 spec.eth = item->spec;
583 if (mask.eth->type && mask.eth->type != RTE_BE16(0xffff))
584 return rte_flow_error_set
585 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
587 "no support for partial mask on"
589 if (mask.eth->type) {
590 if (!mnl_attr_put_u16_check(buf, size,
591 TCA_FLOWER_KEY_ETH_TYPE,
596 if ((!is_zero_ether_addr(&mask.eth->dst) &&
597 (!mnl_attr_put_check(buf, size,
598 TCA_FLOWER_KEY_ETH_DST,
600 spec.eth->dst.addr_bytes) ||
601 !mnl_attr_put_check(buf, size,
602 TCA_FLOWER_KEY_ETH_DST_MASK,
604 mask.eth->dst.addr_bytes))) ||
605 (!is_zero_ether_addr(&mask.eth->src) &&
606 (!mnl_attr_put_check(buf, size,
607 TCA_FLOWER_KEY_ETH_SRC,
609 spec.eth->src.addr_bytes) ||
610 !mnl_attr_put_check(buf, size,
611 TCA_FLOWER_KEY_ETH_SRC_MASK,
613 mask.eth->src.addr_bytes))))
618 if (item->type != RTE_FLOW_ITEM_TYPE_VLAN)
620 mask.vlan = mlx5_nl_flow_item_mask
621 (item, &rte_flow_item_vlan_mask,
622 &mlx5_nl_flow_mask_supported.vlan,
623 &mlx5_nl_flow_mask_empty.vlan,
624 sizeof(mlx5_nl_flow_mask_supported.vlan), error);
628 !mnl_attr_put_u16_check(buf, size,
629 TCA_FLOWER_KEY_ETH_TYPE,
630 RTE_BE16(ETH_P_8021Q)))
634 if (mask.vlan == &mlx5_nl_flow_mask_empty.vlan) {
638 spec.vlan = item->spec;
639 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
640 (mask.vlan->tci & RTE_BE16(0xe000)) != RTE_BE16(0xe000)) ||
641 (mask.vlan->tci & RTE_BE16(0x0fff) &&
642 (mask.vlan->tci & RTE_BE16(0x0fff)) != RTE_BE16(0x0fff)) ||
643 (mask.vlan->inner_type &&
644 mask.vlan->inner_type != RTE_BE16(0xffff)))
645 return rte_flow_error_set
646 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
648 "no support for partial masks on"
649 " \"tci\" (PCP and VID parts) and"
650 " \"inner_type\" fields");
651 if (mask.vlan->inner_type) {
652 if (!mnl_attr_put_u16_check
653 (buf, size, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
654 spec.vlan->inner_type))
656 vlan_eth_type_set = 1;
658 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
659 !mnl_attr_put_u8_check
660 (buf, size, TCA_FLOWER_KEY_VLAN_PRIO,
661 (rte_be_to_cpu_16(spec.vlan->tci) >> 13) & 0x7)) ||
662 (mask.vlan->tci & RTE_BE16(0x0fff) &&
663 !mnl_attr_put_u16_check
664 (buf, size, TCA_FLOWER_KEY_VLAN_ID,
665 rte_be_to_cpu_16(spec.vlan->tci & RTE_BE16(0x0fff)))))
670 if (item->type != RTE_FLOW_ITEM_TYPE_IPV4)
672 mask.ipv4 = mlx5_nl_flow_item_mask
673 (item, &rte_flow_item_ipv4_mask,
674 &mlx5_nl_flow_mask_supported.ipv4,
675 &mlx5_nl_flow_mask_empty.ipv4,
676 sizeof(mlx5_nl_flow_mask_supported.ipv4), error);
679 if ((!eth_type_set || !vlan_eth_type_set) &&
680 !mnl_attr_put_u16_check(buf, size,
682 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
683 TCA_FLOWER_KEY_ETH_TYPE,
687 vlan_eth_type_set = 1;
688 if (mask.ipv4 == &mlx5_nl_flow_mask_empty.ipv4) {
692 spec.ipv4 = item->spec;
693 if (mask.ipv4->hdr.next_proto_id &&
694 mask.ipv4->hdr.next_proto_id != 0xff)
695 return rte_flow_error_set
696 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
698 "no support for partial mask on"
699 " \"hdr.next_proto_id\" field");
700 if (mask.ipv4->hdr.next_proto_id) {
701 if (!mnl_attr_put_u8_check
702 (buf, size, TCA_FLOWER_KEY_IP_PROTO,
703 spec.ipv4->hdr.next_proto_id))
707 if ((mask.ipv4->hdr.src_addr &&
708 (!mnl_attr_put_u32_check(buf, size,
709 TCA_FLOWER_KEY_IPV4_SRC,
710 spec.ipv4->hdr.src_addr) ||
711 !mnl_attr_put_u32_check(buf, size,
712 TCA_FLOWER_KEY_IPV4_SRC_MASK,
713 mask.ipv4->hdr.src_addr))) ||
714 (mask.ipv4->hdr.dst_addr &&
715 (!mnl_attr_put_u32_check(buf, size,
716 TCA_FLOWER_KEY_IPV4_DST,
717 spec.ipv4->hdr.dst_addr) ||
718 !mnl_attr_put_u32_check(buf, size,
719 TCA_FLOWER_KEY_IPV4_DST_MASK,
720 mask.ipv4->hdr.dst_addr))))
725 if (item->type != RTE_FLOW_ITEM_TYPE_IPV6)
727 mask.ipv6 = mlx5_nl_flow_item_mask
728 (item, &rte_flow_item_ipv6_mask,
729 &mlx5_nl_flow_mask_supported.ipv6,
730 &mlx5_nl_flow_mask_empty.ipv6,
731 sizeof(mlx5_nl_flow_mask_supported.ipv6), error);
734 if ((!eth_type_set || !vlan_eth_type_set) &&
735 !mnl_attr_put_u16_check(buf, size,
737 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
738 TCA_FLOWER_KEY_ETH_TYPE,
739 RTE_BE16(ETH_P_IPV6)))
742 vlan_eth_type_set = 1;
743 if (mask.ipv6 == &mlx5_nl_flow_mask_empty.ipv6) {
747 spec.ipv6 = item->spec;
748 if (mask.ipv6->hdr.proto && mask.ipv6->hdr.proto != 0xff)
749 return rte_flow_error_set
750 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
752 "no support for partial mask on"
753 " \"hdr.proto\" field");
754 if (mask.ipv6->hdr.proto) {
755 if (!mnl_attr_put_u8_check
756 (buf, size, TCA_FLOWER_KEY_IP_PROTO,
757 spec.ipv6->hdr.proto))
761 if ((!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr) &&
762 (!mnl_attr_put_check(buf, size,
763 TCA_FLOWER_KEY_IPV6_SRC,
764 sizeof(spec.ipv6->hdr.src_addr),
765 spec.ipv6->hdr.src_addr) ||
766 !mnl_attr_put_check(buf, size,
767 TCA_FLOWER_KEY_IPV6_SRC_MASK,
768 sizeof(mask.ipv6->hdr.src_addr),
769 mask.ipv6->hdr.src_addr))) ||
770 (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr) &&
771 (!mnl_attr_put_check(buf, size,
772 TCA_FLOWER_KEY_IPV6_DST,
773 sizeof(spec.ipv6->hdr.dst_addr),
774 spec.ipv6->hdr.dst_addr) ||
775 !mnl_attr_put_check(buf, size,
776 TCA_FLOWER_KEY_IPV6_DST_MASK,
777 sizeof(mask.ipv6->hdr.dst_addr),
778 mask.ipv6->hdr.dst_addr))))
783 if (item->type != RTE_FLOW_ITEM_TYPE_TCP)
785 mask.tcp = mlx5_nl_flow_item_mask
786 (item, &rte_flow_item_tcp_mask,
787 &mlx5_nl_flow_mask_supported.tcp,
788 &mlx5_nl_flow_mask_empty.tcp,
789 sizeof(mlx5_nl_flow_mask_supported.tcp), error);
793 !mnl_attr_put_u8_check(buf, size,
794 TCA_FLOWER_KEY_IP_PROTO,
797 if (mask.tcp == &mlx5_nl_flow_mask_empty.tcp) {
801 spec.tcp = item->spec;
802 if ((mask.tcp->hdr.src_port &&
803 (!mnl_attr_put_u16_check(buf, size,
804 TCA_FLOWER_KEY_TCP_SRC,
805 spec.tcp->hdr.src_port) ||
806 !mnl_attr_put_u16_check(buf, size,
807 TCA_FLOWER_KEY_TCP_SRC_MASK,
808 mask.tcp->hdr.src_port))) ||
809 (mask.tcp->hdr.dst_port &&
810 (!mnl_attr_put_u16_check(buf, size,
811 TCA_FLOWER_KEY_TCP_DST,
812 spec.tcp->hdr.dst_port) ||
813 !mnl_attr_put_u16_check(buf, size,
814 TCA_FLOWER_KEY_TCP_DST_MASK,
815 mask.tcp->hdr.dst_port))))
820 if (item->type != RTE_FLOW_ITEM_TYPE_UDP)
822 mask.udp = mlx5_nl_flow_item_mask
823 (item, &rte_flow_item_udp_mask,
824 &mlx5_nl_flow_mask_supported.udp,
825 &mlx5_nl_flow_mask_empty.udp,
826 sizeof(mlx5_nl_flow_mask_supported.udp), error);
830 !mnl_attr_put_u8_check(buf, size,
831 TCA_FLOWER_KEY_IP_PROTO,
834 if (mask.udp == &mlx5_nl_flow_mask_empty.udp) {
838 spec.udp = item->spec;
839 if ((mask.udp->hdr.src_port &&
840 (!mnl_attr_put_u16_check(buf, size,
841 TCA_FLOWER_KEY_UDP_SRC,
842 spec.udp->hdr.src_port) ||
843 !mnl_attr_put_u16_check(buf, size,
844 TCA_FLOWER_KEY_UDP_SRC_MASK,
845 mask.udp->hdr.src_port))) ||
846 (mask.udp->hdr.dst_port &&
847 (!mnl_attr_put_u16_check(buf, size,
848 TCA_FLOWER_KEY_UDP_DST,
849 spec.udp->hdr.dst_port) ||
850 !mnl_attr_put_u16_check(buf, size,
851 TCA_FLOWER_KEY_UDP_DST_MASK,
852 mask.udp->hdr.dst_port))))
857 if (item->type != RTE_FLOW_ITEM_TYPE_END)
860 assert(!na_flower_act);
862 mnl_attr_nest_start_check(buf, size, TCA_FLOWER_ACT);
/* Action handlers below emit numbered TCA_ACT_* nests inside flower. */
868 if (action->type != RTE_FLOW_ACTION_TYPE_VOID)
873 if (action->type != RTE_FLOW_ACTION_TYPE_PORT_ID)
875 conf.port_id = action->conf;
876 if (conf.port_id->original)
879 for (i = 0; ptoi[i].ifindex; ++i)
880 if (ptoi[i].port_id == conf.port_id->id)
882 if (!ptoi[i].ifindex)
883 return rte_flow_error_set
884 (error, ENODEV, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
886 "missing data to convert port ID to ifindex");
888 mnl_attr_nest_start_check(buf, size, act_index_cur++);
890 !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "mirred"))
892 act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
895 if (!mnl_attr_put_check(buf, size, TCA_MIRRED_PARMS,
896 sizeof(struct tc_mirred),
898 .action = TC_ACT_STOLEN,
899 .eaction = TCA_EGRESS_REDIR,
900 .ifindex = ptoi[i].ifindex,
903 mnl_attr_nest_end(buf, act);
904 mnl_attr_nest_end(buf, act_index);
908 if (action->type != RTE_FLOW_ACTION_TYPE_DROP)
911 mnl_attr_nest_start_check(buf, size, act_index_cur++);
913 !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "gact"))
915 act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
918 if (!mnl_attr_put_check(buf, size, TCA_GACT_PARMS,
919 sizeof(struct tc_gact),
921 .action = TC_ACT_SHOT,
924 mnl_attr_nest_end(buf, act);
925 mnl_attr_nest_end(buf, act_index);
/*
 * The four VLAN actions share one "vlan" TC action emitter; each case
 * selects the TCA_VLAN_ACT_* opcode in "i" before falling into it.
 */
928 case ACTION_OF_POP_VLAN:
929 if (action->type != RTE_FLOW_ACTION_TYPE_OF_POP_VLAN)
931 conf.of_push_vlan = NULL;
932 i = TCA_VLAN_ACT_POP;
934 case ACTION_OF_PUSH_VLAN:
935 if (action->type != RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN)
937 conf.of_push_vlan = action->conf;
938 i = TCA_VLAN_ACT_PUSH;
940 case ACTION_OF_SET_VLAN_VID:
941 if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID)
943 conf.of_set_vlan_vid = action->conf;
945 goto override_na_vlan_id;
946 i = TCA_VLAN_ACT_MODIFY;
948 case ACTION_OF_SET_VLAN_PCP:
949 if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP)
951 conf.of_set_vlan_pcp = action->conf;
952 if (na_vlan_priority)
953 goto override_na_vlan_priority;
954 i = TCA_VLAN_ACT_MODIFY;
958 mnl_attr_nest_start_check(buf, size, act_index_cur++);
960 !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "vlan"))
962 act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
965 if (!mnl_attr_put_check(buf, size, TCA_VLAN_PARMS,
966 sizeof(struct tc_vlan),
968 .action = TC_ACT_PIPE,
972 if (i == TCA_VLAN_ACT_POP) {
973 mnl_attr_nest_end(buf, act);
974 mnl_attr_nest_end(buf, act_index);
978 if (i == TCA_VLAN_ACT_PUSH &&
979 !mnl_attr_put_u16_check(buf, size,
980 TCA_VLAN_PUSH_VLAN_PROTOCOL,
981 conf.of_push_vlan->ethertype))
983 na_vlan_id = mnl_nlmsg_get_payload_tail(buf);
984 if (!mnl_attr_put_u16_check(buf, size, TCA_VLAN_PAD, 0))
986 na_vlan_priority = mnl_nlmsg_get_payload_tail(buf);
987 if (!mnl_attr_put_u8_check(buf, size, TCA_VLAN_PAD, 0))
989 mnl_attr_nest_end(buf, act);
990 mnl_attr_nest_end(buf, act_index);
991 if (action->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
993 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
994 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
996 (conf.of_set_vlan_vid->vlan_vid);
997 } else if (action->type ==
998 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
999 override_na_vlan_priority:
1000 na_vlan_priority->nla_type =
1001 TCA_VLAN_PUSH_VLAN_PRIORITY;
1002 *(uint8_t *)mnl_attr_get_payload(na_vlan_priority) =
1003 conf.of_set_vlan_pcp->vlan_pcp;
1008 if (item->type != RTE_FLOW_ITEM_TYPE_END ||
1009 action->type != RTE_FLOW_ACTION_TYPE_END)
1012 mnl_attr_nest_end(buf, na_flower_act);
1014 mnl_attr_nest_end(buf, na_flower);
1016 return nlh->nlmsg_len;
/* Current state accepted: restart scanning from its transition list. */
1019 trans = mlx5_nl_flow_trans[trans[n - 1]];
/*
 * Output buffer overflow: retry once with the stack buffer to compute
 * the required size; a second overflow is a hard error.
 */
1023 if (buf != buf_tmp) {
1025 size = sizeof(buf_tmp);
1028 return rte_flow_error_set
1029 (error, ENOBUFS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1030 "generated TC message is too large");
1034 * Brand rtnetlink buffer with unique handle.
1036 * This handle should be unique for a given network interface to avoid
1040 * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
1042 * Unique 32-bit handle to use.
1045 mlx5_nl_flow_brand(void *buf, uint32_t handle)
1047 struct tcmsg *tcm = mnl_nlmsg_get_payload(buf);
/* Overwrite the tcmsg handle left unset by mlx5_nl_flow_transpose(). */
1049 tcm->tcm_handle = handle;
1053 * Send Netlink message with acknowledgment.
1056 * Libmnl socket to use.
1058 * Message to send. This function always raises the NLM_F_ACK flag before
1062 * 0 on success, a negative errno value otherwise and rte_errno is set.
1065 mlx5_nl_flow_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
/* Answer buffer sized for an nlmsgerr plus the echoed request payload. */
1067 alignas(struct nlmsghdr)
1068 uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
1069 nlh->nlmsg_len - sizeof(*nlh)];
/* NOTE(review): random() sequence numbers are not guaranteed unique across
 * concurrent callers — confirm this path is serialized per socket. */
1070 uint32_t seq = random();
1073 nlh->nlmsg_flags |= NLM_F_ACK;
1074 nlh->nlmsg_seq = seq;
1075 ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
1077 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
/* Match the ACK against our sequence number and port ID. */
1080 (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
1088 * Create a Netlink flow rule.
1091 * Libmnl socket to use.
1093 * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
1095 * Perform verbose error reporting if not NULL.
1098 * 0 on success, a negative errno value otherwise and rte_errno is set.
1101 mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
1102 struct rte_flow_error *error)
1104 struct nlmsghdr *nlh = buf;
/* NLM_F_EXCL makes the kernel reject a filter that already exists. */
1106 nlh->nlmsg_type = RTM_NEWTFILTER;
1107 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1108 if (!mlx5_nl_flow_nl_ack(nl, nlh))
1110 return rte_flow_error_set
1111 (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1112 "netlink: failed to create TC flow rule");
1116 * Destroy a Netlink flow rule.
1119 * Libmnl socket to use.
1121 * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
1123 * Perform verbose error reporting if not NULL.
1126 * 0 on success, a negative errno value otherwise and rte_errno is set.
1129 mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
1130 struct rte_flow_error *error)
1132 struct nlmsghdr *nlh = buf;
1134 nlh->nlmsg_type = RTM_DELTFILTER;
1135 nlh->nlmsg_flags = NLM_F_REQUEST;
1136 if (!mlx5_nl_flow_nl_ack(nl, nlh))
/* mlx5_nl_flow_nl_ack() reports failures through rte_errno (see its
 * documentation); use it instead of raw errno so the error cause matches
 * what mlx5_nl_flow_create() reports for the symmetric operation. */
1138 return rte_flow_error_set
1139 (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1140 "netlink: failed to destroy TC flow rule");
1144 * Initialize ingress qdisc of a given network interface.
1147 * Libmnl socket of the @p NETLINK_ROUTE kind.
1149 * Index of network interface to initialize.
1151 * Perform verbose error reporting if not NULL.
1154 * 0 on success, a negative errno value otherwise and rte_errno is set.
1157 mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
1158 struct rte_flow_error *error)
1160 struct nlmsghdr *nlh;
1162 alignas(struct nlmsghdr)
1163 uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
1165 /* Destroy existing ingress qdisc and everything attached to it. */
1166 nlh = mnl_nlmsg_put_header(buf);
1167 nlh->nlmsg_type = RTM_DELQDISC;
1168 nlh->nlmsg_flags = NLM_F_REQUEST;
1169 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1170 tcm->tcm_family = AF_UNSPEC;
1171 tcm->tcm_ifindex = ifindex;
1172 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
1173 tcm->tcm_parent = TC_H_INGRESS;
1174 /* Ignore errors when qdisc is already absent. */
1175 if (mlx5_nl_flow_nl_ack(nl, nlh) &&
1176 rte_errno != EINVAL && rte_errno != ENOENT)
1177 return rte_flow_error_set
1178 (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1179 NULL, "netlink: failed to remove ingress qdisc");
1180 /* Create fresh ingress qdisc. */
1181 nlh = mnl_nlmsg_put_header(buf);
1182 nlh->nlmsg_type = RTM_NEWQDISC;
1183 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1184 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1185 tcm->tcm_family = AF_UNSPEC;
1186 tcm->tcm_ifindex = ifindex;
1187 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
1188 tcm->tcm_parent = TC_H_INGRESS;
/* NOTE(review): return value ignored — buf is sized so "ingress" always
 * fits, but a truncated attribute would surface via the ack below. */
1189 mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
1190 if (mlx5_nl_flow_nl_ack(nl, nlh))
1191 return rte_flow_error_set
1192 (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1193 NULL, "netlink: failed to create ingress qdisc");
1198 * Create and configure a libmnl socket for Netlink flow rules.
1201 * A valid libmnl socket object pointer on success, NULL otherwise and
1205 mlx5_nl_flow_socket_create(void)
1207 struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
/* Best effort: NETLINK_CAP_ACK trims request payload echo from acks;
 * the setsockopt return value is deliberately not checked. */
1210 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
1212 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
/* Bind failed: release the socket before reporting failure. */
1217 mnl_socket_close(nl);
1222 * Destroy a libmnl socket.
/* Counterpart of mlx5_nl_flow_socket_create(); closes the underlying fd. */
1225 mlx5_nl_flow_socket_destroy(struct mnl_socket *nl)
1227 mnl_socket_close(nl);