1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
22 #include <sys/socket.h>
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
/*
 * Kernel-header compatibility fallbacks.
 *
 * Each definition below hard-codes a Netlink/TC ABI constant normally
 * provided by recent kernel headers, so this file still builds against
 * older headers that lack them.  The numeric values are part of the
 * kernel ABI and must never change.
 *
 * NOTE(review): the matching #endif lines are not visible in this
 * excerpt — confirm each #ifndef is properly closed in the full file.
 */
31 /* Normally found in linux/netlink.h. */
32 #ifndef NETLINK_CAP_ACK
33 #define NETLINK_CAP_ACK 10
36 /* Normally found in linux/pkt_sched.h. */
37 #ifndef TC_H_MIN_INGRESS
38 #define TC_H_MIN_INGRESS 0xfff2u
41 /* Normally found in linux/pkt_cls.h. */
42 #ifndef TCA_CLS_FLAGS_SKIP_SW
43 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
45 #ifndef HAVE_TCA_FLOWER_ACT
46 #define TCA_FLOWER_ACT 3
48 #ifndef HAVE_TCA_FLOWER_FLAGS
49 #define TCA_FLOWER_FLAGS 22
51 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
52 #define TCA_FLOWER_KEY_ETH_TYPE 8
54 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
55 #define TCA_FLOWER_KEY_ETH_DST 4
57 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
58 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
60 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
61 #define TCA_FLOWER_KEY_ETH_SRC 6
63 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
64 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
66 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
67 #define TCA_FLOWER_KEY_IP_PROTO 9
69 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
70 #define TCA_FLOWER_KEY_IPV4_SRC 10
72 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
73 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
75 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
76 #define TCA_FLOWER_KEY_IPV4_DST 12
78 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
79 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
81 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
82 #define TCA_FLOWER_KEY_IPV6_SRC 14
84 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
85 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
87 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
88 #define TCA_FLOWER_KEY_IPV6_DST 16
90 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
91 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
93 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
94 #define TCA_FLOWER_KEY_TCP_SRC 18
96 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
97 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
99 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
100 #define TCA_FLOWER_KEY_TCP_DST 19
102 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
103 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
105 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
106 #define TCA_FLOWER_KEY_UDP_SRC 20
108 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
109 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
111 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
112 #define TCA_FLOWER_KEY_UDP_DST 21
114 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
115 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
/*
 * Flow-rule parser state machine.
 *
 * mlx5_nl_flow_transpose() walks pattern items and actions by looking up,
 * for its current state, the list of states it may move to next.  TRANS()
 * builds such a list as an INVALID-terminated compound literal.
 *
 * NOTE(review): the enum body (state names such as ITEM_ETH, ACTIONS,
 * BACK, END, INVALID) and the PATTERN_COMMON/ACTIONS_COMMON macro
 * continuations are not visible in this excerpt.
 */
118 /** Parser state definitions for mlx5_nl_flow_trans[]. */
119 enum mlx5_nl_flow_trans {
137 #define TRANS(...) (const enum mlx5_nl_flow_trans []){ __VA_ARGS__, INVALID, }
139 #define PATTERN_COMMON \
141 #define ACTIONS_COMMON \
143 #define ACTIONS_FATE \
144 ACTION_PORT_ID, ACTION_DROP
146 /** Parser state transitions used by mlx5_nl_flow_transpose(). */
/* Table: indexed by current state, yields the allowed next states.
 * E.g. an ETH item may be followed by IPV4, IPV6 or the common items;
 * a fate action (PORT_ID/DROP) may only be followed by VOID or END. */
147 static const enum mlx5_nl_flow_trans *const mlx5_nl_flow_trans[] = {
150 [ATTR] = TRANS(PATTERN),
151 [PATTERN] = TRANS(ITEM_ETH, PATTERN_COMMON),
152 [ITEM_VOID] = TRANS(BACK),
153 [ITEM_ETH] = TRANS(ITEM_IPV4, ITEM_IPV6, PATTERN_COMMON),
154 [ITEM_IPV4] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
155 [ITEM_IPV6] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
156 [ITEM_TCP] = TRANS(PATTERN_COMMON),
157 [ITEM_UDP] = TRANS(PATTERN_COMMON),
158 [ACTIONS] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
159 [ACTION_VOID] = TRANS(BACK),
160 [ACTION_PORT_ID] = TRANS(ACTION_VOID, END),
161 [ACTION_DROP] = TRANS(ACTION_VOID, END),
/*
 * Mask reference tables used by mlx5_nl_flow_item_mask().
 *
 * mlx5_nl_flow_mask_empty is zero-initialized (static storage) and is
 * returned when an item carries no spec; mlx5_nl_flow_mask_supported
 * sets exactly the fields this driver can translate to TC flower keys —
 * any bit outside it in a user mask is rejected as unsupported.
 *
 * NOTE(review): several struct/initializer opener lines (e.g. the
 * "static struct {" for the empty table and the ".eth = {" style
 * sub-initializers) are missing from this excerpt.
 */
165 /** Empty masks for known item types. */
167 struct rte_flow_item_eth eth;
168 struct rte_flow_item_ipv4 ipv4;
169 struct rte_flow_item_ipv6 ipv6;
170 struct rte_flow_item_tcp tcp;
171 struct rte_flow_item_udp udp;
172 } mlx5_nl_flow_mask_empty;
174 /** Supported masks for known item types. */
175 static const struct {
176 struct rte_flow_item_eth eth;
177 struct rte_flow_item_ipv4 ipv4;
178 struct rte_flow_item_ipv6 ipv6;
179 struct rte_flow_item_tcp tcp;
180 struct rte_flow_item_udp udp;
181 } mlx5_nl_flow_mask_supported = {
183 .type = RTE_BE16(0xffff),
184 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
185 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
188 .next_proto_id = 0xff,
189 .src_addr = RTE_BE32(0xffffffff),
190 .dst_addr = RTE_BE32(0xffffffff),
195 "\xff\xff\xff\xff\xff\xff\xff\xff"
196 "\xff\xff\xff\xff\xff\xff\xff\xff",
198 "\xff\xff\xff\xff\xff\xff\xff\xff"
199 "\xff\xff\xff\xff\xff\xff\xff\xff",
202 .src_port = RTE_BE16(0xffff),
203 .dst_port = RTE_BE16(0xffff),
206 .src_port = RTE_BE16(0xffff),
207 .dst_port = RTE_BE16(0xffff),
212 * Retrieve mask for pattern item.
214 * This function does basic sanity checks on a pattern item in order to
215 * return the most appropriate mask for it.
218 * Item specification.
219 * @param[in] mask_default
220 * Default mask for pattern item as specified by the flow API.
221 * @param[in] mask_supported
222 * Mask fields supported by the implementation.
223 * @param[in] mask_empty
224 * Empty mask to return when there is no specification.
226 * Perform verbose error reporting if not NULL.
229 * Either @p item->mask or one of the mask parameters on success, NULL
230 * otherwise and rte_errno is set.
/* NOTE(review): this excerpt is missing lines of the function (return
 * type, "mask_size" parameter, local declarations, the early returns of
 * mask_empty/mask, and the closing brace); the code below is kept
 * byte-identical to what is visible. */
233 mlx5_nl_flow_item_mask(const struct rte_flow_item *item,
234 const void *mask_default,
235 const void *mask_supported,
236 const void *mask_empty,
238 struct rte_flow_error *error)
243 /* item->last and item->mask cannot exist without item->spec. */
244 if (!item->spec && (item->mask || item->last)) {
246 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
247 "\"mask\" or \"last\" field provided without a"
248 " corresponding \"spec\"");
251 /* No spec, no mask, no problem. */
/* Fall back to the flow-API default mask when the item has no mask. */
254 mask = item->mask ? item->mask : mask_default;
257 * Single-pass check to make sure that:
258 * - Mask is supported, no bits are set outside mask_supported.
259 * - Both item->spec and item->last are included in mask.
261 for (i = 0; i != mask_size; ++i) {
/* mask must be a subset of mask_supported: OR-ing it into the
 * supported byte must not change that byte. */
264 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
265 ((const uint8_t *)mask_supported)[i]) {
267 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
268 mask, "unsupported field found in \"mask\"");
/* Ranges are unsupported: spec and last must be equal on every
 * masked bit, i.e. the mask fully covers the spec..last span. */
272 (((const uint8_t *)item->spec)[i] & mask[i]) !=
273 (((const uint8_t *)item->last)[i] & mask[i])) {
275 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_LAST,
277 "range between \"spec\" and \"last\" not"
278 " comprised in \"mask\"");
286 * Transpose flow rule description to rtnetlink message.
288 * This function transposes a flow rule description to a traffic control
289 * (TC) filter creation message ready to be sent over Netlink.
291 * Target interface is specified as the first entry of the @p ptoi table.
292 * Subsequent entries enable this function to resolve other DPDK port IDs
293 * found in the flow rule.
296 * Output message buffer. May be NULL when @p size is 0.
298 * Size of @p buf. Message may be truncated if not large enough.
300 * DPDK port ID to network interface index translation table. This table
301 * is terminated by an entry with a zero ifindex value.
303 * Flow rule attributes.
305 * Pattern specification.
307 * Associated actions.
309 * Perform verbose error reporting if not NULL.
312 * A positive value representing the exact size of the message in bytes
313 * regardless of the @p size parameter on success, a negative errno value
314 * otherwise and rte_errno is set.
/* Driven by the mlx5_nl_flow_trans[] state machine: each visible "case"
 * below handles one parser state, appending flower attributes to the
 * message as it consumes pattern items and actions.
 * NOTE(review): a large number of lines (case labels, gotos, local
 * declarations, closing braces, the overflow retry path into buf_tmp)
 * are missing from this excerpt; visible code is kept byte-identical. */
317 mlx5_nl_flow_transpose(void *buf,
319 const struct mlx5_nl_flow_ptoi *ptoi,
320 const struct rte_flow_attr *attr,
321 const struct rte_flow_item *pattern,
322 const struct rte_flow_action *actions,
323 struct rte_flow_error *error)
/* Scratch buffer used to measure the exact message size when the
 * caller-provided buffer is too small (see the tail of the function). */
325 alignas(struct nlmsghdr)
326 uint8_t buf_tmp[mnl_nlmsg_size(sizeof(struct tcmsg) + 1024)];
327 const struct rte_flow_item *item;
328 const struct rte_flow_action *action;
330 uint32_t act_index_cur;
333 struct nlattr *na_flower;
334 struct nlattr *na_flower_act;
335 const enum mlx5_nl_flow_trans *trans;
336 const enum mlx5_nl_flow_trans *back;
345 eth_type_set = false;
346 ip_proto_set = false;
348 na_flower_act = NULL;
352 switch (trans[n++]) {
354 const struct rte_flow_item_eth *eth;
355 const struct rte_flow_item_ipv4 *ipv4;
356 const struct rte_flow_item_ipv6 *ipv6;
357 const struct rte_flow_item_tcp *tcp;
358 const struct rte_flow_item_udp *udp;
361 const struct rte_flow_action_port_id *port_id;
363 struct nlmsghdr *nlh;
365 struct nlattr *act_index;
/* INVALID state: produce a specific error depending on whether an
 * item, an action, or a missing fate action caused the dead end. */
371 return rte_flow_error_set
372 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
373 item, "unsupported pattern item combination");
374 else if (action->type)
375 return rte_flow_error_set
376 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
377 action, "unsupported action combination");
378 return rte_flow_error_set
379 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
380 "flow rule lacks some kind of fate action");
387 * Supported attributes: no groups, some priorities and
388 * ingress only. Don't care about transfer as it is the
392 return rte_flow_error_set
394 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
395 attr, "groups are not supported");
396 if (attr->priority > 0xfffe)
397 return rte_flow_error_set
399 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
400 attr, "lowest priority level is 0xfffe");
402 return rte_flow_error_set
404 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
405 attr, "only ingress is supported");
407 return rte_flow_error_set
409 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
410 attr, "egress is not supported");
411 if (size < mnl_nlmsg_size(sizeof(*tcm)))
413 nlh = mnl_nlmsg_put_header(buf);
415 nlh->nlmsg_flags = 0;
417 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm))
418 tcm->tcm_family = AF_UNSPEC;
419 tcm->tcm_ifindex = ptoi[0].ifindex;
421 * Let kernel pick a handle by default. A predictable handle
422 * can be set by the caller on the resulting buffer through
423 * mlx5_nl_flow_brand().
426 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
428 * Priority cannot be zero to prevent the kernel from
429 * picking one automatically.
431 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
432 RTE_BE16(ETH_P_ALL));
/* Request hardware offload only: SKIP_SW makes the kernel refuse
 * the filter unless it can be programmed into the NIC. */
435 if (!mnl_attr_put_strz_check(buf, size, TCA_KIND, "flower"))
437 na_flower = mnl_attr_nest_start_check(buf, size, TCA_OPTIONS);
440 if (!mnl_attr_put_u32_check(buf, size, TCA_FLOWER_FLAGS,
441 TCA_CLS_FLAGS_SKIP_SW))
445 if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
/* ITEM_ETH: translate L2 addresses and EtherType. */
450 if (item->type != RTE_FLOW_ITEM_TYPE_ETH)
452 mask.eth = mlx5_nl_flow_item_mask
453 (item, &rte_flow_item_eth_mask,
454 &mlx5_nl_flow_mask_supported.eth,
455 &mlx5_nl_flow_mask_empty.eth,
456 sizeof(mlx5_nl_flow_mask_supported.eth), error);
459 if (mask.eth == &mlx5_nl_flow_mask_empty.eth) {
463 spec.eth = item->spec;
464 if (mask.eth->type && mask.eth->type != RTE_BE16(0xffff))
465 return rte_flow_error_set
466 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
468 "no support for partial mask on"
470 if (mask.eth->type) {
471 if (!mnl_attr_put_u16_check(buf, size,
472 TCA_FLOWER_KEY_ETH_TYPE,
477 if ((!is_zero_ether_addr(&mask.eth->dst) &&
478 (!mnl_attr_put_check(buf, size,
479 TCA_FLOWER_KEY_ETH_DST,
481 spec.eth->dst.addr_bytes) ||
482 !mnl_attr_put_check(buf, size,
483 TCA_FLOWER_KEY_ETH_DST_MASK,
485 mask.eth->dst.addr_bytes))) ||
486 (!is_zero_ether_addr(&mask.eth->src) &&
487 (!mnl_attr_put_check(buf, size,
488 TCA_FLOWER_KEY_ETH_SRC,
490 spec.eth->src.addr_bytes) ||
491 !mnl_attr_put_check(buf, size,
492 TCA_FLOWER_KEY_ETH_SRC_MASK,
494 mask.eth->src.addr_bytes))))
/* ITEM_IPV4: implies EtherType ETH_P_IP if not set by ITEM_ETH. */
499 if (item->type != RTE_FLOW_ITEM_TYPE_IPV4)
501 mask.ipv4 = mlx5_nl_flow_item_mask
502 (item, &rte_flow_item_ipv4_mask,
503 &mlx5_nl_flow_mask_supported.ipv4,
504 &mlx5_nl_flow_mask_empty.ipv4,
505 sizeof(mlx5_nl_flow_mask_supported.ipv4), error);
509 !mnl_attr_put_u16_check(buf, size,
510 TCA_FLOWER_KEY_ETH_TYPE,
514 if (mask.ipv4 == &mlx5_nl_flow_mask_empty.ipv4) {
518 spec.ipv4 = item->spec;
519 if (mask.ipv4->hdr.next_proto_id &&
520 mask.ipv4->hdr.next_proto_id != 0xff)
521 return rte_flow_error_set
522 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
524 "no support for partial mask on"
525 " \"hdr.next_proto_id\" field");
526 if (mask.ipv4->hdr.next_proto_id) {
527 if (!mnl_attr_put_u8_check
528 (buf, size, TCA_FLOWER_KEY_IP_PROTO,
529 spec.ipv4->hdr.next_proto_id))
533 if ((mask.ipv4->hdr.src_addr &&
534 (!mnl_attr_put_u32_check(buf, size,
535 TCA_FLOWER_KEY_IPV4_SRC,
536 spec.ipv4->hdr.src_addr) ||
537 !mnl_attr_put_u32_check(buf, size,
538 TCA_FLOWER_KEY_IPV4_SRC_MASK,
539 mask.ipv4->hdr.src_addr))) ||
540 (mask.ipv4->hdr.dst_addr &&
541 (!mnl_attr_put_u32_check(buf, size,
542 TCA_FLOWER_KEY_IPV4_DST,
543 spec.ipv4->hdr.dst_addr) ||
544 !mnl_attr_put_u32_check(buf, size,
545 TCA_FLOWER_KEY_IPV4_DST_MASK,
546 mask.ipv4->hdr.dst_addr))))
/* ITEM_IPV6: same pattern as IPv4 with 16-byte addresses. */
551 if (item->type != RTE_FLOW_ITEM_TYPE_IPV6)
553 mask.ipv6 = mlx5_nl_flow_item_mask
554 (item, &rte_flow_item_ipv6_mask,
555 &mlx5_nl_flow_mask_supported.ipv6,
556 &mlx5_nl_flow_mask_empty.ipv6,
557 sizeof(mlx5_nl_flow_mask_supported.ipv6), error);
561 !mnl_attr_put_u16_check(buf, size,
562 TCA_FLOWER_KEY_ETH_TYPE,
563 RTE_BE16(ETH_P_IPV6)))
566 if (mask.ipv6 == &mlx5_nl_flow_mask_empty.ipv6) {
570 spec.ipv6 = item->spec;
571 if (mask.ipv6->hdr.proto && mask.ipv6->hdr.proto != 0xff)
572 return rte_flow_error_set
573 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
575 "no support for partial mask on"
576 " \"hdr.proto\" field");
577 if (mask.ipv6->hdr.proto) {
578 if (!mnl_attr_put_u8_check
579 (buf, size, TCA_FLOWER_KEY_IP_PROTO,
580 spec.ipv6->hdr.proto))
584 if ((!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr) &&
585 (!mnl_attr_put_check(buf, size,
586 TCA_FLOWER_KEY_IPV6_SRC,
587 sizeof(spec.ipv6->hdr.src_addr),
588 spec.ipv6->hdr.src_addr) ||
589 !mnl_attr_put_check(buf, size,
590 TCA_FLOWER_KEY_IPV6_SRC_MASK,
591 sizeof(mask.ipv6->hdr.src_addr),
592 mask.ipv6->hdr.src_addr))) ||
593 (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr) &&
594 (!mnl_attr_put_check(buf, size,
595 TCA_FLOWER_KEY_IPV6_DST,
596 sizeof(spec.ipv6->hdr.dst_addr),
597 spec.ipv6->hdr.dst_addr) ||
598 !mnl_attr_put_check(buf, size,
599 TCA_FLOWER_KEY_IPV6_DST_MASK,
600 sizeof(mask.ipv6->hdr.dst_addr),
601 mask.ipv6->hdr.dst_addr))))
/* ITEM_TCP: implies IP_PROTO TCP if not set by the IP item. */
606 if (item->type != RTE_FLOW_ITEM_TYPE_TCP)
608 mask.tcp = mlx5_nl_flow_item_mask
609 (item, &rte_flow_item_tcp_mask,
610 &mlx5_nl_flow_mask_supported.tcp,
611 &mlx5_nl_flow_mask_empty.tcp,
612 sizeof(mlx5_nl_flow_mask_supported.tcp), error);
616 !mnl_attr_put_u8_check(buf, size,
617 TCA_FLOWER_KEY_IP_PROTO,
620 if (mask.tcp == &mlx5_nl_flow_mask_empty.tcp) {
624 spec.tcp = item->spec;
625 if ((mask.tcp->hdr.src_port &&
626 mask.tcp->hdr.src_port != RTE_BE16(0xffff)) ||
627 (mask.tcp->hdr.dst_port &&
628 mask.tcp->hdr.dst_port != RTE_BE16(0xffff)))
629 return rte_flow_error_set
630 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
632 "no support for partial masks on"
633 " \"hdr.src_port\" and \"hdr.dst_port\""
635 if ((mask.tcp->hdr.src_port &&
636 (!mnl_attr_put_u16_check(buf, size,
637 TCA_FLOWER_KEY_TCP_SRC,
638 spec.tcp->hdr.src_port) ||
639 !mnl_attr_put_u16_check(buf, size,
640 TCA_FLOWER_KEY_TCP_SRC_MASK,
641 mask.tcp->hdr.src_port))) ||
642 (mask.tcp->hdr.dst_port &&
643 (!mnl_attr_put_u16_check(buf, size,
644 TCA_FLOWER_KEY_TCP_DST,
645 spec.tcp->hdr.dst_port) ||
646 !mnl_attr_put_u16_check(buf, size,
647 TCA_FLOWER_KEY_TCP_DST_MASK,
648 mask.tcp->hdr.dst_port))))
/* ITEM_UDP: mirror of the TCP case with UDP attributes. */
653 if (item->type != RTE_FLOW_ITEM_TYPE_UDP)
655 mask.udp = mlx5_nl_flow_item_mask
656 (item, &rte_flow_item_udp_mask,
657 &mlx5_nl_flow_mask_supported.udp,
658 &mlx5_nl_flow_mask_empty.udp,
659 sizeof(mlx5_nl_flow_mask_supported.udp), error);
663 !mnl_attr_put_u8_check(buf, size,
664 TCA_FLOWER_KEY_IP_PROTO,
667 if (mask.udp == &mlx5_nl_flow_mask_empty.udp) {
671 spec.udp = item->spec;
672 if ((mask.udp->hdr.src_port &&
673 mask.udp->hdr.src_port != RTE_BE16(0xffff)) ||
674 (mask.udp->hdr.dst_port &&
675 mask.udp->hdr.dst_port != RTE_BE16(0xffff)))
676 return rte_flow_error_set
677 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
679 "no support for partial masks on"
680 " \"hdr.src_port\" and \"hdr.dst_port\""
682 if ((mask.udp->hdr.src_port &&
683 (!mnl_attr_put_u16_check(buf, size,
684 TCA_FLOWER_KEY_UDP_SRC,
685 spec.udp->hdr.src_port) ||
686 !mnl_attr_put_u16_check(buf, size,
687 TCA_FLOWER_KEY_UDP_SRC_MASK,
688 mask.udp->hdr.src_port))) ||
689 (mask.udp->hdr.dst_port &&
690 (!mnl_attr_put_u16_check(buf, size,
691 TCA_FLOWER_KEY_UDP_DST,
692 spec.udp->hdr.dst_port) ||
693 !mnl_attr_put_u16_check(buf, size,
694 TCA_FLOWER_KEY_UDP_DST_MASK,
695 mask.udp->hdr.dst_port))))
/* ACTIONS: open the TCA_FLOWER_ACT nest holding one nested entry
 * per TC action. */
700 if (item->type != RTE_FLOW_ITEM_TYPE_END)
703 assert(!na_flower_act);
705 mnl_attr_nest_start_check(buf, size, TCA_FLOWER_ACT);
711 if (action->type != RTE_FLOW_ACTION_TYPE_VOID)
/* ACTION_PORT_ID: emit a "mirred" redirect to the ifindex that
 * the ptoi[] table maps the DPDK port ID to. */
716 if (action->type != RTE_FLOW_ACTION_TYPE_PORT_ID)
718 conf.port_id = action->conf;
719 if (conf.port_id->original)
722 for (i = 0; ptoi[i].ifindex; ++i)
723 if (ptoi[i].port_id == conf.port_id->id)
725 if (!ptoi[i].ifindex)
726 return rte_flow_error_set
727 (error, ENODEV, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
729 "missing data to convert port ID to ifindex");
731 mnl_attr_nest_start_check(buf, size, act_index_cur++);
733 !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "mirred"))
735 act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
738 if (!mnl_attr_put_check(buf, size, TCA_MIRRED_PARMS,
739 sizeof(struct tc_mirred),
741 .action = TC_ACT_STOLEN,
742 .eaction = TCA_EGRESS_REDIR,
743 .ifindex = ptoi[i].ifindex,
746 mnl_attr_nest_end(buf, act);
747 mnl_attr_nest_end(buf, act_index);
/* ACTION_DROP: emit a "gact" action with TC_ACT_SHOT. */
751 if (action->type != RTE_FLOW_ACTION_TYPE_DROP)
754 mnl_attr_nest_start_check(buf, size, act_index_cur++);
756 !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "gact"))
758 act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
761 if (!mnl_attr_put_check(buf, size, TCA_GACT_PARMS,
762 sizeof(struct tc_gact),
764 .action = TC_ACT_SHOT,
767 mnl_attr_nest_end(buf, act);
768 mnl_attr_nest_end(buf, act_index);
/* END: close all nests and return the exact message length. */
772 if (item->type != RTE_FLOW_ITEM_TYPE_END ||
773 action->type != RTE_FLOW_ACTION_TYPE_END)
776 mnl_attr_nest_end(buf, na_flower_act);
778 mnl_attr_nest_end(buf, na_flower);
780 return nlh->nlmsg_len;
/* Dead end in the current state list: back up one state. */
783 trans = mlx5_nl_flow_trans[trans[n - 1]];
/* Output buffer too small: redo the walk into buf_tmp purely to
 * measure the required size, then report ENOBUFS if even that
 * overflows. */
787 if (buf != buf_tmp) {
789 size = sizeof(buf_tmp);
792 return rte_flow_error_set
793 (error, ENOBUFS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
794 "generated TC message is too large");
798 * Brand rtnetlink buffer with unique handle.
800 * This handle should be unique for a given network interface to avoid
804 * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
806 * Unique 32-bit handle to use.
/* Overwrites tcm_handle in the tcmsg payload; mlx5_nl_flow_transpose()
 * otherwise leaves handle selection to the kernel. */
809 mlx5_nl_flow_brand(void *buf, uint32_t handle)
811 struct tcmsg *tcm = mnl_nlmsg_get_payload(buf);
813 tcm->tcm_handle = handle;
817 * Send Netlink message with acknowledgment.
820 * Libmnl socket to use.
822 * Message to send. This function always raises the NLM_F_ACK flag before
826 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Common helper for all rtnetlink requests in this file: send, then
 * parse the kernel's ACK/error reply with mnl_cb_run() using a random
 * sequence number to match the reply to this request. */
829 mlx5_nl_flow_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
/* Reply buffer sized for an nlmsgerr plus an echo of the request. */
831 alignas(struct nlmsghdr)
832 uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
833 nlh->nlmsg_len - sizeof(*nlh)];
834 uint32_t seq = random();
837 nlh->nlmsg_flags |= NLM_F_ACK;
838 nlh->nlmsg_seq = seq;
839 ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
841 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
844 (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
852 * Create a Netlink flow rule.
855 * Libmnl socket to use.
857 * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
859 * Perform verbose error reporting if not NULL.
862 * 0 on success, a negative errno value otherwise and rte_errno is set.
865 mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
866 struct rte_flow_error *error)
868 struct nlmsghdr *nlh = buf;
/* NLM_F_EXCL makes creation fail if an identical filter exists. */
870 nlh->nlmsg_type = RTM_NEWTFILTER;
871 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
872 if (!mlx5_nl_flow_nl_ack(nl, nlh))
/* mlx5_nl_flow_nl_ack() already set rte_errno on failure. */
874 return rte_flow_error_set
875 (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
876 "netlink: failed to create TC flow rule");
880 * Destroy a Netlink flow rule.
883 * Libmnl socket to use.
885 * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
887 * Perform verbose error reporting if not NULL.
890 * 0 on success, a negative errno value otherwise and rte_errno is set.
893 mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
894 struct rte_flow_error *error)
896 struct nlmsghdr *nlh = buf;
898 nlh->nlmsg_type = RTM_DELTFILTER;
899 nlh->nlmsg_flags = NLM_F_REQUEST;
900 if (!mlx5_nl_flow_nl_ack(nl, nlh))
902 return rte_flow_error_set
903 (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
904 "netlink: failed to destroy TC flow rule");
908 * Initialize ingress qdisc of a given network interface.
911 * Libmnl socket of the @p NETLINK_ROUTE kind.
913 * Index of network interface to initialize.
915 * Perform verbose error reporting if not NULL.
918 * 0 on success, a negative errno value otherwise and rte_errno is set.
921 mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
922 struct rte_flow_error *error)
924 struct nlmsghdr *nlh;
926 alignas(struct nlmsghdr)
927 uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
929 /* Destroy existing ingress qdisc and everything attached to it. */
930 nlh = mnl_nlmsg_put_header(buf);
931 nlh->nlmsg_type = RTM_DELQDISC;
932 nlh->nlmsg_flags = NLM_F_REQUEST;
933 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
934 tcm->tcm_family = AF_UNSPEC;
935 tcm->tcm_ifindex = ifindex;
936 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
937 tcm->tcm_parent = TC_H_INGRESS;
938 /* Ignore errors when qdisc is already absent. */
939 if (mlx5_nl_flow_nl_ack(nl, nlh) &&
940 rte_errno != EINVAL && rte_errno != ENOENT)
941 return rte_flow_error_set
942 (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
943 NULL, "netlink: failed to remove ingress qdisc");
944 /* Create fresh ingress qdisc. */
/* The nlh/buf pair is reused; mnl_nlmsg_put_header() resets it. */
945 nlh = mnl_nlmsg_put_header(buf);
946 nlh->nlmsg_type = RTM_NEWQDISC;
947 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
948 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
949 tcm->tcm_family = AF_UNSPEC;
950 tcm->tcm_ifindex = ifindex;
951 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
952 tcm->tcm_parent = TC_H_INGRESS;
/* buf is sized so this attribute cannot overflow; the _check variant
 * is used for uniformity with the rest of the file. */
953 mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
954 if (mlx5_nl_flow_nl_ack(nl, nlh))
955 return rte_flow_error_set
956 (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
957 NULL, "netlink: failed to create ingress qdisc");
962 * Create and configure a libmnl socket for Netlink flow rules.
965 * A valid libmnl socket object pointer on success, NULL otherwise and
/* NOTE(review): the error-handling lines of this function (NULL check
 * on mnl_socket_open(), setsockopt length argument, success/error
 * returns, rte_errno assignment) are missing from this excerpt. */
969 mlx5_nl_flow_socket_create(void)
971 struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
/* NETLINK_CAP_ACK trims request echoes from kernel error replies,
 * keeping acknowledgments small (best effort, result unchecked). */
974 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
976 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
981 mnl_socket_close(nl);
986 * Destroy a libmnl socket.
/* Counterpart of mlx5_nl_flow_socket_create(). */
989 mlx5_nl_flow_socket_destroy(struct mnl_socket *nl)
991 mnl_socket_close(nl);