1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
22 #include <sys/socket.h>
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
28 #include <rte_malloc.h>
31 #include "mlx5_flow.h"
32 #include "mlx5_autoconf.h"
34 #ifdef HAVE_TC_ACT_VLAN
36 #include <linux/tc_act/tc_vlan.h>
38 #else /* HAVE_TC_ACT_VLAN */
40 #define TCA_VLAN_ACT_POP 1
41 #define TCA_VLAN_ACT_PUSH 2
42 #define TCA_VLAN_ACT_MODIFY 3
43 #define TCA_VLAN_PARMS 2
44 #define TCA_VLAN_PUSH_VLAN_ID 3
45 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
46 #define TCA_VLAN_PAD 5
47 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
54 #endif /* HAVE_TC_ACT_VLAN */
56 /* Normally found in linux/netlink.h. */
57 #ifndef NETLINK_CAP_ACK
58 #define NETLINK_CAP_ACK 10
61 /* Normally found in linux/pkt_sched.h. */
62 #ifndef TC_H_MIN_INGRESS
63 #define TC_H_MIN_INGRESS 0xfff2u
66 /* Normally found in linux/pkt_cls.h. */
67 #ifndef TCA_CLS_FLAGS_SKIP_SW
68 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
70 #ifndef HAVE_TCA_FLOWER_ACT
71 #define TCA_FLOWER_ACT 3
73 #ifndef HAVE_TCA_FLOWER_FLAGS
74 #define TCA_FLOWER_FLAGS 22
76 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
77 #define TCA_FLOWER_KEY_ETH_TYPE 8
79 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
80 #define TCA_FLOWER_KEY_ETH_DST 4
82 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
83 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
85 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
86 #define TCA_FLOWER_KEY_ETH_SRC 6
88 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
89 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
91 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
92 #define TCA_FLOWER_KEY_IP_PROTO 9
94 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
95 #define TCA_FLOWER_KEY_IPV4_SRC 10
97 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
98 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
100 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
101 #define TCA_FLOWER_KEY_IPV4_DST 12
103 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
104 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
106 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
107 #define TCA_FLOWER_KEY_IPV6_SRC 14
109 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
110 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
112 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
113 #define TCA_FLOWER_KEY_IPV6_DST 16
115 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
116 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
118 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
119 #define TCA_FLOWER_KEY_TCP_SRC 18
121 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
122 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
124 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
125 #define TCA_FLOWER_KEY_TCP_DST 19
127 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
128 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
130 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
131 #define TCA_FLOWER_KEY_UDP_SRC 20
133 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
134 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
136 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
137 #define TCA_FLOWER_KEY_UDP_DST 21
139 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
140 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
142 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
143 #define TCA_FLOWER_KEY_VLAN_ID 23
145 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
146 #define TCA_FLOWER_KEY_VLAN_PRIO 24
148 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
149 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
152 #ifndef IPV6_ADDR_LEN
153 #define IPV6_ADDR_LEN 16
156 /** Empty masks for known item types. */
158 struct rte_flow_item_port_id port_id;
159 struct rte_flow_item_eth eth;
160 struct rte_flow_item_vlan vlan;
161 struct rte_flow_item_ipv4 ipv4;
162 struct rte_flow_item_ipv6 ipv6;
163 struct rte_flow_item_tcp tcp;
164 struct rte_flow_item_udp udp;
165 } flow_tcf_mask_empty;
167 /** Supported masks for known item types. */
168 static const struct {
169 struct rte_flow_item_port_id port_id;
170 struct rte_flow_item_eth eth;
171 struct rte_flow_item_vlan vlan;
172 struct rte_flow_item_ipv4 ipv4;
173 struct rte_flow_item_ipv6 ipv6;
174 struct rte_flow_item_tcp tcp;
175 struct rte_flow_item_udp udp;
176 } flow_tcf_mask_supported = {
181 .type = RTE_BE16(0xffff),
182 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
183 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
186 /* PCP and VID only, no DEI. */
187 .tci = RTE_BE16(0xefff),
188 .inner_type = RTE_BE16(0xffff),
191 .next_proto_id = 0xff,
192 .src_addr = RTE_BE32(0xffffffff),
193 .dst_addr = RTE_BE32(0xffffffff),
198 "\xff\xff\xff\xff\xff\xff\xff\xff"
199 "\xff\xff\xff\xff\xff\xff\xff\xff",
201 "\xff\xff\xff\xff\xff\xff\xff\xff"
202 "\xff\xff\xff\xff\xff\xff\xff\xff",
205 .src_port = RTE_BE16(0xffff),
206 .dst_port = RTE_BE16(0xffff),
209 .src_port = RTE_BE16(0xffff),
210 .dst_port = RTE_BE16(0xffff),
214 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
215 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
216 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
217 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
218 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
220 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
/** DPDK port to network interface index (ifindex) conversion. */
struct flow_tcf_ptoi {
	uint16_t port_id; /**< DPDK port ID. */
	unsigned int ifindex; /**< Network interface index. */
};
228 #define MLX5_TCF_FATE_ACTIONS (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID)
229 #define MLX5_TCF_VLAN_ACTIONS \
230 (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
231 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
234 * Retrieve mask for pattern item.
236 * This function does basic sanity checks on a pattern item in order to
237 * return the most appropriate mask for it.
240 * Item specification.
241 * @param[in] mask_default
242 * Default mask for pattern item as specified by the flow API.
243 * @param[in] mask_supported
244 * Mask fields supported by the implementation.
245 * @param[in] mask_empty
246 * Empty mask to return when there is no specification.
248 * Perform verbose error reporting if not NULL.
251 * Either @p item->mask or one of the mask parameters on success, NULL
252 * otherwise and rte_errno is set.
255 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
256 const void *mask_supported, const void *mask_empty,
257 size_t mask_size, struct rte_flow_error *error)
262 /* item->last and item->mask cannot exist without item->spec. */
263 if (!item->spec && (item->mask || item->last)) {
264 rte_flow_error_set(error, EINVAL,
265 RTE_FLOW_ERROR_TYPE_ITEM, item,
266 "\"mask\" or \"last\" field provided without"
267 " a corresponding \"spec\"");
270 /* No spec, no mask, no problem. */
273 mask = item->mask ? item->mask : mask_default;
276 * Single-pass check to make sure that:
277 * - Mask is supported, no bits are set outside mask_supported.
278 * - Both item->spec and item->last are included in mask.
280 for (i = 0; i != mask_size; ++i) {
283 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
284 ((const uint8_t *)mask_supported)[i]) {
285 rte_flow_error_set(error, ENOTSUP,
286 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
287 "unsupported field found"
292 (((const uint8_t *)item->spec)[i] & mask[i]) !=
293 (((const uint8_t *)item->last)[i] & mask[i])) {
294 rte_flow_error_set(error, EINVAL,
295 RTE_FLOW_ERROR_TYPE_ITEM_LAST,
297 "range between \"spec\" and \"last\""
298 " not comprised in \"mask\"");
306 * Build a conversion table between port ID and ifindex.
309 * Pointer to Ethernet device.
311 * Pointer to ptoi table.
313 * Size of ptoi table provided.
316 * Size of ptoi table filled.
319 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
322 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
323 uint16_t port_id[n + 1];
325 unsigned int own = 0;
327 /* At least one port is needed when no switch domain is present. */
330 port_id[0] = dev->data->port_id;
332 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
336 for (i = 0; i != n; ++i) {
337 struct rte_eth_dev_info dev_info;
339 rte_eth_dev_info_get(port_id[i], &dev_info);
340 if (port_id[i] == dev->data->port_id)
342 ptoi[i].port_id = port_id[i];
343 ptoi[i].ifindex = dev_info.if_index;
345 /* Ensure first entry of ptoi[] is the current device. */
351 /* An entry with zero ifindex terminates ptoi[]. */
358 * Verify the @p attr will be correctly understood by the E-switch.
361 * Pointer to flow attributes
363 * Pointer to error structure.
366 * 0 on success, a negative errno value otherwise and rte_errno is set.
369 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
370 struct rte_flow_error *error)
373 * Supported attributes: no groups, some priorities and ingress only.
374 * Don't care about transfer as it is the caller's problem.
377 return rte_flow_error_set(error, ENOTSUP,
378 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
379 "groups are not supported");
380 if (attr->priority > 0xfffe)
381 return rte_flow_error_set(error, ENOTSUP,
382 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
384 "lowest priority level is 0xfffe");
386 return rte_flow_error_set(error, EINVAL,
387 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
388 attr, "only ingress is supported");
390 return rte_flow_error_set(error, ENOTSUP,
391 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
392 attr, "egress is not supported");
397 * Validate flow for E-Switch.
400 * Pointer to the priv structure.
402 * Pointer to the flow attributes.
404 * Pointer to the list of items.
406 * Pointer to the list of actions.
408 * Pointer to the error structure.
411 * 0 on success, a negative errno value otherwise and rte_ernno is set.
414 flow_tcf_validate(struct rte_eth_dev *dev,
415 const struct rte_flow_attr *attr,
416 const struct rte_flow_item items[],
417 const struct rte_flow_action actions[],
418 struct rte_flow_error *error)
421 const struct rte_flow_item_port_id *port_id;
422 const struct rte_flow_item_eth *eth;
423 const struct rte_flow_item_vlan *vlan;
424 const struct rte_flow_item_ipv4 *ipv4;
425 const struct rte_flow_item_ipv6 *ipv6;
426 const struct rte_flow_item_tcp *tcp;
427 const struct rte_flow_item_udp *udp;
430 const struct rte_flow_action_port_id *port_id;
431 const struct rte_flow_action_of_push_vlan *of_push_vlan;
432 const struct rte_flow_action_of_set_vlan_vid *
434 const struct rte_flow_action_of_set_vlan_pcp *
437 uint32_t item_flags = 0;
438 uint32_t action_flags = 0;
439 uint8_t next_protocol = -1;
440 unsigned int tcm_ifindex = 0;
441 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
442 struct rte_eth_dev *port_id_dev = NULL;
446 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
447 PTOI_TABLE_SZ_MAX(dev)));
448 ret = flow_tcf_validate_attributes(attr, error);
451 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
454 switch (items->type) {
455 case RTE_FLOW_ITEM_TYPE_VOID:
457 case RTE_FLOW_ITEM_TYPE_PORT_ID:
458 mask.port_id = flow_tcf_item_mask
459 (items, &rte_flow_item_port_id_mask,
460 &flow_tcf_mask_supported.port_id,
461 &flow_tcf_mask_empty.port_id,
462 sizeof(flow_tcf_mask_supported.port_id),
466 if (mask.port_id == &flow_tcf_mask_empty.port_id) {
470 spec.port_id = items->spec;
471 if (mask.port_id->id && mask.port_id->id != 0xffffffff)
472 return rte_flow_error_set
474 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
476 "no support for partial mask on"
478 if (!mask.port_id->id)
481 for (i = 0; ptoi[i].ifindex; ++i)
482 if (ptoi[i].port_id == spec.port_id->id)
484 if (!ptoi[i].ifindex)
485 return rte_flow_error_set
487 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
489 "missing data to convert port ID to"
491 if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
492 return rte_flow_error_set
494 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
496 "cannot match traffic for"
497 " several port IDs through"
498 " a single flow rule");
499 tcm_ifindex = ptoi[i].ifindex;
502 case RTE_FLOW_ITEM_TYPE_ETH:
503 ret = mlx5_flow_validate_item_eth(items, item_flags,
507 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
509 * Redundant check due to different supported mask.
510 * Same for the rest of items.
512 mask.eth = flow_tcf_item_mask
513 (items, &rte_flow_item_eth_mask,
514 &flow_tcf_mask_supported.eth,
515 &flow_tcf_mask_empty.eth,
516 sizeof(flow_tcf_mask_supported.eth),
520 if (mask.eth->type && mask.eth->type !=
522 return rte_flow_error_set
524 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
526 "no support for partial mask on"
529 case RTE_FLOW_ITEM_TYPE_VLAN:
530 ret = mlx5_flow_validate_item_vlan(items, item_flags,
534 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
535 mask.vlan = flow_tcf_item_mask
536 (items, &rte_flow_item_vlan_mask,
537 &flow_tcf_mask_supported.vlan,
538 &flow_tcf_mask_empty.vlan,
539 sizeof(flow_tcf_mask_supported.vlan),
543 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
544 (mask.vlan->tci & RTE_BE16(0xe000)) !=
546 (mask.vlan->tci & RTE_BE16(0x0fff) &&
547 (mask.vlan->tci & RTE_BE16(0x0fff)) !=
549 (mask.vlan->inner_type &&
550 mask.vlan->inner_type != RTE_BE16(0xffff)))
551 return rte_flow_error_set
553 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
555 "no support for partial masks on"
556 " \"tci\" (PCP and VID parts) and"
557 " \"inner_type\" fields");
559 case RTE_FLOW_ITEM_TYPE_IPV4:
560 ret = mlx5_flow_validate_item_ipv4(items, item_flags,
564 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
565 mask.ipv4 = flow_tcf_item_mask
566 (items, &rte_flow_item_ipv4_mask,
567 &flow_tcf_mask_supported.ipv4,
568 &flow_tcf_mask_empty.ipv4,
569 sizeof(flow_tcf_mask_supported.ipv4),
573 if (mask.ipv4->hdr.next_proto_id &&
574 mask.ipv4->hdr.next_proto_id != 0xff)
575 return rte_flow_error_set
577 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
579 "no support for partial mask on"
580 " \"hdr.next_proto_id\" field");
581 else if (mask.ipv4->hdr.next_proto_id)
583 ((const struct rte_flow_item_ipv4 *)
584 (items->spec))->hdr.next_proto_id;
586 case RTE_FLOW_ITEM_TYPE_IPV6:
587 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
591 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
592 mask.ipv6 = flow_tcf_item_mask
593 (items, &rte_flow_item_ipv6_mask,
594 &flow_tcf_mask_supported.ipv6,
595 &flow_tcf_mask_empty.ipv6,
596 sizeof(flow_tcf_mask_supported.ipv6),
600 if (mask.ipv6->hdr.proto &&
601 mask.ipv6->hdr.proto != 0xff)
602 return rte_flow_error_set
604 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
606 "no support for partial mask on"
607 " \"hdr.proto\" field");
608 else if (mask.ipv6->hdr.proto)
610 ((const struct rte_flow_item_ipv6 *)
611 (items->spec))->hdr.proto;
613 case RTE_FLOW_ITEM_TYPE_UDP:
614 ret = mlx5_flow_validate_item_udp(items, item_flags,
615 next_protocol, error);
618 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
619 mask.udp = flow_tcf_item_mask
620 (items, &rte_flow_item_udp_mask,
621 &flow_tcf_mask_supported.udp,
622 &flow_tcf_mask_empty.udp,
623 sizeof(flow_tcf_mask_supported.udp),
628 case RTE_FLOW_ITEM_TYPE_TCP:
629 ret = mlx5_flow_validate_item_tcp(items, item_flags,
630 next_protocol, error);
633 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
634 mask.tcp = flow_tcf_item_mask
635 (items, &rte_flow_item_tcp_mask,
636 &flow_tcf_mask_supported.tcp,
637 &flow_tcf_mask_empty.tcp,
638 sizeof(flow_tcf_mask_supported.tcp),
644 return rte_flow_error_set(error, ENOTSUP,
645 RTE_FLOW_ERROR_TYPE_ITEM,
646 NULL, "item not supported");
649 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
652 switch (actions->type) {
653 case RTE_FLOW_ACTION_TYPE_VOID:
655 case RTE_FLOW_ACTION_TYPE_PORT_ID:
656 if (action_flags & MLX5_TCF_FATE_ACTIONS)
657 return rte_flow_error_set
659 RTE_FLOW_ERROR_TYPE_ACTION, actions,
660 "can't have multiple fate actions");
661 conf.port_id = actions->conf;
662 if (conf.port_id->original)
665 for (i = 0; ptoi[i].ifindex; ++i)
666 if (ptoi[i].port_id == conf.port_id->id)
668 if (!ptoi[i].ifindex)
669 return rte_flow_error_set
671 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
673 "missing data to convert port ID to"
675 action_flags |= MLX5_FLOW_ACTION_PORT_ID;
676 port_id_dev = &rte_eth_devices[conf.port_id->id];
678 case RTE_FLOW_ACTION_TYPE_DROP:
679 if (action_flags & MLX5_TCF_FATE_ACTIONS)
680 return rte_flow_error_set
682 RTE_FLOW_ERROR_TYPE_ACTION, actions,
683 "can't have multiple fate actions");
684 action_flags |= MLX5_FLOW_ACTION_DROP;
686 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
687 action_flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
689 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
690 action_flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
692 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
693 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
694 return rte_flow_error_set
696 RTE_FLOW_ERROR_TYPE_ACTION, actions,
697 "vlan modify is not supported,"
698 " set action must follow push action");
699 action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
701 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
702 if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
703 return rte_flow_error_set
705 RTE_FLOW_ERROR_TYPE_ACTION, actions,
706 "vlan modify is not supported,"
707 " set action must follow push action");
708 action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
711 return rte_flow_error_set(error, ENOTSUP,
712 RTE_FLOW_ERROR_TYPE_ACTION,
714 "action not supported");
718 * FW syndrome (0xA9C090):
719 * set_flow_table_entry: push vlan action fte in fdb can ONLY be
720 * forward to the uplink.
722 if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
723 (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
724 ((struct priv *)port_id_dev->data->dev_private)->representor)
725 return rte_flow_error_set(error, ENOTSUP,
726 RTE_FLOW_ERROR_TYPE_ACTION, actions,
727 "vlan push can only be applied"
728 " when forwarding to uplink port");
730 * FW syndrome (0x294609):
731 * set_flow_table_entry: modify/pop/push actions in fdb flow table
732 * are supported only while forwarding to vport.
734 if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
735 !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
736 return rte_flow_error_set(error, ENOTSUP,
737 RTE_FLOW_ERROR_TYPE_ACTION, actions,
738 "vlan actions are supported"
739 " only with port_id action");
740 if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
741 return rte_flow_error_set(error, EINVAL,
742 RTE_FLOW_ERROR_TYPE_ACTION, actions,
743 "no fate action is found");
748 * Calculate maximum size of memory for flow items of Linux TC flower and
749 * extract specified items.
752 * Pointer to the list of items.
753 * @param[out] item_flags
754 * Pointer to the detected items.
757 * Maximum size of memory for items.
760 flow_tcf_get_items_and_size(const struct rte_flow_item items[],
761 uint64_t *item_flags)
766 size += SZ_NLATTR_STRZ_OF("flower") +
767 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
768 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
769 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
770 switch (items->type) {
771 case RTE_FLOW_ITEM_TYPE_VOID:
773 case RTE_FLOW_ITEM_TYPE_PORT_ID:
775 case RTE_FLOW_ITEM_TYPE_ETH:
776 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
777 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
778 /* dst/src MAC addr and mask. */
779 flags |= MLX5_FLOW_LAYER_OUTER_L2;
781 case RTE_FLOW_ITEM_TYPE_VLAN:
782 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
783 SZ_NLATTR_TYPE_OF(uint16_t) +
784 /* VLAN Ether type. */
785 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
786 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
787 flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
789 case RTE_FLOW_ITEM_TYPE_IPV4:
790 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
791 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
792 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
793 /* dst/src IP addr and mask. */
794 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
796 case RTE_FLOW_ITEM_TYPE_IPV6:
797 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
798 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
799 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
800 /* dst/src IP addr and mask. */
801 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
803 case RTE_FLOW_ITEM_TYPE_UDP:
804 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
805 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
806 /* dst/src port and mask. */
807 flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
809 case RTE_FLOW_ITEM_TYPE_TCP:
810 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
811 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
812 /* dst/src port and mask. */
813 flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
817 "unsupported item %p type %d,"
818 " items must be validated before flow creation",
819 (const void *)items, items->type);
828 * Calculate maximum size of memory for flow actions of Linux TC flower and
829 * extract specified actions.
832 * Pointer to the list of actions.
833 * @param[out] action_flags
834 * Pointer to the detected actions.
837 * Maximum size of memory for actions.
840 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
841 uint64_t *action_flags)
846 size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
847 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
848 switch (actions->type) {
849 case RTE_FLOW_ACTION_TYPE_VOID:
851 case RTE_FLOW_ACTION_TYPE_PORT_ID:
852 size += SZ_NLATTR_NEST + /* na_act_index. */
853 SZ_NLATTR_STRZ_OF("mirred") +
854 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
855 SZ_NLATTR_TYPE_OF(struct tc_mirred);
856 flags |= MLX5_FLOW_ACTION_PORT_ID;
858 case RTE_FLOW_ACTION_TYPE_DROP:
859 size += SZ_NLATTR_NEST + /* na_act_index. */
860 SZ_NLATTR_STRZ_OF("gact") +
861 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
862 SZ_NLATTR_TYPE_OF(struct tc_gact);
863 flags |= MLX5_FLOW_ACTION_DROP;
865 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
866 flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
868 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
869 flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
871 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
872 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
874 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
875 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
878 size += SZ_NLATTR_NEST + /* na_act_index. */
879 SZ_NLATTR_STRZ_OF("vlan") +
880 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
881 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
882 SZ_NLATTR_TYPE_OF(uint16_t) +
884 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
885 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
889 "unsupported action %p type %d,"
890 " items must be validated before flow creation",
891 (const void *)actions, actions->type);
895 *action_flags = flags;
900 * Brand rtnetlink buffer with unique handle.
902 * This handle should be unique for a given network interface to avoid
906 * Pointer to Netlink message.
908 * Unique 32-bit handle to use.
911 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
913 struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
915 tcm->tcm_handle = handle;
916 DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
917 (void *)nlh, handle);
921 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
922 * memory required, allocates the memory, initializes Netlink message headers
923 * and set unique TC message handle.
926 * Pointer to the flow attributes.
928 * Pointer to the list of items.
930 * Pointer to the list of actions.
931 * @param[out] item_flags
932 * Pointer to bit mask of all items detected.
933 * @param[out] action_flags
934 * Pointer to bit mask of all actions detected.
936 * Pointer to the error structure.
939 * Pointer to mlx5_flow object on success,
940 * otherwise NULL and rte_ernno is set.
942 static struct mlx5_flow *
943 flow_tcf_prepare(const struct rte_flow_attr *attr __rte_unused,
944 const struct rte_flow_item items[],
945 const struct rte_flow_action actions[],
946 uint64_t *item_flags, uint64_t *action_flags,
947 struct rte_flow_error *error)
949 size_t size = sizeof(struct mlx5_flow) +
950 MNL_ALIGN(sizeof(struct nlmsghdr)) +
951 MNL_ALIGN(sizeof(struct tcmsg));
952 struct mlx5_flow *dev_flow;
953 struct nlmsghdr *nlh;
956 size += flow_tcf_get_items_and_size(items, item_flags);
957 size += flow_tcf_get_actions_and_size(actions, action_flags);
958 dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
960 rte_flow_error_set(error, ENOMEM,
961 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
962 "not enough memory to create E-Switch flow");
965 nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
966 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
967 *dev_flow = (struct mlx5_flow){
968 .tcf = (struct mlx5_flow_tcf){
974 * Generate a reasonably unique handle based on the address of the
977 * This is straightforward on 32-bit systems where the flow pointer can
978 * be used directly. Otherwise, its least significant part is taken
979 * after shifting it by the previous power of two of the pointed buffer
982 if (sizeof(dev_flow) <= 4)
983 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
985 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
986 rte_log2_u32(rte_align32prevpow2(size)));
991 * Translate flow for Linux TC flower and construct Netlink message.
994 * Pointer to the priv structure.
995 * @param[in, out] flow
996 * Pointer to the sub flow.
998 * Pointer to the flow attributes.
1000 * Pointer to the list of items.
1001 * @param[in] actions
1002 * Pointer to the list of actions.
1004 * Pointer to the error structure.
1007 * 0 on success, a negative errno value otherwise and rte_ernno is set.
1010 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1011 const struct rte_flow_attr *attr,
1012 const struct rte_flow_item items[],
1013 const struct rte_flow_action actions[],
1014 struct rte_flow_error *error)
1017 const struct rte_flow_item_port_id *port_id;
1018 const struct rte_flow_item_eth *eth;
1019 const struct rte_flow_item_vlan *vlan;
1020 const struct rte_flow_item_ipv4 *ipv4;
1021 const struct rte_flow_item_ipv6 *ipv6;
1022 const struct rte_flow_item_tcp *tcp;
1023 const struct rte_flow_item_udp *udp;
1026 const struct rte_flow_action_port_id *port_id;
1027 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1028 const struct rte_flow_action_of_set_vlan_vid *
1030 const struct rte_flow_action_of_set_vlan_pcp *
1033 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1034 struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1035 struct tcmsg *tcm = dev_flow->tcf.tcm;
1036 uint32_t na_act_index_cur;
1037 bool eth_type_set = 0;
1038 bool vlan_present = 0;
1039 bool vlan_eth_type_set = 0;
1040 bool ip_proto_set = 0;
1041 struct nlattr *na_flower;
1042 struct nlattr *na_flower_act;
1043 struct nlattr *na_vlan_id = NULL;
1044 struct nlattr *na_vlan_priority = NULL;
1046 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1047 PTOI_TABLE_SZ_MAX(dev)));
1048 nlh = dev_flow->tcf.nlh;
1049 tcm = dev_flow->tcf.tcm;
1050 /* Prepare API must have been called beforehand. */
1051 assert(nlh != NULL && tcm != NULL);
1052 tcm->tcm_family = AF_UNSPEC;
1053 tcm->tcm_ifindex = ptoi[0].ifindex;
1054 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1056 * Priority cannot be zero to prevent the kernel from picking one
1059 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1060 RTE_BE16(ETH_P_ALL));
1061 mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1062 na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1063 mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
1064 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1067 switch (items->type) {
1068 case RTE_FLOW_ITEM_TYPE_VOID:
1070 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1071 mask.port_id = flow_tcf_item_mask
1072 (items, &rte_flow_item_port_id_mask,
1073 &flow_tcf_mask_supported.port_id,
1074 &flow_tcf_mask_empty.port_id,
1075 sizeof(flow_tcf_mask_supported.port_id),
1077 assert(mask.port_id);
1078 if (mask.port_id == &flow_tcf_mask_empty.port_id)
1080 spec.port_id = items->spec;
1081 if (!mask.port_id->id)
1084 for (i = 0; ptoi[i].ifindex; ++i)
1085 if (ptoi[i].port_id == spec.port_id->id)
1087 assert(ptoi[i].ifindex);
1088 tcm->tcm_ifindex = ptoi[i].ifindex;
1090 case RTE_FLOW_ITEM_TYPE_ETH:
1091 mask.eth = flow_tcf_item_mask
1092 (items, &rte_flow_item_eth_mask,
1093 &flow_tcf_mask_supported.eth,
1094 &flow_tcf_mask_empty.eth,
1095 sizeof(flow_tcf_mask_supported.eth),
1098 if (mask.eth == &flow_tcf_mask_empty.eth)
1100 spec.eth = items->spec;
1101 if (mask.eth->type) {
1102 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1106 if (!is_zero_ether_addr(&mask.eth->dst)) {
1107 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1109 spec.eth->dst.addr_bytes);
1110 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1112 mask.eth->dst.addr_bytes);
1114 if (!is_zero_ether_addr(&mask.eth->src)) {
1115 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1117 spec.eth->src.addr_bytes);
1118 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1120 mask.eth->src.addr_bytes);
1123 case RTE_FLOW_ITEM_TYPE_VLAN:
1124 mask.vlan = flow_tcf_item_mask
1125 (items, &rte_flow_item_vlan_mask,
1126 &flow_tcf_mask_supported.vlan,
1127 &flow_tcf_mask_empty.vlan,
1128 sizeof(flow_tcf_mask_supported.vlan),
1132 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1133 RTE_BE16(ETH_P_8021Q));
1136 if (mask.vlan == &flow_tcf_mask_empty.vlan)
1138 spec.vlan = items->spec;
1139 if (mask.vlan->inner_type) {
1140 mnl_attr_put_u16(nlh,
1141 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1142 spec.vlan->inner_type);
1143 vlan_eth_type_set = 1;
1145 if (mask.vlan->tci & RTE_BE16(0xe000))
1146 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1148 (spec.vlan->tci) >> 13) & 0x7);
1149 if (mask.vlan->tci & RTE_BE16(0x0fff))
1150 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1155 case RTE_FLOW_ITEM_TYPE_IPV4:
1156 mask.ipv4 = flow_tcf_item_mask
1157 (items, &rte_flow_item_ipv4_mask,
1158 &flow_tcf_mask_supported.ipv4,
1159 &flow_tcf_mask_empty.ipv4,
1160 sizeof(flow_tcf_mask_supported.ipv4),
1163 if (!eth_type_set || !vlan_eth_type_set)
1164 mnl_attr_put_u16(nlh,
1166 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1167 TCA_FLOWER_KEY_ETH_TYPE,
1168 RTE_BE16(ETH_P_IP));
1170 vlan_eth_type_set = 1;
1171 if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1173 spec.ipv4 = items->spec;
1174 if (mask.ipv4->hdr.next_proto_id) {
1175 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1176 spec.ipv4->hdr.next_proto_id);
1179 if (mask.ipv4->hdr.src_addr) {
1180 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1181 spec.ipv4->hdr.src_addr);
1182 mnl_attr_put_u32(nlh,
1183 TCA_FLOWER_KEY_IPV4_SRC_MASK,
1184 mask.ipv4->hdr.src_addr);
1186 if (mask.ipv4->hdr.dst_addr) {
1187 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1188 spec.ipv4->hdr.dst_addr);
1189 mnl_attr_put_u32(nlh,
1190 TCA_FLOWER_KEY_IPV4_DST_MASK,
1191 mask.ipv4->hdr.dst_addr);
1194 case RTE_FLOW_ITEM_TYPE_IPV6:
1195 mask.ipv6 = flow_tcf_item_mask
1196 (items, &rte_flow_item_ipv6_mask,
1197 &flow_tcf_mask_supported.ipv6,
1198 &flow_tcf_mask_empty.ipv6,
1199 sizeof(flow_tcf_mask_supported.ipv6),
1202 if (!eth_type_set || !vlan_eth_type_set)
1203 mnl_attr_put_u16(nlh,
1205 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1206 TCA_FLOWER_KEY_ETH_TYPE,
1207 RTE_BE16(ETH_P_IPV6));
1209 vlan_eth_type_set = 1;
1210 if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1212 spec.ipv6 = items->spec;
1213 if (mask.ipv6->hdr.proto) {
1214 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1215 spec.ipv6->hdr.proto);
1218 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1219 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1220 sizeof(spec.ipv6->hdr.src_addr),
1221 spec.ipv6->hdr.src_addr);
1222 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1223 sizeof(mask.ipv6->hdr.src_addr),
1224 mask.ipv6->hdr.src_addr);
1226 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1227 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1228 sizeof(spec.ipv6->hdr.dst_addr),
1229 spec.ipv6->hdr.dst_addr);
1230 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1231 sizeof(mask.ipv6->hdr.dst_addr),
1232 mask.ipv6->hdr.dst_addr);
1235 case RTE_FLOW_ITEM_TYPE_UDP:
1236 mask.udp = flow_tcf_item_mask
1237 (items, &rte_flow_item_udp_mask,
1238 &flow_tcf_mask_supported.udp,
1239 &flow_tcf_mask_empty.udp,
1240 sizeof(flow_tcf_mask_supported.udp),
1244 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1246 if (mask.udp == &flow_tcf_mask_empty.udp)
1248 spec.udp = items->spec;
1249 if (mask.udp->hdr.src_port) {
1250 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1251 spec.udp->hdr.src_port);
1252 mnl_attr_put_u16(nlh,
1253 TCA_FLOWER_KEY_UDP_SRC_MASK,
1254 mask.udp->hdr.src_port);
1256 if (mask.udp->hdr.dst_port) {
1257 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1258 spec.udp->hdr.dst_port);
1259 mnl_attr_put_u16(nlh,
1260 TCA_FLOWER_KEY_UDP_DST_MASK,
1261 mask.udp->hdr.dst_port);
1264 case RTE_FLOW_ITEM_TYPE_TCP:
1265 mask.tcp = flow_tcf_item_mask
1266 (items, &rte_flow_item_tcp_mask,
1267 &flow_tcf_mask_supported.tcp,
1268 &flow_tcf_mask_empty.tcp,
1269 sizeof(flow_tcf_mask_supported.tcp),
1273 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1275 if (mask.tcp == &flow_tcf_mask_empty.tcp)
1277 spec.tcp = items->spec;
1278 if (mask.tcp->hdr.src_port) {
1279 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
1280 spec.tcp->hdr.src_port);
1281 mnl_attr_put_u16(nlh,
1282 TCA_FLOWER_KEY_TCP_SRC_MASK,
1283 mask.tcp->hdr.src_port);
1285 if (mask.tcp->hdr.dst_port) {
1286 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
1287 spec.tcp->hdr.dst_port);
1288 mnl_attr_put_u16(nlh,
1289 TCA_FLOWER_KEY_TCP_DST_MASK,
1290 mask.tcp->hdr.dst_port);
1294 return rte_flow_error_set(error, ENOTSUP,
1295 RTE_FLOW_ERROR_TYPE_ITEM,
1296 NULL, "item not supported");
1299 na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
1300 na_act_index_cur = 1;
1301 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1302 struct nlattr *na_act_index;
1303 struct nlattr *na_act;
1304 unsigned int vlan_act;
1307 switch (actions->type) {
1308 case RTE_FLOW_ACTION_TYPE_VOID:
1310 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1311 conf.port_id = actions->conf;
1312 if (conf.port_id->original)
1315 for (i = 0; ptoi[i].ifindex; ++i)
1316 if (ptoi[i].port_id == conf.port_id->id)
1318 assert(ptoi[i].ifindex);
1320 mnl_attr_nest_start(nlh, na_act_index_cur++);
1321 assert(na_act_index);
1322 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
1323 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1325 mnl_attr_put(nlh, TCA_MIRRED_PARMS,
1326 sizeof(struct tc_mirred),
1327 &(struct tc_mirred){
1328 .action = TC_ACT_STOLEN,
1329 .eaction = TCA_EGRESS_REDIR,
1330 .ifindex = ptoi[i].ifindex,
1332 mnl_attr_nest_end(nlh, na_act);
1333 mnl_attr_nest_end(nlh, na_act_index);
1335 case RTE_FLOW_ACTION_TYPE_DROP:
1337 mnl_attr_nest_start(nlh, na_act_index_cur++);
1338 assert(na_act_index);
1339 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1340 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1342 mnl_attr_put(nlh, TCA_GACT_PARMS,
1343 sizeof(struct tc_gact),
1345 .action = TC_ACT_SHOT,
1347 mnl_attr_nest_end(nlh, na_act);
1348 mnl_attr_nest_end(nlh, na_act_index);
1350 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1351 conf.of_push_vlan = NULL;
1352 vlan_act = TCA_VLAN_ACT_POP;
1353 goto action_of_vlan;
1354 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1355 conf.of_push_vlan = actions->conf;
1356 vlan_act = TCA_VLAN_ACT_PUSH;
1357 goto action_of_vlan;
1358 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1359 conf.of_set_vlan_vid = actions->conf;
1361 goto override_na_vlan_id;
1362 vlan_act = TCA_VLAN_ACT_MODIFY;
1363 goto action_of_vlan;
1364 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1365 conf.of_set_vlan_pcp = actions->conf;
1366 if (na_vlan_priority)
1367 goto override_na_vlan_priority;
1368 vlan_act = TCA_VLAN_ACT_MODIFY;
1369 goto action_of_vlan;
1372 mnl_attr_nest_start(nlh, na_act_index_cur++);
1373 assert(na_act_index);
1374 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
1375 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1377 mnl_attr_put(nlh, TCA_VLAN_PARMS,
1378 sizeof(struct tc_vlan),
1380 .action = TC_ACT_PIPE,
1381 .v_action = vlan_act,
1383 if (vlan_act == TCA_VLAN_ACT_POP) {
1384 mnl_attr_nest_end(nlh, na_act);
1385 mnl_attr_nest_end(nlh, na_act_index);
1388 if (vlan_act == TCA_VLAN_ACT_PUSH)
1389 mnl_attr_put_u16(nlh,
1390 TCA_VLAN_PUSH_VLAN_PROTOCOL,
1391 conf.of_push_vlan->ethertype);
1392 na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
1393 mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
1394 na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
1395 mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
1396 mnl_attr_nest_end(nlh, na_act);
1397 mnl_attr_nest_end(nlh, na_act_index);
1398 if (actions->type ==
1399 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
1400 override_na_vlan_id:
1401 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
1402 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
1404 (conf.of_set_vlan_vid->vlan_vid);
1405 } else if (actions->type ==
1406 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
1407 override_na_vlan_priority:
1408 na_vlan_priority->nla_type =
1409 TCA_VLAN_PUSH_VLAN_PRIORITY;
1410 *(uint8_t *)mnl_attr_get_payload
1411 (na_vlan_priority) =
1412 conf.of_set_vlan_pcp->vlan_pcp;
1416 return rte_flow_error_set(error, ENOTSUP,
1417 RTE_FLOW_ERROR_TYPE_ACTION,
1419 "action not supported");
1423 assert(na_flower_act);
1424 mnl_attr_nest_end(nlh, na_flower_act);
1425 mnl_attr_nest_end(nlh, na_flower);
1430 * Send Netlink message with acknowledgment.
1433 * Libmnl socket to use.
1435 * Message to send. This function always raises the NLM_F_ACK flag before sending.
1439 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Send a Netlink message and collect the kernel's acknowledgment.
 * The caller's message gets NLM_F_ACK raised and a fresh random sequence
 * number so the reply can be matched against the request.
 * NOTE(review): some interleaved error-check lines are not visible in this
 * view of the file; comments below describe only the visible statements.
 */
1442 flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
/* Answer buffer: room for an nlmsgerr header plus the echoed request payload. */
1444 alignas(struct nlmsghdr)
1445 uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
1446 nlh->nlmsg_len - sizeof(*nlh)];
/* Random sequence number used to pair the kernel's reply with this request. */
1447 uint32_t seq = random();
/* Always request an explicit ACK from the kernel. */
1450 nlh->nlmsg_flags |= NLM_F_ACK;
1451 nlh->nlmsg_seq = seq;
/* Transmit the request, then read the acknowledgment back. */
1452 ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
1454 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
/*
 * Argument list of the reply-parsing call (presumably mnl_cb_run — the call
 * line itself is not visible here; TODO confirm): validates seq and port id,
 * no per-attribute callback.
 */
1457 (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
1465 * Apply flow to E-Switch by sending Netlink message.
1468 * Pointer to Ethernet device.
1469 * @param[in, out] flow
1470 * Pointer to the sub flow.
1472 * Pointer to the error structure.
1475 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Apply a flow rule to the E-Switch: build the RTM_NEWTFILTER request from
 * the pre-translated Netlink message stored in the device flow and send it
 * with acknowledgment.
 * NOTE(review): the success-return statement between the if and the error
 * return is not visible in this view; TODO confirm it returns 0.
 */
1478 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1479 struct rte_flow_error *error)
/* Netlink socket cached in the device private data. */
1481 struct priv *priv = dev->data->dev_private;
1482 struct mnl_socket *nl = priv->mnl_socket;
1483 struct mlx5_flow *dev_flow;
1484 struct nlmsghdr *nlh;
/* Exactly one device flow per rte_flow in the TCF path. */
1486 dev_flow = LIST_FIRST(&flow->dev_flows);
1487 /* E-Switch flow can't be expanded. */
1488 assert(!LIST_NEXT(dev_flow, next));
/* Reuse the message assembled at translate time; only type/flags change. */
1489 nlh = dev_flow->tcf.nlh;
1490 nlh->nlmsg_type = RTM_NEWTFILTER;
/* NLM_F_EXCL: fail instead of silently replacing an existing filter. */
1491 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1492 if (!flow_tcf_nl_ack(nl, nlh))
/* flow_tcf_nl_ack() already set rte_errno on failure. */
1494 return rte_flow_error_set(error, rte_errno,
1495 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1496 "netlink: failed to create TC flow rule");
1500 * Remove flow from E-Switch by sending Netlink message.
1503 * Pointer to Ethernet device.
1504 * @param[in, out] flow
1505 * Pointer to the sub flow.
/*
 * Remove a flow rule from the E-Switch by sending RTM_DELTFILTER built from
 * the same cached Netlink message used to create it. Errors from the kernel
 * are deliberately ignored (best-effort removal).
 */
1508 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1510 struct priv *priv = dev->data->dev_private;
1511 struct mnl_socket *nl = priv->mnl_socket;
1512 struct mlx5_flow *dev_flow;
1513 struct nlmsghdr *nlh;
1517 dev_flow = LIST_FIRST(&flow->dev_flows);
1520 /* E-Switch flow can't be expanded. */
1521 assert(!LIST_NEXT(dev_flow, next));
/* Turn the cached create message into a delete request. */
1522 nlh = dev_flow->tcf.nlh;
1523 nlh->nlmsg_type = RTM_DELTFILTER;
1524 nlh->nlmsg_flags = NLM_F_REQUEST;
/* Return value intentionally discarded: the rule may already be gone. */
1525 flow_tcf_nl_ack(nl, nlh);
1529 * Remove flow from E-Switch and release resources of the device flow.
1532 * Pointer to Ethernet device.
1533 * @param[in, out] flow
1534 * Pointer to the sub flow.
/*
 * Remove the flow from the E-Switch and unlink the single device flow from
 * the rte_flow's list.
 * NOTE(review): the line releasing the device flow memory (presumably
 * rte_free) is not visible in this view; TODO confirm it follows LIST_REMOVE.
 */
1537 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1539 struct mlx5_flow *dev_flow;
/* Kernel-side removal first, then local bookkeeping. */
1543 flow_tcf_remove(dev, flow);
1544 dev_flow = LIST_FIRST(&flow->dev_flows);
1547 /* E-Switch flow can't be expanded. */
1548 assert(!LIST_NEXT(dev_flow, next));
1549 LIST_REMOVE(dev_flow, next);
/* Driver callbacks for the TC-flower (E-Switch) flow engine. */
1553 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
1554 .validate = flow_tcf_validate,
1555 .prepare = flow_tcf_prepare,
1556 .translate = flow_tcf_translate,
1557 .apply = flow_tcf_apply,
1558 .remove = flow_tcf_remove,
1559 .destroy = flow_tcf_destroy,
1563 * Initialize ingress qdisc of a given network interface.
1566 * Libmnl socket of the @p NETLINK_ROUTE kind.
1568 * Index of network interface to initialize.
1570 * Perform verbose error reporting if not NULL.
1573 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * (Re)initialize the ingress qdisc of a network interface: delete any
 * existing ingress qdisc (dropping all attached filters), then create a
 * fresh one so flow rules start from a clean state.
 * NOTE(review): the declaration of `tcm` (struct tcmsg *) and the string
 * continuation of the error messages (" qdisc") are not visible in this
 * view of the file.
 */
1576 mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
1577 struct rte_flow_error *error)
1579 struct nlmsghdr *nlh;
/* One reusable buffer for both requests; 128 bytes of attribute headroom. */
1581 alignas(struct nlmsghdr)
1582 uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
1584 /* Destroy existing ingress qdisc and everything attached to it. */
1585 nlh = mnl_nlmsg_put_header(buf);
1586 nlh->nlmsg_type = RTM_DELQDISC;
1587 nlh->nlmsg_flags = NLM_F_REQUEST;
1588 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1589 tcm->tcm_family = AF_UNSPEC;
1590 tcm->tcm_ifindex = ifindex;
/* Well-known handle/parent pair identifying the ingress qdisc. */
1591 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
1592 tcm->tcm_parent = TC_H_INGRESS;
1593 /* Ignore errors when qdisc is already absent. */
1594 if (flow_tcf_nl_ack(nl, nlh) &&
1595 rte_errno != EINVAL && rte_errno != ENOENT)
1596 return rte_flow_error_set(error, rte_errno,
1597 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1598 "netlink: failed to remove ingress"
1600 /* Create fresh ingress qdisc. */
1601 nlh = mnl_nlmsg_put_header(buf);
1602 nlh->nlmsg_type = RTM_NEWQDISC;
/* NLM_F_EXCL: creation must not clash with a concurrently added qdisc. */
1603 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1604 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1605 tcm->tcm_family = AF_UNSPEC;
1606 tcm->tcm_ifindex = ifindex;
1607 tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
1608 tcm->tcm_parent = TC_H_INGRESS;
/* _check variant guards against overflowing the stack buffer. */
1609 mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
1610 if (flow_tcf_nl_ack(nl, nlh))
1611 return rte_flow_error_set(error, rte_errno,
1612 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1613 "netlink: failed to create ingress"
1619 * Create and configure a libmnl socket for Netlink flow rules.
1622 * A valid libmnl socket object pointer on success, NULL otherwise and
/*
 * Open and bind a NETLINK_ROUTE libmnl socket for flow-rule management.
 * NOTE(review): the NULL check after open, the remaining setsockopt
 * arguments, and the success/error return paths are not visible in this
 * view of the file; the visible tail closes the socket on the failure path.
 */
1626 mlx5_flow_tcf_socket_create(void)
1628 struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
/* Best-effort: ask the kernel to cap ACK messages (NETLINK_CAP_ACK). */
1631 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
/* Bind with an automatically assigned port id. */
1633 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
/* Failure path: release the socket. */
1638 mnl_socket_close(nl);
1643 * Destroy a libmnl socket.
1646 * Libmnl socket of the @p NETLINK_ROUTE kind.
/* Close a libmnl socket previously created by mlx5_flow_tcf_socket_create(). */
1649 mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
1651 mnl_socket_close(nl);