1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
22 #include <sys/socket.h>
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
28 #include <rte_malloc.h>
31 #include "mlx5_flow.h"
32 #include "mlx5_autoconf.h"
34 #ifdef HAVE_TC_ACT_VLAN
36 #include <linux/tc_act/tc_vlan.h>
38 #else /* HAVE_TC_ACT_VLAN */
40 #define TCA_VLAN_ACT_POP 1
41 #define TCA_VLAN_ACT_PUSH 2
42 #define TCA_VLAN_ACT_MODIFY 3
43 #define TCA_VLAN_PARMS 2
44 #define TCA_VLAN_PUSH_VLAN_ID 3
45 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
46 #define TCA_VLAN_PAD 5
47 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
54 #endif /* HAVE_TC_ACT_VLAN */
56 /* Normally found in linux/netlink.h. */
57 #ifndef NETLINK_CAP_ACK
58 #define NETLINK_CAP_ACK 10
61 /* Normally found in linux/pkt_sched.h. */
62 #ifndef TC_H_MIN_INGRESS
63 #define TC_H_MIN_INGRESS 0xfff2u
66 /* Normally found in linux/pkt_cls.h. */
67 #ifndef TCA_CLS_FLAGS_SKIP_SW
68 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
70 #ifndef HAVE_TCA_FLOWER_ACT
71 #define TCA_FLOWER_ACT 3
73 #ifndef HAVE_TCA_FLOWER_FLAGS
74 #define TCA_FLOWER_FLAGS 22
76 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
77 #define TCA_FLOWER_KEY_ETH_TYPE 8
79 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
80 #define TCA_FLOWER_KEY_ETH_DST 4
82 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
83 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
85 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
86 #define TCA_FLOWER_KEY_ETH_SRC 6
88 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
89 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
91 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
92 #define TCA_FLOWER_KEY_IP_PROTO 9
94 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
95 #define TCA_FLOWER_KEY_IPV4_SRC 10
97 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
98 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
100 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
101 #define TCA_FLOWER_KEY_IPV4_DST 12
103 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
104 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
106 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
107 #define TCA_FLOWER_KEY_IPV6_SRC 14
109 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
110 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
112 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
113 #define TCA_FLOWER_KEY_IPV6_DST 16
115 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
116 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
118 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
119 #define TCA_FLOWER_KEY_TCP_SRC 18
121 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
122 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
124 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
125 #define TCA_FLOWER_KEY_TCP_DST 19
127 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
128 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
130 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
131 #define TCA_FLOWER_KEY_UDP_SRC 20
133 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
134 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
136 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
137 #define TCA_FLOWER_KEY_UDP_DST 21
139 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
140 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
142 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
143 #define TCA_FLOWER_KEY_VLAN_ID 23
145 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
146 #define TCA_FLOWER_KEY_VLAN_PRIO 24
148 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
149 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
152 #ifndef IPV6_ADDR_LEN
153 #define IPV6_ADDR_LEN 16
156 /** Empty masks for known item types. */
/* NOTE(review): the opening "static const struct {" (original line 157) is
 * missing from this dump — confirm against the pristine file. With no
 * initializer visible, a static object is zero-filled, which is exactly what
 * "empty mask" means: it is returned by flow_tcf_item_mask() when an item
 * carries neither spec nor mask. */
158 struct rte_flow_item_port_id port_id;
159 struct rte_flow_item_eth eth;
160 struct rte_flow_item_vlan vlan;
161 struct rte_flow_item_ipv4 ipv4;
162 struct rte_flow_item_ipv6 ipv6;
163 struct rte_flow_item_tcp tcp;
164 struct rte_flow_item_udp udp;
165 } flow_tcf_mask_empty;
167 /** Supported masks for known item types. */
/* NOTE(review): this dump is missing the per-member designators
 * (.eth = {, .vlan = {, .ipv4 = {, .ipv6 = {, .tcp = {, .udp = { ...) and the
 * trailing "};" — original lines 177-180, 184-185, 189-190, 194-197, 200,
 * 203-204, 207-208, 211-212 are absent. The values below are the widest masks
 * the TC-flower translation can honor; flow_tcf_item_mask() rejects any bit
 * set outside of them. */
168 static const struct {
169 struct rte_flow_item_port_id port_id;
170 struct rte_flow_item_eth eth;
171 struct rte_flow_item_vlan vlan;
172 struct rte_flow_item_ipv4 ipv4;
173 struct rte_flow_item_ipv6 ipv6;
174 struct rte_flow_item_tcp tcp;
175 struct rte_flow_item_udp udp;
176 } flow_tcf_mask_supported = {
181 .type = RTE_BE16(0xffff),
182 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
183 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
186 /* PCP and VID only, no DEI. */
187 .tci = RTE_BE16(0xefff),
188 .inner_type = RTE_BE16(0xffff),
191 .next_proto_id = 0xff,
192 .src_addr = RTE_BE32(0xffffffff),
193 .dst_addr = RTE_BE32(0xffffffff),
198 "\xff\xff\xff\xff\xff\xff\xff\xff"
199 "\xff\xff\xff\xff\xff\xff\xff\xff",
201 "\xff\xff\xff\xff\xff\xff\xff\xff"
202 "\xff\xff\xff\xff\xff\xff\xff\xff",
205 .src_port = RTE_BE16(0xffff),
206 .dst_port = RTE_BE16(0xffff),
209 .src_port = RTE_BE16(0xffff),
210 .dst_port = RTE_BE16(0xffff),
/* Helpers to pre-compute the Netlink attribute space a flow will need:
 * header-only, nested, fixed-length payload, typed payload, and
 * NUL-terminated string payloads, all rounded up to MNL alignment. */
214 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
215 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
216 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
217 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
218 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
/* +2: one extra slot when the device has no siblings, one zero-ifindex
 * terminator entry (see flow_tcf_build_ptoi_table()). */
220 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
222 /** DPDK port to network interface index (ifindex) conversion. */
223 struct flow_tcf_ptoi {
224 uint16_t port_id; /**< DPDK port ID. */
225 unsigned int ifindex; /**< Network interface index. */
/* NOTE(review): closing "};" of struct flow_tcf_ptoi (original line ~226)
 * is missing from this dump. */
/* Actions that terminate a flow ("fate"); at most one may be present. */
228 #define MLX5_TCF_FATE_ACTIONS (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID)
231 * Retrieve mask for pattern item.
233 * This function does basic sanity checks on a pattern item in order to
234 * return the most appropriate mask for it.
237 * Item specification.
238 * @param[in] mask_default
239 * Default mask for pattern item as specified by the flow API.
240 * @param[in] mask_supported
241 * Mask fields supported by the implementation.
242 * @param[in] mask_empty
243 * Empty mask to return when there is no specification.
245 * Perform verbose error reporting if not NULL.
248 * Either @p item->mask or one of the mask parameters on success, NULL
249 * otherwise and rte_errno is set.
/* Sanity-check a pattern item and pick the mask to apply: item->mask if
 * given, otherwise the API default; returns the empty mask when there is no
 * spec at all. NOTE(review): this dump is missing the "static const void *"
 * return-type line, opening brace, local declarations, several
 * "return NULL;" / "return mask;" statements and closing braces — verify
 * against the pristine file before editing. */
252 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
253 const void *mask_supported, const void *mask_empty,
254 size_t mask_size, struct rte_flow_error *error)
259 /* item->last and item->mask cannot exist without item->spec. */
260 if (!item->spec && (item->mask || item->last)) {
261 rte_flow_error_set(error, EINVAL,
262 RTE_FLOW_ERROR_TYPE_ITEM, item,
263 "\"mask\" or \"last\" field provided without"
264 " a corresponding \"spec\"");
267 /* No spec, no mask, no problem. */
270 mask = item->mask ? item->mask : mask_default;
/* Byte-wise pass over the chosen mask. */
273 * Single-pass check to make sure that:
274 * - Mask is supported, no bits are set outside mask_supported.
275 * - Both item->spec and item->last are included in mask.
277 for (i = 0; i != mask_size; ++i) {
/* Any bit set in mask[i] but clear in mask_supported[i] makes the
 * OR differ from mask_supported[i] alone -> unsupported field. */
280 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
281 ((const uint8_t *)mask_supported)[i]) {
282 rte_flow_error_set(error, ENOTSUP,
283 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
284 "unsupported field found"
/* A spec..last range is only valid when both ends agree under the mask. */
289 (((const uint8_t *)item->spec)[i] & mask[i]) !=
290 (((const uint8_t *)item->last)[i] & mask[i])) {
291 rte_flow_error_set(error, EINVAL,
292 RTE_FLOW_ERROR_TYPE_ITEM_LAST,
294 "range between \"spec\" and \"last\""
295 " not comprised in \"mask\"");
303 * Build a conversion table between port ID and ifindex.
306 * Pointer to Ethernet device.
308 * Pointer to ptoi table.
310 * Size of ptoi table provided.
313 * Size of ptoi table filled.
/* Fill ptoi[] with (DPDK port ID -> ifindex) pairs for the device and its
 * switch-domain siblings; ptoi[0] is forced to be the current device and a
 * zero-ifindex entry terminates the table. NOTE(review): this dump is missing
 * the return type, the "unsigned int len)" tail of the signature, braces,
 * the "continue;"/swap logic around lines 337-347 and the final return —
 * confirm against the pristine file. */
316 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
/* First call with NULL asks only for the sibling count. */
319 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
320 uint16_t port_id[n + 1];
322 unsigned int own = 0;
324 /* At least one port is needed when no switch domain is present. */
327 port_id[0] = dev->data->port_id;
/* Second call actually fills port_id[]; clamp to the earlier count. */
329 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
333 for (i = 0; i != n; ++i) {
334 struct rte_eth_dev_info dev_info;
336 rte_eth_dev_info_get(port_id[i], &dev_info);
337 if (port_id[i] == dev->data->port_id)
339 ptoi[i].port_id = port_id[i];
340 ptoi[i].ifindex = dev_info.if_index;
342 /* Ensure first entry of ptoi[] is the current device. */
348 /* An entry with zero ifindex terminates ptoi[]. */
355 * Verify the @p attr will be correctly understood by the E-switch.
358 * Pointer to flow attributes
360 * Pointer to error structure.
363 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Reject rte_flow attributes the E-switch path cannot honor: any group,
 * priority above 0xfffe, missing ingress or any egress. NOTE(review): the
 * dump is missing the "static int" line, braces, the actual "if" conditions
 * (attr->group, attr->ingress, attr->egress — original lines 373, 382, 386)
 * and the final "return 0;" — verify against the pristine file. */
366 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
367 struct rte_flow_error *error)
370 * Supported attributes: no groups, some priorities and ingress only.
371 * Don't care about transfer as it is the caller's problem.
374 return rte_flow_error_set(error, ENOTSUP,
375 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
376 "groups are not supported");
377 if (attr->priority > 0xfffe)
378 return rte_flow_error_set(error, ENOTSUP,
379 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
381 "lowest priority level is 0xfffe");
383 return rte_flow_error_set(error, EINVAL,
384 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
385 attr, "only ingress is supported");
387 return rte_flow_error_set(error, ENOTSUP,
388 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
389 attr, "egress is not supported");
394 * Validate flow for E-Switch.
397 * Pointer to the priv structure.
399 * Pointer to the flow attributes.
401 * Pointer to the list of items.
403 * Pointer to the list of actions.
405 * Pointer to the error structure.
408 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Validate an E-switch flow: check attributes, then each pattern item
 * (generic layer validation + the narrower TC-flower mask restrictions),
 * then each action, finally require exactly one fate action (DROP or
 * PORT_ID). NOTE(review): this dump is missing many lines throughout
 * (return type, braces, "break;" statements, union spec/mask declarations,
 * several error-set argument lines, "return 0;") — verify every edit against
 * the pristine file. */
411 flow_tcf_validate(struct rte_eth_dev *dev,
412 const struct rte_flow_attr *attr,
413 const struct rte_flow_item items[],
414 const struct rte_flow_action actions[],
415 struct rte_flow_error *error)
/* Typed views over items->spec / items->mask (union members). */
418 const struct rte_flow_item_port_id *port_id;
419 const struct rte_flow_item_eth *eth;
420 const struct rte_flow_item_vlan *vlan;
421 const struct rte_flow_item_ipv4 *ipv4;
422 const struct rte_flow_item_ipv6 *ipv6;
423 const struct rte_flow_item_tcp *tcp;
424 const struct rte_flow_item_udp *udp;
/* Typed views over actions->conf (union members). */
427 const struct rte_flow_action_port_id *port_id;
428 const struct rte_flow_action_of_push_vlan *of_push_vlan;
429 const struct rte_flow_action_of_set_vlan_vid *
431 const struct rte_flow_action_of_set_vlan_pcp *
434 uint32_t item_flags = 0;
435 uint32_t action_flags = 0;
/* -1 (all ones) == "L4 protocol not constrained yet". */
436 uint8_t next_protocol = -1;
437 unsigned int tcm_ifindex = 0;
438 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
442 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
443 PTOI_TABLE_SZ_MAX(dev)));
444 ret = flow_tcf_validate_attributes(attr, error);
/* Pattern items. */
447 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
450 switch (items->type) {
451 case RTE_FLOW_ITEM_TYPE_VOID:
453 case RTE_FLOW_ITEM_TYPE_PORT_ID:
454 mask.port_id = flow_tcf_item_mask
455 (items, &rte_flow_item_port_id_mask,
456 &flow_tcf_mask_supported.port_id,
457 &flow_tcf_mask_empty.port_id,
458 sizeof(flow_tcf_mask_supported.port_id),
462 if (mask.port_id == &flow_tcf_mask_empty.port_id) {
466 spec.port_id = items->spec;
/* Port ID must be matched exactly or not at all. */
467 if (mask.port_id->id && mask.port_id->id != 0xffffffff)
468 return rte_flow_error_set
470 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
472 "no support for partial mask on"
474 if (!mask.port_id->id)
/* Resolve the DPDK port ID to an ifindex via ptoi[]. */
477 for (i = 0; ptoi[i].ifindex; ++i)
478 if (ptoi[i].port_id == spec.port_id->id)
480 if (!ptoi[i].ifindex)
481 return rte_flow_error_set
483 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
485 "missing data to convert port ID to"
/* A single TC rule is bound to one ifindex only. */
487 if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
488 return rte_flow_error_set
490 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
492 "cannot match traffic for"
493 " several port IDs through"
494 " a single flow rule");
495 tcm_ifindex = ptoi[i].ifindex;
498 case RTE_FLOW_ITEM_TYPE_ETH:
499 ret = mlx5_flow_validate_item_eth(items, item_flags,
503 item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
505 * Redundant check due to different supported mask.
506 * Same for the rest of items.
508 mask.eth = flow_tcf_item_mask
509 (items, &rte_flow_item_eth_mask,
510 &flow_tcf_mask_supported.eth,
511 &flow_tcf_mask_empty.eth,
512 sizeof(flow_tcf_mask_supported.eth),
516 if (mask.eth->type && mask.eth->type !=
518 return rte_flow_error_set
520 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
522 "no support for partial mask on"
525 case RTE_FLOW_ITEM_TYPE_VLAN:
526 ret = mlx5_flow_validate_item_vlan(items, item_flags,
530 item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
531 mask.vlan = flow_tcf_item_mask
532 (items, &rte_flow_item_vlan_mask,
533 &flow_tcf_mask_supported.vlan,
534 &flow_tcf_mask_empty.vlan,
535 sizeof(flow_tcf_mask_supported.vlan),
/* PCP (0xe000) and VID (0x0fff) halves of TCI must each be
 * matched fully or not at all; same for inner_type. */
539 if ((mask.vlan->tci & RTE_BE16(0xe000) &&
540 (mask.vlan->tci & RTE_BE16(0xe000)) !=
542 (mask.vlan->tci & RTE_BE16(0x0fff) &&
543 (mask.vlan->tci & RTE_BE16(0x0fff)) !=
545 (mask.vlan->inner_type &&
546 mask.vlan->inner_type != RTE_BE16(0xffff)))
547 return rte_flow_error_set
549 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
551 "no support for partial masks on"
552 " \"tci\" (PCP and VID parts) and"
553 " \"inner_type\" fields");
555 case RTE_FLOW_ITEM_TYPE_IPV4:
556 ret = mlx5_flow_validate_item_ipv4(items, item_flags,
560 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
561 mask.ipv4 = flow_tcf_item_mask
562 (items, &rte_flow_item_ipv4_mask,
563 &flow_tcf_mask_supported.ipv4,
564 &flow_tcf_mask_empty.ipv4,
565 sizeof(flow_tcf_mask_supported.ipv4),
569 if (mask.ipv4->hdr.next_proto_id &&
570 mask.ipv4->hdr.next_proto_id != 0xff)
571 return rte_flow_error_set
573 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
575 "no support for partial mask on"
576 " \"hdr.next_proto_id\" field");
/* Remember L4 protocol for later UDP/TCP validation. */
577 else if (mask.ipv4->hdr.next_proto_id)
579 ((const struct rte_flow_item_ipv4 *)
580 (items->spec))->hdr.next_proto_id;
582 case RTE_FLOW_ITEM_TYPE_IPV6:
583 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
587 item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
588 mask.ipv6 = flow_tcf_item_mask
589 (items, &rte_flow_item_ipv6_mask,
590 &flow_tcf_mask_supported.ipv6,
591 &flow_tcf_mask_empty.ipv6,
592 sizeof(flow_tcf_mask_supported.ipv6),
596 if (mask.ipv6->hdr.proto &&
597 mask.ipv6->hdr.proto != 0xff)
598 return rte_flow_error_set
600 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
602 "no support for partial mask on"
603 " \"hdr.proto\" field");
604 else if (mask.ipv6->hdr.proto)
606 ((const struct rte_flow_item_ipv6 *)
607 (items->spec))->hdr.proto;
609 case RTE_FLOW_ITEM_TYPE_UDP:
610 ret = mlx5_flow_validate_item_udp(items, item_flags,
611 next_protocol, error);
614 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
615 mask.udp = flow_tcf_item_mask
616 (items, &rte_flow_item_udp_mask,
617 &flow_tcf_mask_supported.udp,
618 &flow_tcf_mask_empty.udp,
619 sizeof(flow_tcf_mask_supported.udp),
624 case RTE_FLOW_ITEM_TYPE_TCP:
625 ret = mlx5_flow_validate_item_tcp(items, item_flags,
626 next_protocol, error);
629 item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
630 mask.tcp = flow_tcf_item_mask
631 (items, &rte_flow_item_tcp_mask,
632 &flow_tcf_mask_supported.tcp,
633 &flow_tcf_mask_empty.tcp,
634 sizeof(flow_tcf_mask_supported.tcp),
640 return rte_flow_error_set(error, ENOTSUP,
641 RTE_FLOW_ERROR_TYPE_ITEM,
642 NULL, "item not supported");
/* Actions. */
645 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
648 switch (actions->type) {
649 case RTE_FLOW_ACTION_TYPE_VOID:
651 case RTE_FLOW_ACTION_TYPE_PORT_ID:
652 if (action_flags & MLX5_TCF_FATE_ACTIONS)
653 return rte_flow_error_set
655 RTE_FLOW_ERROR_TYPE_ACTION, actions,
656 "can't have multiple fate actions");
657 conf.port_id = actions->conf;
658 if (conf.port_id->original)
661 for (i = 0; ptoi[i].ifindex; ++i)
662 if (ptoi[i].port_id == conf.port_id->id)
664 if (!ptoi[i].ifindex)
665 return rte_flow_error_set
667 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
669 "missing data to convert port ID to"
671 action_flags |= MLX5_FLOW_ACTION_PORT_ID;
673 case RTE_FLOW_ACTION_TYPE_DROP:
674 if (action_flags & MLX5_TCF_FATE_ACTIONS)
675 return rte_flow_error_set
677 RTE_FLOW_ERROR_TYPE_ACTION, actions,
678 "can't have multiple fate actions");
679 action_flags |= MLX5_FLOW_ACTION_DROP;
681 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
682 action_flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
684 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
685 action_flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
687 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
688 action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
690 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
691 action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
694 return rte_flow_error_set(error, ENOTSUP,
695 RTE_FLOW_ERROR_TYPE_ACTION,
697 "action not supported");
/* Exactly one fate action (DROP or PORT_ID) is mandatory. */
700 if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
701 return rte_flow_error_set(error, EINVAL,
702 RTE_FLOW_ERROR_TYPE_ACTION, actions,
703 "no fate action is found");
708 * Calculate maximum size of memory for flow items of Linux TC flower and
709 * extract specified items.
712 * Pointer to the list of items.
713 * @param[out] item_flags
714 * Pointer to the detected items.
717 * Maximum size of memory for items.
/* Walk the pattern once and return an upper bound on the Netlink attribute
 * space the TC-flower message will need for these items, also reporting the
 * detected layers through *item_flags. NOTE(review): this dump is missing
 * the return type, braces, local "size"/"flags" declarations, "break;"
 * statements, the *item_flags store and the final return — verify against
 * the pristine file. */
720 flow_tcf_get_items_and_size(const struct rte_flow_item items[],
721 uint64_t *item_flags)
726 size += SZ_NLATTR_STRZ_OF("flower") +
727 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
728 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
729 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
730 switch (items->type) {
731 case RTE_FLOW_ITEM_TYPE_VOID:
733 case RTE_FLOW_ITEM_TYPE_PORT_ID:
/* PORT_ID only selects the ifindex; no attribute space. */
735 case RTE_FLOW_ITEM_TYPE_ETH:
736 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
737 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
738 /* dst/src MAC addr and mask. */
739 flags |= MLX5_FLOW_LAYER_OUTER_L2;
741 case RTE_FLOW_ITEM_TYPE_VLAN:
742 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
743 SZ_NLATTR_TYPE_OF(uint16_t) +
744 /* VLAN Ether type. */
745 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
746 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
747 flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
749 case RTE_FLOW_ITEM_TYPE_IPV4:
750 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
751 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
752 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
753 /* dst/src IP addr and mask. */
754 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
756 case RTE_FLOW_ITEM_TYPE_IPV6:
757 size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
758 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
759 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
760 /* dst/src IP addr and mask. */
761 flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
763 case RTE_FLOW_ITEM_TYPE_UDP:
764 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
765 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
766 /* dst/src port and mask. */
767 flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
769 case RTE_FLOW_ITEM_TYPE_TCP:
770 size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
771 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
772 /* dst/src port and mask. */
773 flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
/* Unreachable after a successful validate(); log and skip. */
777 "unsupported item %p type %d,"
778 " items must be validated before flow creation",
779 (const void *)items, items->type);
788 * Calculate maximum size of memory for flow actions of Linux TC flower and
789 * extract specified actions.
792 * Pointer to the list of actions.
793 * @param[out] action_flags
794 * Pointer to the detected actions.
797 * Maximum size of memory for actions.
/* Walk the action list once and return an upper bound on the Netlink
 * attribute space needed for the TC actions, reporting the detected actions
 * through *action_flags. All four OF_*_VLAN actions share one "vlan" act
 * sizing block (they fall through to it). NOTE(review): this dump is missing
 * the return type, braces, "size"/"flags" declarations, "break;" statements
 * and the final return — verify against the pristine file. */
800 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
801 uint64_t *action_flags)
806 size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
807 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
808 switch (actions->type) {
809 case RTE_FLOW_ACTION_TYPE_VOID:
811 case RTE_FLOW_ACTION_TYPE_PORT_ID:
812 size += SZ_NLATTR_NEST + /* na_act_index. */
813 SZ_NLATTR_STRZ_OF("mirred") +
814 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
815 SZ_NLATTR_TYPE_OF(struct tc_mirred);
816 flags |= MLX5_FLOW_ACTION_PORT_ID;
818 case RTE_FLOW_ACTION_TYPE_DROP:
819 size += SZ_NLATTR_NEST + /* na_act_index. */
820 SZ_NLATTR_STRZ_OF("gact") +
821 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
822 SZ_NLATTR_TYPE_OF(struct tc_gact);
823 flags |= MLX5_FLOW_ACTION_DROP;
/* The four VLAN actions below share the "vlan" sizing block. */
825 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
826 flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
828 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
829 flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
831 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
832 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
834 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
835 flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
838 size += SZ_NLATTR_NEST + /* na_act_index. */
839 SZ_NLATTR_STRZ_OF("vlan") +
840 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
841 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
842 SZ_NLATTR_TYPE_OF(uint16_t) +
844 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
845 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
/* Unreachable after a successful validate(); log and skip. */
849 "unsupported action %p type %d,"
850 " items must be validated before flow creation",
851 (const void *)actions, actions->type);
855 *action_flags = flags;
860 * Brand rtnetlink buffer with unique handle.
862 * This handle should be unique for a given network interface to avoid
866 * Pointer to Netlink message.
868 * Unique 32-bit handle to use.
/* Stamp a per-interface-unique TC handle into the tcmsg payload of @nlh so
 * the rule can later be identified/removed. NOTE(review): the "static void"
 * line, braces and a blank line are missing from this dump. */
871 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
873 struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
875 tcm->tcm_handle = handle;
876 DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
877 (void *)nlh, handle);
881 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
882 * memory required, allocates the memory, initializes Netlink message headers
883 * and set unique TC message handle.
886 * Pointer to the flow attributes.
888 * Pointer to the list of items.
890 * Pointer to the list of actions.
891 * @param[out] item_flags
892 * Pointer to bit mask of all items detected.
893 * @param[out] action_flags
894 * Pointer to bit mask of all actions detected.
896 * Pointer to the error structure.
899 * Pointer to mlx5_flow object on success,
900 * otherwise NULL and rte_errno is set.
/* Allocate one zeroed buffer holding the mlx5_flow object immediately
 * followed by the Netlink message (nlmsghdr + tcmsg + worst-case attribute
 * space computed from items/actions), then brand it with a handle derived
 * from the buffer address. NOTE(review): this dump is missing braces, the
 * "struct tcmsg *tcm;" declaration, the NULL-return on allocation failure,
 * the struct-initializer interior and the final "return dev_flow;" — verify
 * against the pristine file. */
902 static struct mlx5_flow *
903 flow_tcf_prepare(const struct rte_flow_attr *attr __rte_unused,
904 const struct rte_flow_item items[],
905 const struct rte_flow_action actions[],
906 uint64_t *item_flags, uint64_t *action_flags,
907 struct rte_flow_error *error)
909 size_t size = sizeof(struct mlx5_flow) +
910 MNL_ALIGN(sizeof(struct nlmsghdr)) +
911 MNL_ALIGN(sizeof(struct tcmsg));
912 struct mlx5_flow *dev_flow;
913 struct nlmsghdr *nlh;
916 size += flow_tcf_get_items_and_size(items, item_flags);
917 size += flow_tcf_get_actions_and_size(actions, action_flags);
918 dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
920 rte_flow_error_set(error, ENOMEM,
921 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
922 "not enough memory to create E-Switch flow");
/* Netlink message lives right after the mlx5_flow object. */
925 nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
926 tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
927 *dev_flow = (struct mlx5_flow){
928 .tcf = (struct mlx5_flow_tcf){
934 * Generate a reasonably unique handle based on the address of the
937 * This is straightforward on 32-bit systems where the flow pointer can
938 * be used directly. Otherwise, its least significant part is taken
939 * after shifting it by the previous power of two of the pointed buffer
942 if (sizeof(dev_flow) <= 4)
943 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
945 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
946 rte_log2_u32(rte_align32prevpow2(size)));
951 * Translate flow for Linux TC flower and construct Netlink message.
954 * Pointer to the priv structure.
955 * @param[in, out] flow
956 * Pointer to the sub flow.
958 * Pointer to the flow attributes.
960 * Pointer to the list of items.
962 * Pointer to the list of actions.
964 * Pointer to the error structure.
967 * 0 on success, a negative errno value otherwise and rte_errno is set.
970 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
971 const struct rte_flow_attr *attr,
972 const struct rte_flow_item items[],
973 const struct rte_flow_action actions[],
974 struct rte_flow_error *error)
977 const struct rte_flow_item_port_id *port_id;
978 const struct rte_flow_item_eth *eth;
979 const struct rte_flow_item_vlan *vlan;
980 const struct rte_flow_item_ipv4 *ipv4;
981 const struct rte_flow_item_ipv6 *ipv6;
982 const struct rte_flow_item_tcp *tcp;
983 const struct rte_flow_item_udp *udp;
986 const struct rte_flow_action_port_id *port_id;
987 const struct rte_flow_action_of_push_vlan *of_push_vlan;
988 const struct rte_flow_action_of_set_vlan_vid *
990 const struct rte_flow_action_of_set_vlan_pcp *
993 struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
994 struct nlmsghdr *nlh = dev_flow->tcf.nlh;
995 struct tcmsg *tcm = dev_flow->tcf.tcm;
996 uint32_t na_act_index_cur;
997 bool eth_type_set = 0;
998 bool vlan_present = 0;
999 bool vlan_eth_type_set = 0;
1000 bool ip_proto_set = 0;
1001 struct nlattr *na_flower;
1002 struct nlattr *na_flower_act;
1003 struct nlattr *na_vlan_id = NULL;
1004 struct nlattr *na_vlan_priority = NULL;
1006 claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1007 PTOI_TABLE_SZ_MAX(dev)));
1008 nlh = dev_flow->tcf.nlh;
1009 tcm = dev_flow->tcf.tcm;
1010 /* Prepare API must have been called beforehand. */
1011 assert(nlh != NULL && tcm != NULL);
1012 tcm->tcm_family = AF_UNSPEC;
1013 tcm->tcm_ifindex = ptoi[0].ifindex;
1014 tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1016 * Priority cannot be zero to prevent the kernel from picking one
1019 tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1020 RTE_BE16(ETH_P_ALL));
1021 mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1022 na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1023 mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
1024 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1027 switch (items->type) {
1028 case RTE_FLOW_ITEM_TYPE_VOID:
1030 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1031 mask.port_id = flow_tcf_item_mask
1032 (items, &rte_flow_item_port_id_mask,
1033 &flow_tcf_mask_supported.port_id,
1034 &flow_tcf_mask_empty.port_id,
1035 sizeof(flow_tcf_mask_supported.port_id),
1037 assert(mask.port_id);
1038 if (mask.port_id == &flow_tcf_mask_empty.port_id)
1040 spec.port_id = items->spec;
1041 if (!mask.port_id->id)
1044 for (i = 0; ptoi[i].ifindex; ++i)
1045 if (ptoi[i].port_id == spec.port_id->id)
1047 assert(ptoi[i].ifindex);
1048 tcm->tcm_ifindex = ptoi[i].ifindex;
1050 case RTE_FLOW_ITEM_TYPE_ETH:
1051 mask.eth = flow_tcf_item_mask
1052 (items, &rte_flow_item_eth_mask,
1053 &flow_tcf_mask_supported.eth,
1054 &flow_tcf_mask_empty.eth,
1055 sizeof(flow_tcf_mask_supported.eth),
1058 if (mask.eth == &flow_tcf_mask_empty.eth)
1060 spec.eth = items->spec;
1061 if (mask.eth->type) {
1062 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1066 if (!is_zero_ether_addr(&mask.eth->dst)) {
1067 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1069 spec.eth->dst.addr_bytes);
1070 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1072 mask.eth->dst.addr_bytes);
1074 if (!is_zero_ether_addr(&mask.eth->src)) {
1075 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1077 spec.eth->src.addr_bytes);
1078 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1080 mask.eth->src.addr_bytes);
1083 case RTE_FLOW_ITEM_TYPE_VLAN:
1084 mask.vlan = flow_tcf_item_mask
1085 (items, &rte_flow_item_vlan_mask,
1086 &flow_tcf_mask_supported.vlan,
1087 &flow_tcf_mask_empty.vlan,
1088 sizeof(flow_tcf_mask_supported.vlan),
1092 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1093 RTE_BE16(ETH_P_8021Q));
1096 if (mask.vlan == &flow_tcf_mask_empty.vlan)
1098 spec.vlan = items->spec;
1099 if (mask.vlan->inner_type) {
1100 mnl_attr_put_u16(nlh,
1101 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1102 spec.vlan->inner_type);
1103 vlan_eth_type_set = 1;
1105 if (mask.vlan->tci & RTE_BE16(0xe000))
1106 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1108 (spec.vlan->tci) >> 13) & 0x7);
1109 if (mask.vlan->tci & RTE_BE16(0x0fff))
1110 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1115 case RTE_FLOW_ITEM_TYPE_IPV4:
1116 mask.ipv4 = flow_tcf_item_mask
1117 (items, &rte_flow_item_ipv4_mask,
1118 &flow_tcf_mask_supported.ipv4,
1119 &flow_tcf_mask_empty.ipv4,
1120 sizeof(flow_tcf_mask_supported.ipv4),
1123 if (!eth_type_set || !vlan_eth_type_set)
1124 mnl_attr_put_u16(nlh,
1126 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1127 TCA_FLOWER_KEY_ETH_TYPE,
1128 RTE_BE16(ETH_P_IP));
1130 vlan_eth_type_set = 1;
1131 if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1133 spec.ipv4 = items->spec;
1134 if (mask.ipv4->hdr.next_proto_id) {
1135 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1136 spec.ipv4->hdr.next_proto_id);
1139 if (mask.ipv4->hdr.src_addr) {
1140 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1141 spec.ipv4->hdr.src_addr);
1142 mnl_attr_put_u32(nlh,
1143 TCA_FLOWER_KEY_IPV4_SRC_MASK,
1144 mask.ipv4->hdr.src_addr);
1146 if (mask.ipv4->hdr.dst_addr) {
1147 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1148 spec.ipv4->hdr.dst_addr);
1149 mnl_attr_put_u32(nlh,
1150 TCA_FLOWER_KEY_IPV4_DST_MASK,
1151 mask.ipv4->hdr.dst_addr);
1154 case RTE_FLOW_ITEM_TYPE_IPV6:
1155 mask.ipv6 = flow_tcf_item_mask
1156 (items, &rte_flow_item_ipv6_mask,
1157 &flow_tcf_mask_supported.ipv6,
1158 &flow_tcf_mask_empty.ipv6,
1159 sizeof(flow_tcf_mask_supported.ipv6),
1162 if (!eth_type_set || !vlan_eth_type_set)
1163 mnl_attr_put_u16(nlh,
1165 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1166 TCA_FLOWER_KEY_ETH_TYPE,
1167 RTE_BE16(ETH_P_IPV6));
1169 vlan_eth_type_set = 1;
1170 if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1172 spec.ipv6 = items->spec;
1173 if (mask.ipv6->hdr.proto) {
1174 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1175 spec.ipv6->hdr.proto);
1178 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1179 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1180 sizeof(spec.ipv6->hdr.src_addr),
1181 spec.ipv6->hdr.src_addr);
1182 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1183 sizeof(mask.ipv6->hdr.src_addr),
1184 mask.ipv6->hdr.src_addr);
1186 if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1187 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1188 sizeof(spec.ipv6->hdr.dst_addr),
1189 spec.ipv6->hdr.dst_addr);
1190 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1191 sizeof(mask.ipv6->hdr.dst_addr),
1192 mask.ipv6->hdr.dst_addr);
1195 case RTE_FLOW_ITEM_TYPE_UDP:
1196 mask.udp = flow_tcf_item_mask
1197 (items, &rte_flow_item_udp_mask,
1198 &flow_tcf_mask_supported.udp,
1199 &flow_tcf_mask_empty.udp,
1200 sizeof(flow_tcf_mask_supported.udp),
1204 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1206 if (mask.udp == &flow_tcf_mask_empty.udp)
1208 spec.udp = items->spec;
1209 if (mask.udp->hdr.src_port) {
1210 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1211 spec.udp->hdr.src_port);
1212 mnl_attr_put_u16(nlh,
1213 TCA_FLOWER_KEY_UDP_SRC_MASK,
1214 mask.udp->hdr.src_port);
1216 if (mask.udp->hdr.dst_port) {
1217 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1218 spec.udp->hdr.dst_port);
1219 mnl_attr_put_u16(nlh,
1220 TCA_FLOWER_KEY_UDP_DST_MASK,
1221 mask.udp->hdr.dst_port);
1224 case RTE_FLOW_ITEM_TYPE_TCP:
1225 mask.tcp = flow_tcf_item_mask
1226 (items, &rte_flow_item_tcp_mask,
1227 &flow_tcf_mask_supported.tcp,
1228 &flow_tcf_mask_empty.tcp,
1229 sizeof(flow_tcf_mask_supported.tcp),
1233 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1235 if (mask.tcp == &flow_tcf_mask_empty.tcp)
1237 spec.tcp = items->spec;
1238 if (mask.tcp->hdr.src_port) {
1239 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
1240 spec.tcp->hdr.src_port);
1241 mnl_attr_put_u16(nlh,
1242 TCA_FLOWER_KEY_TCP_SRC_MASK,
1243 mask.tcp->hdr.src_port);
1245 if (mask.tcp->hdr.dst_port) {
1246 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
1247 spec.tcp->hdr.dst_port);
1248 mnl_attr_put_u16(nlh,
1249 TCA_FLOWER_KEY_TCP_DST_MASK,
1250 mask.tcp->hdr.dst_port);
1254 return rte_flow_error_set(error, ENOTSUP,
1255 RTE_FLOW_ERROR_TYPE_ITEM,
1256 NULL, "item not supported");
1259 na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
1260 na_act_index_cur = 1;
1261 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1262 struct nlattr *na_act_index;
1263 struct nlattr *na_act;
1264 unsigned int vlan_act;
1267 switch (actions->type) {
1268 case RTE_FLOW_ACTION_TYPE_VOID:
1270 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1271 conf.port_id = actions->conf;
1272 if (conf.port_id->original)
1275 for (i = 0; ptoi[i].ifindex; ++i)
1276 if (ptoi[i].port_id == conf.port_id->id)
1278 assert(ptoi[i].ifindex);
1280 mnl_attr_nest_start(nlh, na_act_index_cur++);
1281 assert(na_act_index);
1282 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
1283 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1285 mnl_attr_put(nlh, TCA_MIRRED_PARMS,
1286 sizeof(struct tc_mirred),
1287 &(struct tc_mirred){
1288 .action = TC_ACT_STOLEN,
1289 .eaction = TCA_EGRESS_REDIR,
1290 .ifindex = ptoi[i].ifindex,
1292 mnl_attr_nest_end(nlh, na_act);
1293 mnl_attr_nest_end(nlh, na_act_index);
1295 case RTE_FLOW_ACTION_TYPE_DROP:
1297 mnl_attr_nest_start(nlh, na_act_index_cur++);
1298 assert(na_act_index);
1299 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1300 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1302 mnl_attr_put(nlh, TCA_GACT_PARMS,
1303 sizeof(struct tc_gact),
1305 .action = TC_ACT_SHOT,
1307 mnl_attr_nest_end(nlh, na_act);
1308 mnl_attr_nest_end(nlh, na_act_index);
1310 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1311 conf.of_push_vlan = NULL;
1312 vlan_act = TCA_VLAN_ACT_POP;
1313 goto action_of_vlan;
1314 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1315 conf.of_push_vlan = actions->conf;
1316 vlan_act = TCA_VLAN_ACT_PUSH;
1317 goto action_of_vlan;
1318 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1319 conf.of_set_vlan_vid = actions->conf;
1321 goto override_na_vlan_id;
1322 vlan_act = TCA_VLAN_ACT_MODIFY;
1323 goto action_of_vlan;
1324 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1325 conf.of_set_vlan_pcp = actions->conf;
1326 if (na_vlan_priority)
1327 goto override_na_vlan_priority;
1328 vlan_act = TCA_VLAN_ACT_MODIFY;
1329 goto action_of_vlan;
1332 mnl_attr_nest_start(nlh, na_act_index_cur++);
1333 assert(na_act_index);
1334 mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
1335 na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1337 mnl_attr_put(nlh, TCA_VLAN_PARMS,
1338 sizeof(struct tc_vlan),
1340 .action = TC_ACT_PIPE,
1341 .v_action = vlan_act,
1343 if (vlan_act == TCA_VLAN_ACT_POP) {
1344 mnl_attr_nest_end(nlh, na_act);
1345 mnl_attr_nest_end(nlh, na_act_index);
1348 if (vlan_act == TCA_VLAN_ACT_PUSH)
1349 mnl_attr_put_u16(nlh,
1350 TCA_VLAN_PUSH_VLAN_PROTOCOL,
1351 conf.of_push_vlan->ethertype);
1352 na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
1353 mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
1354 na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
1355 mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
1356 mnl_attr_nest_end(nlh, na_act);
1357 mnl_attr_nest_end(nlh, na_act_index);
1358 if (actions->type ==
1359 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
1360 override_na_vlan_id:
1361 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
1362 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
1364 (conf.of_set_vlan_vid->vlan_vid);
1365 } else if (actions->type ==
1366 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
1367 override_na_vlan_priority:
1368 na_vlan_priority->nla_type =
1369 TCA_VLAN_PUSH_VLAN_PRIORITY;
1370 *(uint8_t *)mnl_attr_get_payload
1371 (na_vlan_priority) =
1372 conf.of_set_vlan_pcp->vlan_pcp;
1376 return rte_flow_error_set(error, ENOTSUP,
1377 RTE_FLOW_ERROR_TYPE_ACTION,
1379 "action not supported");
1383 assert(na_flower_act);
1384 mnl_attr_nest_end(nlh, na_flower_act);
1385 mnl_attr_nest_end(nlh, na_flower);
1390 * Send Netlink message with acknowledgment.
1393 * Libmnl socket to use.
1395 * Message to send. This function always raises the NLM_F_ACK flag before
1399 * 0 on success, a negative errno value otherwise and rte_errno is set.
flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
	/* Answer buffer: an nlmsgerr header plus room for the echoed request
	 * payload (the kernel may echo the offending message back). */
	alignas(struct nlmsghdr)
	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
		    nlh->nlmsg_len - sizeof(*nlh)];
	/* Random sequence number used to match the kernel acknowledgment. */
	uint32_t seq = random();
	/* Always request an explicit ACK for this message. */
	nlh->nlmsg_flags |= NLM_F_ACK;
	nlh->nlmsg_seq = seq;
	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
	ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
	/* Validate the received acknowledgment against seq and port id. */
	(ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
1425 * Apply flow to E-Switch by sending Netlink message.
1428 * Pointer to Ethernet device.
1429 * @param[in, out] flow
1430 * Pointer to the sub flow.
1432 * Pointer to the error structure.
 * 0 on success, a negative errno value otherwise and rte_errno is set.
1438 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1439 struct rte_flow_error *error)
1441 struct priv *priv = dev->data->dev_private;
1442 struct mnl_socket *nl = priv->mnl_socket;
1443 struct mlx5_flow *dev_flow;
1444 struct nlmsghdr *nlh;
1446 dev_flow = LIST_FIRST(&flow->dev_flows);
1447 /* E-Switch flow can't be expanded. */
1448 assert(!LIST_NEXT(dev_flow, next));
1449 nlh = dev_flow->tcf.nlh;
1450 nlh->nlmsg_type = RTM_NEWTFILTER;
1451 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1452 if (!flow_tcf_nl_ack(nl, nlh))
1454 return rte_flow_error_set(error, rte_errno,
1455 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1456 "netlink: failed to create TC flow rule");
1460 * Remove flow from E-Switch by sending Netlink message.
1463 * Pointer to Ethernet device.
1464 * @param[in, out] flow
1465 * Pointer to the sub flow.
flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
	struct priv *priv = dev->data->dev_private;
	struct mnl_socket *nl = priv->mnl_socket;
	struct mlx5_flow *dev_flow;
	struct nlmsghdr *nlh;
	dev_flow = LIST_FIRST(&flow->dev_flows);
	/* E-Switch flow can't be expanded. */
	assert(!LIST_NEXT(dev_flow, next));
	/* Reuse the translated message, flipping it into a delete request. */
	nlh = dev_flow->tcf.nlh;
	nlh->nlmsg_type = RTM_DELTFILTER;
	nlh->nlmsg_flags = NLM_F_REQUEST;
	/* Result deliberately ignored: removal is best-effort here. */
	flow_tcf_nl_ack(nl, nlh);
1489 * Remove flow from E-Switch and release resources of the device flow.
1492 * Pointer to Ethernet device.
1493 * @param[in, out] flow
1494 * Pointer to the sub flow.
1497 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1499 struct mlx5_flow *dev_flow;
1503 flow_tcf_remove(dev, flow);
1504 dev_flow = LIST_FIRST(&flow->dev_flows);
1507 /* E-Switch flow can't be expanded. */
1508 assert(!LIST_NEXT(dev_flow, next));
1509 LIST_REMOVE(dev_flow, next);
/* E-Switch (TC Flower over Netlink) flow driver callbacks. */
const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
	.validate = flow_tcf_validate,
	.prepare = flow_tcf_prepare,
	.translate = flow_tcf_translate,
	.apply = flow_tcf_apply,	/* RTM_NEWTFILTER request */
	.remove = flow_tcf_remove,	/* RTM_DELTFILTER request */
	.destroy = flow_tcf_destroy,	/* remove + release device flow */
1523 * Initialize ingress qdisc of a given network interface.
1526 * Libmnl socket of the @p NETLINK_ROUTE kind.
1528 * Index of network interface to initialize.
1530 * Perform verbose error reporting if not NULL.
1533 * 0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
		   struct rte_flow_error *error)
	struct nlmsghdr *nlh;
	/* Stack buffer large enough for a tcmsg plus a small attribute tail. */
	alignas(struct nlmsghdr)
	uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
	/* Destroy existing ingress qdisc and everything attached to it. */
	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_DELQDISC;
	nlh->nlmsg_flags = NLM_F_REQUEST;
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ifindex;
	/* Address the ingress qdisc of this interface specifically. */
	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
	tcm->tcm_parent = TC_H_INGRESS;
	/* Ignore errors when qdisc is already absent. */
	if (flow_tcf_nl_ack(nl, nlh) &&
	    rte_errno != EINVAL && rte_errno != ENOENT)
		return rte_flow_error_set(error, rte_errno,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					  "netlink: failed to remove ingress"
	/* Create fresh ingress qdisc. */
	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_NEWQDISC;
	/* NLM_F_EXCL makes creation fail if the qdisc reappeared meanwhile. */
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ifindex;
	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
	tcm->tcm_parent = TC_H_INGRESS;
	/* Bounds-checked put: fails cleanly if "ingress" overflows buf. */
	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
	if (flow_tcf_nl_ack(nl, nlh))
		return rte_flow_error_set(error, rte_errno,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					  "netlink: failed to create ingress"
1579 * Create and configure a libmnl socket for Netlink flow rules.
1582 * A valid libmnl socket object pointer on success, NULL otherwise and
1586 mlx5_flow_tcf_socket_create(void)
1588 struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
1591 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
1593 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
1598 mnl_socket_close(nl);
1603 * Destroy a libmnl socket.
1606 * Libmnl socket of the @p NETLINK_ROUTE kind.
mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
	/* Close the NETLINK_ROUTE socket opened by
	 * mlx5_flow_tcf_socket_create(). */
	mnl_socket_close(nl);