/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2018 6WIND S.A.
 * Copyright 2018 Mellanox Technologies, Ltd
 */
#include <assert.h>
#include <errno.h>
#include <libmnl/libmnl.h>
#include <linux/if_ether.h>
#include <linux/netlink.h>
#include <linux/pkt_cls.h>
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/tc_act/tc_gact.h>
#include <linux/tc_act/tc_mirred.h>
#include <netinet/in.h>
#include <stdalign.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>

#include <rte_byteorder.h>
#include <rte_errno.h>
#include <rte_ether.h>
#include <rte_flow.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_autoconf.h"
#ifdef HAVE_TC_ACT_VLAN

#include <linux/tc_act/tc_vlan.h>

#else /* HAVE_TC_ACT_VLAN */

#define TCA_VLAN_ACT_POP 1
#define TCA_VLAN_ACT_PUSH 2
#define TCA_VLAN_ACT_MODIFY 3
#define TCA_VLAN_PARMS 2
#define TCA_VLAN_PUSH_VLAN_ID 3
#define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
#define TCA_VLAN_PAD 5
#define TCA_VLAN_PUSH_VLAN_PRIORITY 6

/* Fallback definition mirroring the kernel's linux/tc_act/tc_vlan.h. */
struct tc_vlan {
	tc_gen;
	int v_action;
};

#endif /* HAVE_TC_ACT_VLAN */

/* Normally found in linux/netlink.h. */
#ifndef NETLINK_CAP_ACK
#define NETLINK_CAP_ACK 10
#endif

/* Normally found in linux/pkt_sched.h. */
#ifndef TC_H_MIN_INGRESS
#define TC_H_MIN_INGRESS 0xfff2u
#endif

/* Normally found in linux/pkt_cls.h. */
#ifndef TCA_CLS_FLAGS_SKIP_SW
#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
#endif
#ifndef HAVE_TCA_FLOWER_ACT
#define TCA_FLOWER_ACT 3
#endif
#ifndef HAVE_TCA_FLOWER_FLAGS
#define TCA_FLOWER_FLAGS 22
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
#define TCA_FLOWER_KEY_ETH_TYPE 8
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
#define TCA_FLOWER_KEY_ETH_DST 4
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
#define TCA_FLOWER_KEY_ETH_DST_MASK 5
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
#define TCA_FLOWER_KEY_ETH_SRC 6
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
#define TCA_FLOWER_KEY_ETH_SRC_MASK 7
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
#define TCA_FLOWER_KEY_IP_PROTO 9
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
#define TCA_FLOWER_KEY_IPV4_SRC 10
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
#define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
#define TCA_FLOWER_KEY_IPV4_DST 12
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
#define TCA_FLOWER_KEY_IPV4_DST_MASK 13
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
#define TCA_FLOWER_KEY_IPV6_SRC 14
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
#define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
#define TCA_FLOWER_KEY_IPV6_DST 16
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
#define TCA_FLOWER_KEY_IPV6_DST_MASK 17
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
#define TCA_FLOWER_KEY_TCP_SRC 18
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
#define TCA_FLOWER_KEY_TCP_SRC_MASK 35
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
#define TCA_FLOWER_KEY_TCP_DST 19
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
#define TCA_FLOWER_KEY_TCP_DST_MASK 36
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
#define TCA_FLOWER_KEY_UDP_SRC 20
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
#define TCA_FLOWER_KEY_UDP_SRC_MASK 37
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
#define TCA_FLOWER_KEY_UDP_DST 21
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
#define TCA_FLOWER_KEY_UDP_DST_MASK 38
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
#define TCA_FLOWER_KEY_VLAN_ID 23
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
#define TCA_FLOWER_KEY_VLAN_PRIO 24
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
#define TCA_FLOWER_KEY_TCP_FLAGS 71
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
#define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
#endif

#ifndef IPV6_ADDR_LEN
#define IPV6_ADDR_LEN 16
#endif

/** Empty masks for known item types. */
static const struct {
	struct rte_flow_item_port_id port_id;
	struct rte_flow_item_eth eth;
	struct rte_flow_item_vlan vlan;
	struct rte_flow_item_ipv4 ipv4;
	struct rte_flow_item_ipv6 ipv6;
	struct rte_flow_item_tcp tcp;
	struct rte_flow_item_udp udp;
} flow_tcf_mask_empty;

/** Supported masks for known item types. */
static const struct {
	struct rte_flow_item_port_id port_id;
	struct rte_flow_item_eth eth;
	struct rte_flow_item_vlan vlan;
	struct rte_flow_item_ipv4 ipv4;
	struct rte_flow_item_ipv6 ipv6;
	struct rte_flow_item_tcp tcp;
	struct rte_flow_item_udp udp;
} flow_tcf_mask_supported = {
	.port_id = {
		.id = 0xffffffff,
	},
	.eth = {
		.type = RTE_BE16(0xffff),
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	},
	.vlan = {
		/* PCP and VID only, no DEI. */
		.tci = RTE_BE16(0xefff),
		.inner_type = RTE_BE16(0xffff),
	},
	.ipv4.hdr = {
		.next_proto_id = 0xff,
		.src_addr = RTE_BE32(0xffffffff),
		.dst_addr = RTE_BE32(0xffffffff),
	},
	.ipv6.hdr = {
		.proto = 0xff,
		.src_addr =
			"\xff\xff\xff\xff\xff\xff\xff\xff"
			"\xff\xff\xff\xff\xff\xff\xff\xff",
		.dst_addr =
			"\xff\xff\xff\xff\xff\xff\xff\xff"
			"\xff\xff\xff\xff\xff\xff\xff\xff",
	},
	.tcp.hdr = {
		.src_port = RTE_BE16(0xffff),
		.dst_port = RTE_BE16(0xffff),
	},
	.udp.hdr = {
		.src_port = RTE_BE16(0xffff),
		.dst_port = RTE_BE16(0xffff),
	},
};

#define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
#define SZ_NLATTR_NEST SZ_NLATTR_HDR
#define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
#define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
#define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
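
/*
 * Worked example (illustrative): struct nlattr is 4 bytes and MNL_ALIGN()
 * rounds up to the 4-byte Netlink boundary, so SZ_NLATTR_TYPE_OF(uint16_t)
 * is MNL_ALIGN(4 + 2) = 8 bytes and SZ_NLATTR_STRZ_OF("flower") is
 * MNL_ALIGN(4 + 7) = 12 bytes. These are upper bounds used purely to
 * reserve message buffer space.
 */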

#define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
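
/*
 * The "+ 2" leaves room for one extra entry in case the device itself is
 * not listed in its switch domain, plus the zero-ifindex entry that
 * terminates the table (see flow_tcf_build_ptoi_table()).
 */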

/** DPDK port to network interface index (ifindex) conversion. */
struct flow_tcf_ptoi {
	uint16_t port_id; /**< DPDK port ID. */
	unsigned int ifindex; /**< Network interface index. */
};

#define MLX5_TCF_FATE_ACTIONS (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID)
#define MLX5_TCF_VLAN_ACTIONS \
	(MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
	 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
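
/*
 * Fate actions terminate packet processing, hence at most one of them can
 * appear in a rule; flow_tcf_validate() rejects rules combining several.
 */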

/**
 * Retrieve mask for pattern item.
 *
 * This function does basic sanity checks on a pattern item in order to
 * return the most appropriate mask for it.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask_default
 *   Default mask for pattern item as specified by the flow API.
 * @param[in] mask_supported
 *   Mask fields supported by the implementation.
 * @param[in] mask_empty
 *   Empty mask to return when there is no specification.
 * @param[in] mask_size
 *   Size of all the mask parameters in bytes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   Either @p item->mask or one of the mask parameters on success, NULL
 *   otherwise and rte_errno is set.
 */
static const void *
flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
		   const void *mask_supported, const void *mask_empty,
		   size_t mask_size, struct rte_flow_error *error)
{
	const uint8_t *mask;
	size_t i;

	/* item->last and item->mask cannot exist without item->spec. */
	if (!item->spec && (item->mask || item->last)) {
		rte_flow_error_set(error, EINVAL,
				   RTE_FLOW_ERROR_TYPE_ITEM, item,
				   "\"mask\" or \"last\" field provided without"
				   " a corresponding \"spec\"");
		return NULL;
	}
	/* No spec, no mask, no problem. */
	if (!item->spec)
		return mask_empty;
	mask = item->mask ? item->mask : mask_default;
	assert(mask);
	/*
	 * Single-pass check to make sure that:
	 * - Mask is supported, no bits are set outside mask_supported.
	 * - Both item->spec and item->last are included in mask.
	 */
	for (i = 0; i != mask_size; ++i) {
		if (!mask[i])
			continue;
		if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
		    ((const uint8_t *)mask_supported)[i]) {
			rte_flow_error_set(error, ENOTSUP,
					   RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
					   "unsupported field found"
					   " in \"mask\"");
			return NULL;
		}
		if (item->last &&
		    (((const uint8_t *)item->spec)[i] & mask[i]) !=
		    (((const uint8_t *)item->last)[i] & mask[i])) {
			rte_flow_error_set(error, EINVAL,
					   RTE_FLOW_ERROR_TYPE_ITEM_LAST,
					   item->last,
					   "range between \"spec\" and \"last\""
					   " not comprised in \"mask\"");
			return NULL;
		}
	}
	return mask;
}

/**
 * Build a conversion table between port ID and ifindex.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[out] ptoi
 *   Pointer to ptoi table.
 * @param[in] len
 *   Size of ptoi table provided.
 *
 * @return
 *   Size of ptoi table filled.
 */
static unsigned int
flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
			  unsigned int len)
{
	unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
	uint16_t port_id[n + 1];
	unsigned int i;
	unsigned int own = 0;

	/* At least one port is needed when no switch domain is present. */
	if (!n) {
		n = 1;
		port_id[0] = dev->data->port_id;
	} else {
		n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
	}
	if (n > len)
		return 0;
	for (i = 0; i != n; ++i) {
		struct rte_eth_dev_info dev_info;

		rte_eth_dev_info_get(port_id[i], &dev_info);
		if (port_id[i] == dev->data->port_id)
			own = i;
		ptoi[i].port_id = port_id[i];
		ptoi[i].ifindex = dev_info.if_index;
	}
	/* Ensure first entry of ptoi[] is the current device. */
	if (own) {
		struct flow_tcf_ptoi tmp = ptoi[0];

		ptoi[0] = ptoi[own];
		ptoi[own] = tmp;
	}
	/* An entry with zero ifindex terminates ptoi[]. */
	ptoi[n].port_id = 0;
	ptoi[n].ifindex = 0;
	return n + 1;
}
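
/*
 * Sketch of the lookup pattern used on the resulting table by the
 * validate/translate callbacks below:
 *
 *	for (i = 0; ptoi[i].ifindex; ++i)
 *		if (ptoi[i].port_id == port_id)
 *			break;
 *
 * A zero ptoi[i].ifindex after the loop means the port ID could not be
 * converted to an ifindex.
 */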

/**
 * Verify the @p attr will be correctly understood by the E-switch.
 *
 * @param[in] attr
 *   Pointer to flow attributes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
			     struct rte_flow_error *error)
{
	/*
	 * Supported attributes: no groups, some priorities and ingress only.
	 * Don't care about transfer as it is the caller's problem.
	 */
	if (attr->group)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
					  "groups are not supported");
	if (attr->priority > 0xfffe)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
					  attr,
					  "lowest priority level is 0xfffe");
	if (!attr->ingress)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
					  attr, "only ingress is supported");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
					  attr, "egress is not supported");
	return 0;
}

/**
 * Validate flow for E-Switch.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_tcf_validate(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error)
{
	union {
		const struct rte_flow_item_port_id *port_id;
		const struct rte_flow_item_eth *eth;
		const struct rte_flow_item_vlan *vlan;
		const struct rte_flow_item_ipv4 *ipv4;
		const struct rte_flow_item_ipv6 *ipv6;
		const struct rte_flow_item_tcp *tcp;
		const struct rte_flow_item_udp *udp;
	} spec, mask;
	union {
		const struct rte_flow_action_port_id *port_id;
		const struct rte_flow_action_of_push_vlan *of_push_vlan;
		const struct rte_flow_action_of_set_vlan_vid *
			of_set_vlan_vid;
		const struct rte_flow_action_of_set_vlan_pcp *
			of_set_vlan_pcp;
	} conf;
	uint32_t item_flags = 0;
	uint32_t action_flags = 0;
	uint8_t next_protocol = -1;
	unsigned int tcm_ifindex = 0;
	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
	struct rte_eth_dev *port_id_dev = NULL;
	bool in_port_id_set = false;
	int ret;

	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
						PTOI_TABLE_SZ_MAX(dev)));
	ret = flow_tcf_validate_attributes(attr, error);
	if (ret < 0)
		return ret;
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		unsigned int i;

		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_PORT_ID:
			mask.port_id = flow_tcf_item_mask
				(items, &rte_flow_item_port_id_mask,
				 &flow_tcf_mask_supported.port_id,
				 &flow_tcf_mask_empty.port_id,
				 sizeof(flow_tcf_mask_supported.port_id),
				 error);
			if (!mask.port_id)
				return -rte_errno;
			if (mask.port_id == &flow_tcf_mask_empty.port_id) {
				in_port_id_set = true;
				break;
			}
			spec.port_id = items->spec;
			if (mask.port_id->id && mask.port_id->id != 0xffffffff)
				return rte_flow_error_set
					(error, ENOTSUP,
					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
					 mask.port_id,
					 "no support for partial mask on"
					 " \"id\" field");
			if (!mask.port_id->id)
				i = 0;
			else
				for (i = 0; ptoi[i].ifindex; ++i)
					if (ptoi[i].port_id == spec.port_id->id)
						break;
			if (!ptoi[i].ifindex)
				return rte_flow_error_set
					(error, ENODEV,
					 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
					 spec.port_id,
					 "missing data to convert port ID to"
					 " ifindex");
			if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
				return rte_flow_error_set
					(error, ENOTSUP,
					 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
					 spec.port_id,
					 "cannot match traffic for"
					 " several port IDs through"
					 " a single flow rule");
			tcm_ifindex = ptoi[i].ifindex;
			in_port_id_set = true;
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			ret = mlx5_flow_validate_item_eth(items, item_flags,
							  error);
			if (ret < 0)
				return ret;
			item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
			/*
			 * Redundant check due to different supported mask.
			 * Same for the rest of items.
			 */
			mask.eth = flow_tcf_item_mask
				(items, &rte_flow_item_eth_mask,
				 &flow_tcf_mask_supported.eth,
				 &flow_tcf_mask_empty.eth,
				 sizeof(flow_tcf_mask_supported.eth),
				 error);
			if (!mask.eth)
				return -rte_errno;
			if (mask.eth->type && mask.eth->type !=
			    RTE_BE16(0xffff))
				return rte_flow_error_set
					(error, ENOTSUP,
					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
					 mask.eth,
					 "no support for partial mask on"
					 " \"type\" field");
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			ret = mlx5_flow_validate_item_vlan(items, item_flags,
							   error);
			if (ret < 0)
				return ret;
			item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
			mask.vlan = flow_tcf_item_mask
				(items, &rte_flow_item_vlan_mask,
				 &flow_tcf_mask_supported.vlan,
				 &flow_tcf_mask_empty.vlan,
				 sizeof(flow_tcf_mask_supported.vlan),
				 error);
			if (!mask.vlan)
				return -rte_errno;
			if ((mask.vlan->tci & RTE_BE16(0xe000) &&
			     (mask.vlan->tci & RTE_BE16(0xe000)) !=
			      RTE_BE16(0xe000)) ||
			    (mask.vlan->tci & RTE_BE16(0x0fff) &&
			     (mask.vlan->tci & RTE_BE16(0x0fff)) !=
			      RTE_BE16(0x0fff)) ||
			    (mask.vlan->inner_type &&
			     mask.vlan->inner_type != RTE_BE16(0xffff)))
				return rte_flow_error_set
					(error, ENOTSUP,
					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
					 mask.vlan,
					 "no support for partial masks on"
					 " \"tci\" (PCP and VID parts) and"
					 " \"inner_type\" fields");
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
							   error);
			if (ret < 0)
				return ret;
			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			mask.ipv4 = flow_tcf_item_mask
				(items, &rte_flow_item_ipv4_mask,
				 &flow_tcf_mask_supported.ipv4,
				 &flow_tcf_mask_empty.ipv4,
				 sizeof(flow_tcf_mask_supported.ipv4),
				 error);
			if (!mask.ipv4)
				return -rte_errno;
			if (mask.ipv4->hdr.next_proto_id &&
			    mask.ipv4->hdr.next_proto_id != 0xff)
				return rte_flow_error_set
					(error, ENOTSUP,
					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
					 mask.ipv4,
					 "no support for partial mask on"
					 " \"hdr.next_proto_id\" field");
			else if (mask.ipv4->hdr.next_proto_id)
				next_protocol =
					((const struct rte_flow_item_ipv4 *)
					 (items->spec))->hdr.next_proto_id;
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
							   error);
			if (ret < 0)
				return ret;
			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			mask.ipv6 = flow_tcf_item_mask
				(items, &rte_flow_item_ipv6_mask,
				 &flow_tcf_mask_supported.ipv6,
				 &flow_tcf_mask_empty.ipv6,
				 sizeof(flow_tcf_mask_supported.ipv6),
				 error);
			if (!mask.ipv6)
				return -rte_errno;
			if (mask.ipv6->hdr.proto &&
			    mask.ipv6->hdr.proto != 0xff)
				return rte_flow_error_set
					(error, ENOTSUP,
					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
					 mask.ipv6,
					 "no support for partial mask on"
					 " \"hdr.proto\" field");
			else if (mask.ipv6->hdr.proto)
				next_protocol =
					((const struct rte_flow_item_ipv6 *)
					 (items->spec))->hdr.proto;
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			ret = mlx5_flow_validate_item_udp(items, item_flags,
							  next_protocol, error);
			if (ret < 0)
				return ret;
			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
			mask.udp = flow_tcf_item_mask
				(items, &rte_flow_item_udp_mask,
				 &flow_tcf_mask_supported.udp,
				 &flow_tcf_mask_empty.udp,
				 sizeof(flow_tcf_mask_supported.udp),
				 error);
			if (!mask.udp)
				return -rte_errno;
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			ret = mlx5_flow_validate_item_tcp
				(items, item_flags,
				 next_protocol,
				 &flow_tcf_mask_supported.tcp,
				 error);
			if (ret < 0)
				return ret;
			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
			mask.tcp = flow_tcf_item_mask
				(items, &rte_flow_item_tcp_mask,
				 &flow_tcf_mask_supported.tcp,
				 &flow_tcf_mask_empty.tcp,
				 sizeof(flow_tcf_mask_supported.tcp),
				 error);
			if (!mask.tcp)
				return -rte_errno;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "item not supported");
		}
	}
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		unsigned int i;

		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_PORT_ID:
			if (action_flags & MLX5_TCF_FATE_ACTIONS)
				return rte_flow_error_set
					(error, EINVAL,
					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
					 "can't have multiple fate actions");
			conf.port_id = actions->conf;
			if (conf.port_id->original)
				i = 0;
			else
				for (i = 0; ptoi[i].ifindex; ++i)
					if (ptoi[i].port_id == conf.port_id->id)
						break;
			if (!ptoi[i].ifindex)
				return rte_flow_error_set
					(error, ENODEV,
					 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					 conf.port_id,
					 "missing data to convert port ID to"
					 " ifindex");
			action_flags |= MLX5_FLOW_ACTION_PORT_ID;
			port_id_dev = &rte_eth_devices[conf.port_id->id];
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			if (action_flags & MLX5_TCF_FATE_ACTIONS)
				return rte_flow_error_set
					(error, EINVAL,
					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
					 "can't have multiple fate actions");
			action_flags |= MLX5_FLOW_ACTION_DROP;
			break;
		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
			action_flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
			break;
		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
			action_flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
			break;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
			if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
				return rte_flow_error_set
					(error, ENOTSUP,
					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
					 "vlan modify is not supported,"
					 " set action must follow push action");
			action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
			break;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
			if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
				return rte_flow_error_set
					(error, ENOTSUP,
					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
					 "vlan modify is not supported,"
					 " set action must follow push action");
			action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
	}
	/*
	 * FW syndrome (0xA9C090):
	 *     set_flow_table_entry: push vlan action fte in fdb can ONLY be
	 *     forwarded to the uplink.
	 */
	if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
	    (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
	    ((struct priv *)port_id_dev->data->dev_private)->representor)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
					  "vlan push can only be applied"
					  " when forwarding to uplink port");
	/*
	 * FW syndrome (0x294609):
	 *     set_flow_table_entry: modify/pop/push actions in fdb flow table
	 *     are supported only while forwarding to vport.
	 */
	if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
	    !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
					  "vlan actions are supported"
					  " only with port_id action");
	if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
					  "no fate action is found");
	return 0;
}

/**
 * Calculate maximum size of memory for flow items of Linux TC flower and
 * extract specified items.
 *
 * @param[in] items
 *   Pointer to the list of items.
 * @param[out] item_flags
 *   Pointer to the detected items.
 *
 * @return
 *   Maximum size of memory for items.
 */
static int
flow_tcf_get_items_and_size(const struct rte_flow_item items[],
			    uint64_t *item_flags)
{
	int size = 0;
	uint64_t flags = 0;

	size += SZ_NLATTR_STRZ_OF("flower") +
		SZ_NLATTR_NEST + /* TCA_OPTIONS. */
		SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_PORT_ID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
				/* dst/src MAC addr and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L2;
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_TYPE_OF(uint16_t) +
				/* VLAN Ether type. */
				SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
				SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
			flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_TYPE_OF(uint32_t) * 4;
				/* dst/src IP addr and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 4;
				/* dst/src IP addr and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_TYPE_OF(uint16_t) * 4;
				/* dst/src port and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_TYPE_OF(uint16_t) * 4;
				/* dst/src port and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
			break;
		default:
			DRV_LOG(WARNING,
				"unsupported item %p type %d,"
				" items must be validated before flow creation",
				(const void *)items, items->type);
			break;
		}
	}
	*item_flags = flags;
	return size;
}
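
/*
 * Example (illustrative): a single ETH item reserves
 * SZ_NLATTR_TYPE_OF(uint16_t) = 8 bytes for the Ether type plus
 * SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) = 12 bytes for each of the four MAC
 * attributes (dst/src value and mask), i.e. 8 + 4 * 12 = 56 bytes.
 */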

/**
 * Calculate maximum size of memory for flow actions of Linux TC flower and
 * extract specified actions.
 *
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] action_flags
 *   Pointer to the detected actions.
 *
 * @return
 *   Maximum size of memory for actions.
 */
static int
flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
			      uint64_t *action_flags)
{
	int size = 0;
	uint64_t flags = 0;

	size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_PORT_ID:
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("mirred") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(struct tc_mirred);
			flags |= MLX5_FLOW_ACTION_PORT_ID;
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("gact") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(struct tc_gact);
			flags |= MLX5_FLOW_ACTION_DROP;
			break;
		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
			flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
			flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
			flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
			flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
			goto action_of_vlan;
action_of_vlan:
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("vlan") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(struct tc_vlan) +
				SZ_NLATTR_TYPE_OF(uint16_t) +
				/* VLAN protocol. */
				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
			break;
		default:
			DRV_LOG(WARNING,
				"unsupported action %p type %d,"
				" actions must be validated before flow"
				" creation",
				(const void *)actions, actions->type);
			break;
		}
	}
	*action_flags = flags;
	return size;
}
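
/*
 * Example (illustrative, assuming the kernel's 20-byte struct tc_gact):
 * a DROP action reserves SZ_NLATTR_NEST (4) + SZ_NLATTR_STRZ_OF("gact")
 * (12) + SZ_NLATTR_NEST (4) + SZ_NLATTR_TYPE_OF(struct tc_gact) (24)
 * = 44 bytes.
 */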

/**
 * Brand rtnetlink buffer with unique handle.
 *
 * This handle should be unique for a given network interface to avoid
 * collisions.
 *
 * @param nlh
 *   Pointer to Netlink message.
 * @param handle
 *   Unique 32-bit handle to use.
 */
static void
flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
{
	struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);

	tcm->tcm_handle = handle;
	DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
		(void *)nlh, handle);
}

/**
 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
 * memory required, allocates the memory, initializes Netlink message headers
 * and sets a unique TC message handle.
 *
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] item_flags
 *   Pointer to bit mask of all items detected.
 * @param[out] action_flags
 *   Pointer to bit mask of all actions detected.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   Pointer to mlx5_flow object on success,
 *   otherwise NULL and rte_errno is set.
 */
static struct mlx5_flow *
flow_tcf_prepare(const struct rte_flow_attr *attr __rte_unused,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 uint64_t *item_flags, uint64_t *action_flags,
		 struct rte_flow_error *error)
{
	size_t size = sizeof(struct mlx5_flow) +
		      MNL_ALIGN(sizeof(struct nlmsghdr)) +
		      MNL_ALIGN(sizeof(struct tcmsg));
	struct mlx5_flow *dev_flow;
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;

	size += flow_tcf_get_items_and_size(items, item_flags);
	size += flow_tcf_get_actions_and_size(actions, action_flags);
	dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
	if (!dev_flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
				   "not enough memory to create E-Switch flow");
		return NULL;
	}
	nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	*dev_flow = (struct mlx5_flow){
		.tcf = (struct mlx5_flow_tcf){
			.nlh = nlh,
			.tcm = tcm,
		},
	};
	/*
	 * Generate a reasonably unique handle based on the address of the
	 * target buffer.
	 *
	 * This is straightforward on 32-bit systems where the flow pointer can
	 * be used directly. Otherwise, its least significant part is taken
	 * after shifting it by the previous power of two of the pointed buffer
	 * size.
	 */
	if (sizeof(dev_flow) <= 4)
		flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
	else
		flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
				  rte_log2_u32(rte_align32prevpow2(size)));
	return dev_flow;
}
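
/*
 * Example (illustrative): on a 64-bit system with size = 400, the pointer
 * is shifted right by rte_log2_u32(rte_align32prevpow2(400)) =
 * rte_log2_u32(256) = 8 bits. Two live buffers of that size are at least
 * 400 > 256 bytes apart, so their shifted addresses cannot collide.
 */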

/**
 * Translate flow for Linux TC flower and construct Netlink message.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device.
 * @param[in, out] dev_flow
 *   Pointer to the sub flow.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	union {
		const struct rte_flow_item_port_id *port_id;
		const struct rte_flow_item_eth *eth;
		const struct rte_flow_item_vlan *vlan;
		const struct rte_flow_item_ipv4 *ipv4;
		const struct rte_flow_item_ipv6 *ipv6;
		const struct rte_flow_item_tcp *tcp;
		const struct rte_flow_item_udp *udp;
	} spec, mask;
	union {
		const struct rte_flow_action_port_id *port_id;
		const struct rte_flow_action_of_push_vlan *of_push_vlan;
		const struct rte_flow_action_of_set_vlan_vid *
			of_set_vlan_vid;
		const struct rte_flow_action_of_set_vlan_pcp *
			of_set_vlan_pcp;
	} conf;
	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
	struct nlmsghdr *nlh = dev_flow->tcf.nlh;
	struct tcmsg *tcm = dev_flow->tcf.tcm;
	uint32_t na_act_index_cur;
	bool eth_type_set = 0;
	bool vlan_present = 0;
	bool vlan_eth_type_set = 0;
	bool ip_proto_set = 0;
	struct nlattr *na_flower;
	struct nlattr *na_flower_act;
	struct nlattr *na_vlan_id = NULL;
	struct nlattr *na_vlan_priority = NULL;

	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
						PTOI_TABLE_SZ_MAX(dev)));
	nlh = dev_flow->tcf.nlh;
	tcm = dev_flow->tcf.tcm;
	/* Prepare API must have been called beforehand. */
	assert(nlh != NULL && tcm != NULL);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ptoi[0].ifindex;
	tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
	/*
	 * Priority cannot be zero to prevent the kernel from picking one
	 * automatically.
	 */
	tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
				  RTE_BE16(ETH_P_ALL));
	mnl_attr_put_strz(nlh, TCA_KIND, "flower");
	na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
	mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
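	/*
	 * At this point the message corresponds roughly to the following
	 * tc(8) command (sketch, names in <> depend on the flow):
	 *
	 *   tc filter add dev <ifname> ingress protocol all \
	 *           prio <attr->priority + 1> flower skip_sw <matches...>
	 */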
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		unsigned int i;

		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_PORT_ID:
			mask.port_id = flow_tcf_item_mask
				(items, &rte_flow_item_port_id_mask,
				 &flow_tcf_mask_supported.port_id,
				 &flow_tcf_mask_empty.port_id,
				 sizeof(flow_tcf_mask_supported.port_id),
				 error);
			assert(mask.port_id);
			if (mask.port_id == &flow_tcf_mask_empty.port_id)
				break;
			spec.port_id = items->spec;
			if (!mask.port_id->id)
				i = 0;
			else
				for (i = 0; ptoi[i].ifindex; ++i)
					if (ptoi[i].port_id == spec.port_id->id)
						break;
			assert(ptoi[i].ifindex);
			tcm->tcm_ifindex = ptoi[i].ifindex;
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			mask.eth = flow_tcf_item_mask
				(items, &rte_flow_item_eth_mask,
				 &flow_tcf_mask_supported.eth,
				 &flow_tcf_mask_empty.eth,
				 sizeof(flow_tcf_mask_supported.eth),
				 error);
			assert(mask.eth);
			if (mask.eth == &flow_tcf_mask_empty.eth)
				break;
			spec.eth = items->spec;
			if (mask.eth->type) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
						 spec.eth->type);
				eth_type_set = 1;
			}
			if (!is_zero_ether_addr(&mask.eth->dst)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
					     ETHER_ADDR_LEN,
					     spec.eth->dst.addr_bytes);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
					     ETHER_ADDR_LEN,
					     mask.eth->dst.addr_bytes);
			}
			if (!is_zero_ether_addr(&mask.eth->src)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
					     ETHER_ADDR_LEN,
					     spec.eth->src.addr_bytes);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
					     ETHER_ADDR_LEN,
					     mask.eth->src.addr_bytes);
			}
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			mask.vlan = flow_tcf_item_mask
				(items, &rte_flow_item_vlan_mask,
				 &flow_tcf_mask_supported.vlan,
				 &flow_tcf_mask_empty.vlan,
				 sizeof(flow_tcf_mask_supported.vlan),
				 error);
			assert(mask.vlan);
			if (!eth_type_set)
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
						 RTE_BE16(ETH_P_8021Q));
			eth_type_set = 1;
			vlan_present = 1;
			if (mask.vlan == &flow_tcf_mask_empty.vlan)
				break;
			spec.vlan = items->spec;
			if (mask.vlan->inner_type) {
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
						 spec.vlan->inner_type);
				vlan_eth_type_set = 1;
			}
			if (mask.vlan->tci & RTE_BE16(0xe000))
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
						(rte_be_to_cpu_16
						 (spec.vlan->tci) >> 13) & 0x7);
			if (mask.vlan->tci & RTE_BE16(0x0fff))
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
						 rte_be_to_cpu_16
						 (spec.vlan->tci &
						  RTE_BE16(0x0fff)));
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			mask.ipv4 = flow_tcf_item_mask
				(items, &rte_flow_item_ipv4_mask,
				 &flow_tcf_mask_supported.ipv4,
				 &flow_tcf_mask_empty.ipv4,
				 sizeof(flow_tcf_mask_supported.ipv4),
				 error);
			assert(mask.ipv4);
			if (!eth_type_set || !vlan_eth_type_set)
				mnl_attr_put_u16(nlh,
						 vlan_present ?
						 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
						 TCA_FLOWER_KEY_ETH_TYPE,
						 RTE_BE16(ETH_P_IP));
			eth_type_set = 1;
			vlan_eth_type_set = 1;
			if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
				break;
			spec.ipv4 = items->spec;
			if (mask.ipv4->hdr.next_proto_id) {
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
						spec.ipv4->hdr.next_proto_id);
				ip_proto_set = 1;
			}
			if (mask.ipv4->hdr.src_addr) {
				mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
						 spec.ipv4->hdr.src_addr);
				mnl_attr_put_u32(nlh,
						 TCA_FLOWER_KEY_IPV4_SRC_MASK,
						 mask.ipv4->hdr.src_addr);
			}
			if (mask.ipv4->hdr.dst_addr) {
				mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
						 spec.ipv4->hdr.dst_addr);
				mnl_attr_put_u32(nlh,
						 TCA_FLOWER_KEY_IPV4_DST_MASK,
						 mask.ipv4->hdr.dst_addr);
			}
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			mask.ipv6 = flow_tcf_item_mask
				(items, &rte_flow_item_ipv6_mask,
				 &flow_tcf_mask_supported.ipv6,
				 &flow_tcf_mask_empty.ipv6,
				 sizeof(flow_tcf_mask_supported.ipv6),
				 error);
			assert(mask.ipv6);
			if (!eth_type_set || !vlan_eth_type_set)
				mnl_attr_put_u16(nlh,
						 vlan_present ?
						 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
						 TCA_FLOWER_KEY_ETH_TYPE,
						 RTE_BE16(ETH_P_IPV6));
			eth_type_set = 1;
			vlan_eth_type_set = 1;
			if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
				break;
			spec.ipv6 = items->spec;
			if (mask.ipv6->hdr.proto) {
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
						spec.ipv6->hdr.proto);
				ip_proto_set = 1;
			}
			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
					     sizeof(spec.ipv6->hdr.src_addr),
					     spec.ipv6->hdr.src_addr);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
					     sizeof(mask.ipv6->hdr.src_addr),
					     mask.ipv6->hdr.src_addr);
			}
			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
					     sizeof(spec.ipv6->hdr.dst_addr),
					     spec.ipv6->hdr.dst_addr);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
					     sizeof(mask.ipv6->hdr.dst_addr),
					     mask.ipv6->hdr.dst_addr);
			}
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			mask.udp = flow_tcf_item_mask
				(items, &rte_flow_item_udp_mask,
				 &flow_tcf_mask_supported.udp,
				 &flow_tcf_mask_empty.udp,
				 sizeof(flow_tcf_mask_supported.udp),
				 error);
			assert(mask.udp);
			if (!ip_proto_set)
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
						IPPROTO_UDP);
			if (mask.udp == &flow_tcf_mask_empty.udp)
				break;
			spec.udp = items->spec;
			if (mask.udp->hdr.src_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
						 spec.udp->hdr.src_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_UDP_SRC_MASK,
						 mask.udp->hdr.src_port);
			}
			if (mask.udp->hdr.dst_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
						 spec.udp->hdr.dst_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_UDP_DST_MASK,
						 mask.udp->hdr.dst_port);
			}
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			mask.tcp = flow_tcf_item_mask
				(items, &rte_flow_item_tcp_mask,
				 &flow_tcf_mask_supported.tcp,
				 &flow_tcf_mask_empty.tcp,
				 sizeof(flow_tcf_mask_supported.tcp),
				 error);
			assert(mask.tcp);
			if (!ip_proto_set)
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
						IPPROTO_TCP);
			if (mask.tcp == &flow_tcf_mask_empty.tcp)
				break;
			spec.tcp = items->spec;
			if (mask.tcp->hdr.src_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
						 spec.tcp->hdr.src_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_TCP_SRC_MASK,
						 mask.tcp->hdr.src_port);
			}
			if (mask.tcp->hdr.dst_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
						 spec.tcp->hdr.dst_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_TCP_DST_MASK,
						 mask.tcp->hdr.dst_port);
			}
			if (mask.tcp->hdr.tcp_flags) {
				mnl_attr_put_u16
					(nlh,
					 TCA_FLOWER_KEY_TCP_FLAGS,
					 rte_cpu_to_be_16
						(spec.tcp->hdr.tcp_flags));
				mnl_attr_put_u16
					(nlh,
					 TCA_FLOWER_KEY_TCP_FLAGS_MASK,
					 rte_cpu_to_be_16
						(mask.tcp->hdr.tcp_flags));
			}
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "item not supported");
		}
	}
	na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
	na_act_index_cur = 1;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		struct nlattr *na_act_index;
		struct nlattr *na_act;
		unsigned int vlan_act;
		unsigned int i;

		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_PORT_ID:
			conf.port_id = actions->conf;
			if (conf.port_id->original)
				i = 0;
			else
				for (i = 0; ptoi[i].ifindex; ++i)
					if (ptoi[i].port_id == conf.port_id->id)
						break;
			assert(ptoi[i].ifindex);
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_MIRRED_PARMS,
				     sizeof(struct tc_mirred),
				     &(struct tc_mirred){
					.action = TC_ACT_STOLEN,
					.eaction = TCA_EGRESS_REDIR,
					.ifindex = ptoi[i].ifindex,
				     });
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_GACT_PARMS,
				     sizeof(struct tc_gact),
				     &(struct tc_gact){
					.action = TC_ACT_SHOT,
				     });
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			break;
		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
			conf.of_push_vlan = NULL;
			vlan_act = TCA_VLAN_ACT_POP;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
			conf.of_push_vlan = actions->conf;
			vlan_act = TCA_VLAN_ACT_PUSH;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
			conf.of_set_vlan_vid = actions->conf;
			if (na_vlan_id)
				goto override_na_vlan_id;
			vlan_act = TCA_VLAN_ACT_MODIFY;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
			conf.of_set_vlan_pcp = actions->conf;
			if (na_vlan_priority)
				goto override_na_vlan_priority;
			vlan_act = TCA_VLAN_ACT_MODIFY;
			goto action_of_vlan;
action_of_vlan:
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_VLAN_PARMS,
				     sizeof(struct tc_vlan),
				     &(struct tc_vlan){
					.action = TC_ACT_PIPE,
					.v_action = vlan_act,
				     });
			if (vlan_act == TCA_VLAN_ACT_POP) {
				mnl_attr_nest_end(nlh, na_act);
				mnl_attr_nest_end(nlh, na_act_index);
				break;
			}
			if (vlan_act == TCA_VLAN_ACT_PUSH)
				mnl_attr_put_u16(nlh,
						 TCA_VLAN_PUSH_VLAN_PROTOCOL,
						 conf.of_push_vlan->ethertype);
			na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
			mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
			na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
			mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			if (actions->type ==
			    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
override_na_vlan_id:
				na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
				*(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
					rte_be_to_cpu_16
					(conf.of_set_vlan_vid->vlan_vid);
			} else if (actions->type ==
				   RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
override_na_vlan_priority:
				na_vlan_priority->nla_type =
					TCA_VLAN_PUSH_VLAN_PRIORITY;
				*(uint8_t *)mnl_attr_get_payload
					(na_vlan_priority) =
					conf.of_set_vlan_pcp->vlan_pcp;
			}
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
	}
	assert(na_flower);
	assert(na_flower_act);
	mnl_attr_nest_end(nlh, na_flower_act);
	mnl_attr_nest_end(nlh, na_flower);
	return 0;
}

/**
 * Send Netlink message with acknowledgment.
 *
 * @param nl
 *   Libmnl socket to use.
 * @param nlh
 *   Message to send. This function always raises the NLM_F_ACK flag before
 *   sending.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
{
	alignas(struct nlmsghdr)
	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
		    nlh->nlmsg_len - sizeof(*nlh)];
	uint32_t seq = random();
	int ret;

	nlh->nlmsg_flags |= NLM_F_ACK;
	nlh->nlmsg_seq = seq;
	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
	if (ret != -1)
		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
	if (ret != -1)
		ret = mnl_cb_run
			(ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
	if (!ret)
		return 0;
	rte_errno = errno;
	return -rte_errno;
}
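
/*
 * Note on the ans[] buffer in flow_tcf_nl_ack(): it is sized for the worst
 * case where the kernel echoes the entire request back inside struct
 * nlmsgerr. With NETLINK_CAP_ACK set (see mlx5_flow_tcf_socket_create())
 * the kernel omits that echo, so the buffer size is an upper bound.
 */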

/**
 * Apply flow to E-Switch by sending Netlink message.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to the sub flow.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
	       struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct mnl_socket *nl = priv->mnl_socket;
	struct mlx5_flow *dev_flow;
	struct nlmsghdr *nlh;

	dev_flow = LIST_FIRST(&flow->dev_flows);
	/* E-Switch flow can't be expanded. */
	assert(!LIST_NEXT(dev_flow, next));
	nlh = dev_flow->tcf.nlh;
	nlh->nlmsg_type = RTM_NEWTFILTER;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	if (!flow_tcf_nl_ack(nl, nlh))
		return 0;
	return rte_flow_error_set(error, rte_errno,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
				  "netlink: failed to create TC flow rule");
}

/**
 * Remove flow from E-Switch by sending Netlink message.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to the sub flow.
 */
static void
flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;
	struct mnl_socket *nl = priv->mnl_socket;
	struct mlx5_flow *dev_flow;
	struct nlmsghdr *nlh;

	if (!flow)
		return;
	dev_flow = LIST_FIRST(&flow->dev_flows);
	if (!dev_flow)
		return;
	/* E-Switch flow can't be expanded. */
	assert(!LIST_NEXT(dev_flow, next));
	nlh = dev_flow->tcf.nlh;
	nlh->nlmsg_type = RTM_DELTFILTER;
	nlh->nlmsg_flags = NLM_F_REQUEST;
	flow_tcf_nl_ack(nl, nlh);
}

/**
 * Remove flow from E-Switch and release resources of the device flow.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to the sub flow.
 */
static void
flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_flow *dev_flow;

	if (!flow)
		return;
	flow_tcf_remove(dev, flow);
	dev_flow = LIST_FIRST(&flow->dev_flows);
	if (!dev_flow)
		return;
	/* E-Switch flow can't be expanded. */
	assert(!LIST_NEXT(dev_flow, next));
	LIST_REMOVE(dev_flow, next);
	rte_free(dev_flow);
}

const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
	.validate = flow_tcf_validate,
	.prepare = flow_tcf_prepare,
	.translate = flow_tcf_translate,
	.apply = flow_tcf_apply,
	.remove = flow_tcf_remove,
	.destroy = flow_tcf_destroy,
};

/**
 * Initialize ingress qdisc of a given network interface.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind.
 * @param ifindex
 *   Index of network interface to initialize.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
		   struct rte_flow_error *error)
{
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	alignas(struct nlmsghdr)
	uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];

	/* Destroy existing ingress qdisc and everything attached to it. */
	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_DELQDISC;
	nlh->nlmsg_flags = NLM_F_REQUEST;
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ifindex;
	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
	tcm->tcm_parent = TC_H_INGRESS;
	/* Ignore errors when qdisc is already absent. */
	if (flow_tcf_nl_ack(nl, nlh) &&
	    rte_errno != EINVAL && rte_errno != ENOENT)
		return rte_flow_error_set(error, rte_errno,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					  "netlink: failed to remove ingress"
					  " qdisc");
	/* Create fresh ingress qdisc. */
	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_NEWQDISC;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ifindex;
	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
	tcm->tcm_parent = TC_H_INGRESS;
	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
	if (flow_tcf_nl_ack(nl, nlh))
		return rte_flow_error_set(error, rte_errno,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					  "netlink: failed to create ingress"
					  " qdisc");
	return 0;
}

/**
 * Create and configure a libmnl socket for Netlink flow rules.
 *
 * @return
 *   A valid libmnl socket object pointer on success, NULL otherwise and
 *   rte_errno is set.
 */
struct mnl_socket *
mlx5_flow_tcf_socket_create(void)
{
	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);

	if (nl) {
		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
				      sizeof(int));
		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
			return nl;
	}
	rte_errno = errno;
	if (nl)
		mnl_socket_close(nl);
	return NULL;
}
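
/*
 * Note: NETLINK_CAP_ACK above asks the kernel not to copy the request
 * payload back into acknowledgments, keeping them small. Its return value
 * is deliberately ignored as older kernels lack the option and acks still
 * work without it, just with larger messages.
 */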

/**
 * Destroy a libmnl socket.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind.
 */
void
mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
{
	mnl_socket_close(nl);
}