4 * Copyright 2017 6WIND S.A.
5 * Copyright 2017 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 #include <sys/queue.h>
38 #include <rte_byteorder.h>
39 #include <rte_jhash.h>
40 #include <rte_malloc.h>
41 #include <rte_eth_tap.h>
43 #include <tap_autoconf.h>
44 #include <tap_tcmsgs.h>
46 #ifndef HAVE_TC_FLOWER
/*
 * Local fallback of the TC "flower" classifier netlink attribute IDs,
 * mirroring linux/pkt_cls.h; per-attribute payload types noted inline.
 */
48 * For kernels < 4.2, this enum is not defined. Runtime checks will be made to
49 * avoid sending TC messages the kernel cannot understand.
56 TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */
57 TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */
58 TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */
59 TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */
60 TCA_FLOWER_KEY_ETH_TYPE, /* be16 */
61 TCA_FLOWER_KEY_IP_PROTO, /* u8 */
62 TCA_FLOWER_KEY_IPV4_SRC, /* be32 */
63 TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */
64 TCA_FLOWER_KEY_IPV4_DST, /* be32 */
65 TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */
66 TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */
67 TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */
68 TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */
69 TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */
70 TCA_FLOWER_KEY_TCP_SRC, /* be16 */
71 TCA_FLOWER_KEY_TCP_DST, /* be16 */
72 TCA_FLOWER_KEY_UDP_SRC, /* be16 */
73 TCA_FLOWER_KEY_UDP_DST, /* be16 */
76 #ifndef HAVE_TC_VLAN_ID
/* VLAN key IDs were added later; "+ 2" skips the intervening
 * TCA_FLOWER_FLAGS slot so the values match the kernel's numbering. */
78 /* TCA_FLOWER_FLAGS, */
79 TCA_FLOWER_KEY_VLAN_ID = TCA_FLOWER_KEY_UDP_DST + 2, /* be16 */
80 TCA_FLOWER_KEY_VLAN_PRIO, /* u8 */
81 TCA_FLOWER_KEY_VLAN_ETH_TYPE, /* be16 */
/* NOTE(review): these member lines belong to several structures whose
 * declarations are not visible in this extraction (apparently struct
 * rte_flow, and a conversion/remote-rule helper struct) — confirm
 * against the full file before relying on the grouping. */
86 LIST_ENTRY(rte_flow) next; /* Pointer to the next rte_flow structure */
87 struct rte_flow *remote_flow; /* associated remote flow */
95 struct rte_flow *flow;
99 struct rte_flow_attr attr;
100 struct rte_flow_item items[2];
/* Forward declarations of the per-item conversion callbacks used by the
 * tap_flow_items graph below. */
104 static int tap_flow_create_eth(const struct rte_flow_item *item, void *data);
105 static int tap_flow_create_vlan(const struct rte_flow_item *item, void *data);
106 static int tap_flow_create_ipv4(const struct rte_flow_item *item, void *data);
107 static int tap_flow_create_ipv6(const struct rte_flow_item *item, void *data);
108 static int tap_flow_create_udp(const struct rte_flow_item *item, void *data);
109 static int tap_flow_create_tcp(const struct rte_flow_item *item, void *data);
111 tap_flow_validate(struct rte_eth_dev *dev,
112 const struct rte_flow_attr *attr,
113 const struct rte_flow_item items[],
114 const struct rte_flow_action actions[],
115 struct rte_flow_error *error);
117 static struct rte_flow *
118 tap_flow_create(struct rte_eth_dev *dev,
119 const struct rte_flow_attr *attr,
120 const struct rte_flow_item items[],
121 const struct rte_flow_action actions[],
122 struct rte_flow_error *error);
125 tap_flow_destroy(struct rte_eth_dev *dev,
126 struct rte_flow *flow,
127 struct rte_flow_error *error);
/* Flow operations table exported to the generic rte_flow layer. */
129 static const struct rte_flow_ops tap_flow_ops = {
130 .validate = tap_flow_validate,
131 .create = tap_flow_create,
132 .destroy = tap_flow_destroy,
133 .flush = tap_flow_flush,
136 /* Static initializer for items. */
/* ITEMS(...) builds a static, END-terminated array of item types that may
 * legally follow the current pattern item. */
138 (const enum rte_flow_item_type []){ \
139 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
142 /* Structure to generate a simple graph of layers supported by the NIC. */
143 struct tap_flow_items {
144 /* Bit-mask corresponding to what is supported for this item. */
146 const unsigned int mask_sz; /* Bit-mask size in bytes. */
148 * Bit-mask corresponding to the default mask, if none is provided
149 * along with the item.
151 const void *default_mask;
153 * Conversion function from rte_flow to netlink attributes.
156 * rte_flow item to convert.
158 * Internal structure to store the conversion.
161 * 0 on success, negative value otherwise.
163 int (*convert)(const struct rte_flow_item *item, void *data);
164 /** List of possible following items. */
165 const enum rte_flow_item_type *const items;
168 /* Graph of supported items and associated actions. */
/*
 * Indexed by RTE_FLOW_ITEM_TYPE_*: each node lists the supported match
 * mask, its size, the default mask applied when the rule provides none,
 * the netlink conversion callback, and the item types allowed next
 * (ETH -> VLAN/IPV4/IPV6 -> UDP/TCP).
 */
169 static const struct tap_flow_items tap_flow_items[] = {
170 [RTE_FLOW_ITEM_TYPE_END] = {
171 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
173 [RTE_FLOW_ITEM_TYPE_ETH] = {
175 RTE_FLOW_ITEM_TYPE_VLAN,
176 RTE_FLOW_ITEM_TYPE_IPV4,
177 RTE_FLOW_ITEM_TYPE_IPV6),
178 .mask = &(const struct rte_flow_item_eth){
179 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
180 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
183 .mask_sz = sizeof(struct rte_flow_item_eth),
184 .default_mask = &rte_flow_item_eth_mask,
185 .convert = tap_flow_create_eth,
187 [RTE_FLOW_ITEM_TYPE_VLAN] = {
188 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
189 RTE_FLOW_ITEM_TYPE_IPV6),
190 .mask = &(const struct rte_flow_item_vlan){
192 /* DEI matching is not supported */
193 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
199 .mask_sz = sizeof(struct rte_flow_item_vlan),
200 .default_mask = &rte_flow_item_vlan_mask,
201 .convert = tap_flow_create_vlan,
203 [RTE_FLOW_ITEM_TYPE_IPV4] = {
204 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
205 RTE_FLOW_ITEM_TYPE_TCP),
206 .mask = &(const struct rte_flow_item_ipv4){
213 .mask_sz = sizeof(struct rte_flow_item_ipv4),
214 .default_mask = &rte_flow_item_ipv4_mask,
215 .convert = tap_flow_create_ipv4,
217 [RTE_FLOW_ITEM_TYPE_IPV6] = {
218 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
219 RTE_FLOW_ITEM_TYPE_TCP),
220 .mask = &(const struct rte_flow_item_ipv6){
223 "\xff\xff\xff\xff\xff\xff\xff\xff"
224 "\xff\xff\xff\xff\xff\xff\xff\xff",
227 "\xff\xff\xff\xff\xff\xff\xff\xff"
228 "\xff\xff\xff\xff\xff\xff\xff\xff",
233 .mask_sz = sizeof(struct rte_flow_item_ipv6),
234 .default_mask = &rte_flow_item_ipv6_mask,
235 .convert = tap_flow_create_ipv6,
237 [RTE_FLOW_ITEM_TYPE_UDP] = {
238 .mask = &(const struct rte_flow_item_udp){
244 .mask_sz = sizeof(struct rte_flow_item_udp),
245 .default_mask = &rte_flow_item_udp_mask,
246 .convert = tap_flow_create_udp,
248 [RTE_FLOW_ITEM_TYPE_TCP] = {
249 .mask = &(const struct rte_flow_item_tcp){
255 .mask_sz = sizeof(struct rte_flow_item_tcp),
256 .default_mask = &rte_flow_item_tcp_mask,
257 .convert = tap_flow_create_tcp,
/*
 * Implicit rules installed on the remote netdevice to steer traffic to the
 * tap: local MAC, broadcast, IPv6 multicast (33:33:...), promisc, allmulti
 * and TX. Priorities are derived from PRIORITY_MASK minus the rule index so
 * each implicit rule gets a distinct, low priority slot.
 */
261 static struct remote_rule implicit_rte_flows[TAP_REMOTE_MAX_IDX] = {
262 [TAP_REMOTE_LOCAL_MAC] = {
265 .priority = PRIORITY_MASK - TAP_REMOTE_LOCAL_MAC,
269 .type = RTE_FLOW_ITEM_TYPE_ETH,
270 .mask = &(const struct rte_flow_item_eth){
271 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
275 .type = RTE_FLOW_ITEM_TYPE_END,
/* Unicast to the local MAC is redirected (stolen), not mirrored. */
277 .mirred = TCA_EGRESS_REDIR,
279 [TAP_REMOTE_BROADCAST] = {
282 .priority = PRIORITY_MASK - TAP_REMOTE_BROADCAST,
286 .type = RTE_FLOW_ITEM_TYPE_ETH,
287 .mask = &(const struct rte_flow_item_eth){
288 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
290 .spec = &(const struct rte_flow_item_eth){
291 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
295 .type = RTE_FLOW_ITEM_TYPE_END,
297 .mirred = TCA_EGRESS_MIRROR,
299 [TAP_REMOTE_BROADCASTV6] = {
302 .priority = PRIORITY_MASK - TAP_REMOTE_BROADCASTV6,
306 .type = RTE_FLOW_ITEM_TYPE_ETH,
/* Match the IPv6 multicast prefix 33:33:xx:xx:xx:xx only. */
307 .mask = &(const struct rte_flow_item_eth){
308 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
310 .spec = &(const struct rte_flow_item_eth){
311 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
315 .type = RTE_FLOW_ITEM_TYPE_END,
317 .mirred = TCA_EGRESS_MIRROR,
319 [TAP_REMOTE_PROMISC] = {
322 .priority = PRIORITY_MASK - TAP_REMOTE_PROMISC,
326 .type = RTE_FLOW_ITEM_TYPE_VOID,
329 .type = RTE_FLOW_ITEM_TYPE_END,
331 .mirred = TCA_EGRESS_MIRROR,
333 [TAP_REMOTE_ALLMULTI] = {
336 .priority = PRIORITY_MASK - TAP_REMOTE_ALLMULTI,
340 .type = RTE_FLOW_ITEM_TYPE_ETH,
/* Multicast bit only: matches any group-address destination. */
341 .mask = &(const struct rte_flow_item_eth){
342 .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
344 .spec = &(const struct rte_flow_item_eth){
345 .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
349 .type = RTE_FLOW_ITEM_TYPE_END,
351 .mirred = TCA_EGRESS_MIRROR,
356 .priority = TAP_REMOTE_TX,
360 .type = RTE_FLOW_ITEM_TYPE_VOID,
363 .type = RTE_FLOW_ITEM_TYPE_END,
365 .mirred = TCA_EGRESS_MIRROR,
370 * Make as much checks as possible on an Ethernet item, and if a flow is
371 * provided, fill it appropriately with Ethernet info.
374 * Item specification.
375 * @param[in, out] data
376 * Additional data structure to tell next layers we've been here.
379 * 0 if checks are alright, -1 otherwise.
382 tap_flow_create_eth(const struct rte_flow_item *item, void *data)
384 struct convert_data *info = (struct convert_data *)data;
385 const struct rte_flow_item_eth *spec = item->spec;
386 const struct rte_flow_item_eth *mask = item->mask;
387 struct rte_flow *flow = info->flow;
390 /* use default mask if none provided */
392 mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_ETH].default_mask;
393 /* TC does not support eth_type masking. Only accept if exact match. */
394 if (mask->type && mask->type != 0xffff)
398 /* store eth_type for consistency if ipv4/6 pattern item comes next */
399 if (spec->type & mask->type)
400 info->eth_type = spec->type;
/* NOTE(review): dst presence is tested on spec while src is tested on
 * mask — the asymmetry looks unintended; confirm against upstream. */
404 if (!is_zero_ether_addr(&spec->dst)) {
405 nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_DST, ETHER_ADDR_LEN,
406 &spec->dst.addr_bytes);
408 TCA_FLOWER_KEY_ETH_DST_MASK, ETHER_ADDR_LEN,
409 &mask->dst.addr_bytes);
411 if (!is_zero_ether_addr(&mask->src)) {
412 nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_SRC, ETHER_ADDR_LEN,
413 &spec->src.addr_bytes);
415 TCA_FLOWER_KEY_ETH_SRC_MASK, ETHER_ADDR_LEN,
416 &mask->src.addr_bytes);
422 * Make as much checks as possible on a VLAN item, and if a flow is provided,
423 * fill it appropriately with VLAN info.
426 * Item specification.
427 * @param[in, out] data
428 * Additional data structure to tell next layers we've been here.
431 * 0 if checks are alright, -1 otherwise.
434 tap_flow_create_vlan(const struct rte_flow_item *item, void *data)
436 struct convert_data *info = (struct convert_data *)data;
437 const struct rte_flow_item_vlan *spec = item->spec;
438 const struct rte_flow_item_vlan *mask = item->mask;
439 struct rte_flow *flow = info->flow;
442 /* use default mask if none provided */
444 mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_VLAN].default_mask;
445 /* TC does not support tpid masking. Only accept if exact match. */
446 if (mask->tpid && mask->tpid != 0xffff)
448 /* Double-tagging not supported. */
449 if (spec && mask->tpid && spec->tpid != htons(ETH_P_8021Q))
/* Force the filter protocol to 802.1Q in tcm_info for VLAN matching. */
455 msg->t.tcm_info = TC_H_MAKE(msg->t.tcm_info, htons(ETH_P_8021Q));
/* TCI layout: PCP (3 bits) | DEI (1 bit, unsupported) | VID (12 bits). */
456 #define VLAN_PRIO(tci) ((tci) >> 13)
457 #define VLAN_ID(tci) ((tci) & 0xfff)
461 uint16_t tci = ntohs(spec->tci) & mask->tci;
462 uint16_t prio = VLAN_PRIO(tci);
463 uint8_t vid = VLAN_ID(tci);
466 nlattr_add8(&msg->nh, TCA_FLOWER_KEY_VLAN_PRIO, prio);
468 nlattr_add16(&msg->nh, TCA_FLOWER_KEY_VLAN_ID, vid);
474 * Make as much checks as possible on an IPv4 item, and if a flow is provided,
475 * fill it appropriately with IPv4 info.
478 * Item specification.
479 * @param[in, out] data
480 * Additional data structure to tell next layers we've been here.
483 * 0 if checks are alright, -1 otherwise.
486 tap_flow_create_ipv4(const struct rte_flow_item *item, void *data)
488 struct convert_data *info = (struct convert_data *)data;
489 const struct rte_flow_item_ipv4 *spec = item->spec;
490 const struct rte_flow_item_ipv4 *mask = item->mask;
491 struct rte_flow *flow = info->flow;
494 /* use default mask if none provided */
496 mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_IPV4].default_mask;
497 /* check that previous eth type is compatible with ipv4 */
498 if (info->eth_type && info->eth_type != htons(ETH_P_IP))
500 /* store ip_proto for consistency if udp/tcp pattern item comes next */
502 info->ip_proto = spec->hdr.next_proto_id;
/* Record ETH_P_IP so the L2 key emitted later stays consistent. */
507 info->eth_type = htons(ETH_P_IP);
/* Addresses/proto are only emitted when non-zero in the spec. */
510 if (spec->hdr.dst_addr) {
511 nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST,
513 nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST_MASK,
516 if (spec->hdr.src_addr) {
517 nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC,
519 nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC_MASK,
522 if (spec->hdr.next_proto_id)
523 nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO,
524 spec->hdr.next_proto_id);
529 * Make as much checks as possible on an IPv6 item, and if a flow is provided,
530 * fill it appropriately with IPv6 info.
533 * Item specification.
534 * @param[in, out] data
535 * Additional data structure to tell next layers we've been here.
538 * 0 if checks are alright, -1 otherwise.
541 tap_flow_create_ipv6(const struct rte_flow_item *item, void *data)
543 struct convert_data *info = (struct convert_data *)data;
544 const struct rte_flow_item_ipv6 *spec = item->spec;
545 const struct rte_flow_item_ipv6 *mask = item->mask;
546 struct rte_flow *flow = info->flow;
/* All-zero reference used to detect "no address specified". */
547 uint8_t empty_addr[16] = { 0 };
550 /* use default mask if none provided */
552 mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_IPV6].default_mask;
553 /* check that previous eth type is compatible with ipv6 */
554 if (info->eth_type && info->eth_type != htons(ETH_P_IPV6))
556 /* store ip_proto for consistency if udp/tcp pattern item comes next */
558 info->ip_proto = spec->hdr.proto;
/* Record ETH_P_IPV6 so the L2 key emitted later stays consistent. */
563 info->eth_type = htons(ETH_P_IPV6);
566 if (memcmp(spec->hdr.dst_addr, empty_addr, 16)) {
567 nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST,
568 sizeof(spec->hdr.dst_addr), &spec->hdr.dst_addr);
569 nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST_MASK,
570 sizeof(mask->hdr.dst_addr), &mask->hdr.dst_addr);
572 if (memcmp(spec->hdr.src_addr, empty_addr, 16)) {
573 nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC,
574 sizeof(spec->hdr.src_addr), &spec->hdr.src_addr);
575 nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
576 sizeof(mask->hdr.src_addr), &mask->hdr.src_addr);
579 nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, spec->hdr.proto);
584 * Make as much checks as possible on a UDP item, and if a flow is provided,
585 * fill it appropriately with UDP info.
588 * Item specification.
589 * @param[in, out] data
590 * Additional data structure to tell next layers we've been here.
593 * 0 if checks are alright, -1 otherwise.
596 tap_flow_create_udp(const struct rte_flow_item *item, void *data)
598 struct convert_data *info = (struct convert_data *)data;
599 const struct rte_flow_item_udp *spec = item->spec;
600 const struct rte_flow_item_udp *mask = item->mask;
601 struct rte_flow *flow = info->flow;
604 /* use default mask if none provided */
606 mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_UDP].default_mask;
607 /* check that previous ip_proto is compatible with udp */
608 if (info->ip_proto && info->ip_proto != IPPROTO_UDP)
610 /* TC does not support UDP port masking. Only accept if exact match. */
611 if ((mask->hdr.src_port && mask->hdr.src_port != 0xffff) ||
612 (mask->hdr.dst_port && mask->hdr.dst_port != 0xffff))
/* flower needs IP_PROTO set for L4 port keys to be accepted. */
617 nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_UDP);
/* Mask is exact-match (0xffff) here, so "&" just tests presence. */
620 if (spec->hdr.dst_port & mask->hdr.dst_port)
621 nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_DST,
623 if (spec->hdr.src_port & mask->hdr.src_port)
624 nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_SRC,
630 * Make as much checks as possible on a TCP item, and if a flow is provided,
631 * fill it appropriately with TCP info.
634 * Item specification.
635 * @param[in, out] data
636 * Additional data structure to tell next layers we've been here.
639 * 0 if checks are alright, -1 otherwise.
642 tap_flow_create_tcp(const struct rte_flow_item *item, void *data)
644 struct convert_data *info = (struct convert_data *)data;
645 const struct rte_flow_item_tcp *spec = item->spec;
646 const struct rte_flow_item_tcp *mask = item->mask;
647 struct rte_flow *flow = info->flow;
650 /* use default mask if none provided */
652 mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_TCP].default_mask;
653 /* check that previous ip_proto is compatible with tcp */
654 if (info->ip_proto && info->ip_proto != IPPROTO_TCP)
656 /* TC does not support TCP port masking. Only accept if exact match. */
657 if ((mask->hdr.src_port && mask->hdr.src_port != 0xffff) ||
658 (mask->hdr.dst_port && mask->hdr.dst_port != 0xffff))
/* flower needs IP_PROTO set for L4 port keys to be accepted. */
663 nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_TCP);
/* Mask is exact-match (0xffff) here, so "&" just tests presence. */
666 if (spec->hdr.dst_port & mask->hdr.dst_port)
667 nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_DST,
669 if (spec->hdr.src_port & mask->hdr.src_port)
670 nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_SRC,
676 * Check support for a given item.
679 * Item specification.
681 * Bit-Mask size in bytes.
682 * @param[in] supported_mask
683 * Bit-mask covering supported fields to compare with spec, last and mask in
685 * @param[in] default_mask
686 * Bit-mask default mask if none is provided in \item.
/*
 * Byte-wise validation: every byte of spec/last/mask must be a subset of
 * supported_mask ((b | supported) == supported), and spec/last must be
 * equal once masked since TC cannot express ranges.
 */
692 tap_flow_item_validate(const struct rte_flow_item *item,
694 const uint8_t *supported_mask,
695 const uint8_t *default_mask)
699 /* An empty layer is allowed, as long as all fields are NULL */
700 if (!item->spec && (item->mask || item->last))
702 /* Is the item spec compatible with what the NIC supports? */
703 if (item->spec && !item->mask) {
705 const uint8_t *spec = item->spec;
707 for (i = 0; i < size; ++i)
708 if ((spec[i] | supported_mask[i]) != supported_mask[i])
710 /* Is the default mask compatible with what the NIC supports? */
711 for (i = 0; i < size; i++)
712 if ((default_mask[i] | supported_mask[i]) !=
716 /* Is the item last compatible with what the NIC supports? */
717 if (item->last && !item->mask) {
719 const uint8_t *spec = item->last;
721 for (i = 0; i < size; ++i)
722 if ((spec[i] | supported_mask[i]) != supported_mask[i])
725 /* Is the item mask compatible with what the NIC supports? */
728 const uint8_t *spec = item->mask;
730 for (i = 0; i < size; ++i)
731 if ((spec[i] | supported_mask[i]) != supported_mask[i])
735 * Once masked, Are item spec and item last equal?
736 * TC does not support range so anything else is invalid.
738 if (item->spec && item->last) {
741 const uint8_t *apply = default_mask;
746 for (i = 0; i < size; ++i) {
747 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
748 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
750 ret = memcmp(spec, last, size);
756 * Transform a DROP/PASSTHRU action item in the provided flow for TC.
758 * @param[in, out] flow
761 * Appropriate action to be set in the TCA_GACT_PARMS structure.
764 * 0 if checks are alright, -1 otherwise.
767 add_action_gact(struct rte_flow *flow, int action)
769 struct nlmsg *msg = &flow->msg;
/* TC action indices are 1-based within TCA_FLOWER_ACT. */
770 size_t act_index = 1;
/* Nesting: TCA_FLOWER_ACT > act_index > TCA_ACT_OPTIONS > GACT parms. */
775 if (nlattr_nested_start(msg, TCA_FLOWER_ACT) < 0)
777 if (nlattr_nested_start(msg, act_index++) < 0)
779 nlattr_add(&msg->nh, TCA_ACT_KIND, sizeof("gact"), "gact");
780 if (nlattr_nested_start(msg, TCA_ACT_OPTIONS) < 0)
782 nlattr_add(&msg->nh, TCA_GACT_PARMS, sizeof(p), &p);
783 nlattr_nested_finish(msg); /* nested TCA_ACT_OPTIONS */
784 nlattr_nested_finish(msg); /* nested act_index */
785 nlattr_nested_finish(msg); /* nested TCA_FLOWER_ACT */
790 * Transform a MIRRED action item in the provided flow for TC.
792 * @param[in, out] flow
795 * Netdevice ifindex, where to mirror/redirect packet to.
796 * @param[in] action_type
797 * Either TCA_EGRESS_REDIR for redirection or TCA_EGRESS_MIRROR for mirroring.
800 * 0 if checks are alright, -1 otherwise.
803 add_action_mirred(struct rte_flow *flow, uint16_t ifindex, uint16_t action_type)
805 struct nlmsg *msg = &flow->msg;
806 size_t act_index = 1;
807 struct tc_mirred p = {
808 .eaction = action_type,
/* Nesting: TCA_FLOWER_ACT > act_index > TCA_ACT_OPTIONS > mirred parms. */
812 if (nlattr_nested_start(msg, TCA_FLOWER_ACT) < 0)
814 if (nlattr_nested_start(msg, act_index++) < 0)
816 nlattr_add(&msg->nh, TCA_ACT_KIND, sizeof("mirred"), "mirred");
817 if (nlattr_nested_start(msg, TCA_ACT_OPTIONS) < 0)
/* Mirror keeps processing the packet (PIPE); redirect consumes it
 * (STOLEN). */
819 if (action_type == TCA_EGRESS_MIRROR)
820 p.action = TC_ACT_PIPE;
822 p.action = TC_ACT_STOLEN;
823 nlattr_add(&msg->nh, TCA_MIRRED_PARMS, sizeof(p), &p);
824 nlattr_nested_finish(msg); /* nested TCA_ACT_OPTIONS */
825 nlattr_nested_finish(msg); /* nested act_index */
826 nlattr_nested_finish(msg); /* nested TCA_FLOWER_ACT */
831 * Transform a QUEUE action item in the provided flow for TC.
833 * @param[in, out] flow
839 * 0 if checks are alright, -1 otherwise.
842 add_action_skbedit(struct rte_flow *flow, uint16_t queue)
844 struct nlmsg *msg = &flow->msg;
845 size_t act_index = 1;
/* skbedit rewrites the skb queue mapping, then lets the packet continue
 * (TC_ACT_PIPE). */
846 struct tc_skbedit p = {
847 .action = TC_ACT_PIPE
850 if (nlattr_nested_start(msg, TCA_FLOWER_ACT) < 0)
852 if (nlattr_nested_start(msg, act_index++) < 0)
854 nlattr_add(&msg->nh, TCA_ACT_KIND, sizeof("skbedit"), "skbedit");
855 if (nlattr_nested_start(msg, TCA_ACT_OPTIONS) < 0)
857 nlattr_add(&msg->nh, TCA_SKBEDIT_PARMS, sizeof(p), &p);
858 nlattr_add16(&msg->nh, TCA_SKBEDIT_QUEUE_MAPPING, queue);
859 nlattr_nested_finish(msg); /* nested TCA_ACT_OPTIONS */
860 nlattr_nested_finish(msg); /* nested act_index */
861 nlattr_nested_finish(msg); /* nested TCA_FLOWER_ACT */
866 * Validate a flow supported by TC.
867 * If flow param is not NULL, then also fill the netlink message inside.
870 * Pointer to private structure.
872 * Flow rule attributes.
874 * Pattern specification (list terminated by the END pattern item).
876 * Associated actions (list terminated by the END action).
878 * Perform verbose error reporting if not NULL.
879 * @param[in, out] flow
880 * Flow structure to update.
882 * If set to TCA_EGRESS_REDIR, provided actions will be replaced with a
883 * redirection to the tap netdevice, and the TC rule will be configured
884 * on the remote netdevice in pmd.
885 * If set to TCA_EGRESS_MIRROR, provided actions will be replaced with a
886 * mirroring to the tap netdevice, and the TC rule will be configured
887 * on the remote netdevice in pmd. Matching packets will thus be duplicated.
888 * If set to 0, the standard behavior is to be used: set correct actions for
889 * the TC rule, and apply it on the tap netdevice.
892 * 0 on success, a negative errno value otherwise and rte_errno is set.
895 priv_flow_process(struct pmd_internals *pmd,
896 const struct rte_flow_attr *attr,
897 const struct rte_flow_item items[],
898 const struct rte_flow_action actions[],
899 struct rte_flow_error *error,
900 struct rte_flow *flow,
903 const struct tap_flow_items *cur_item = tap_flow_items;
904 struct convert_data data = {
909 int action = 0; /* Only one action authorized for now */
911 if (attr->group > MAX_GROUP) {
913 error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
914 NULL, "group value too big: cannot exceed 15");
917 if (attr->priority > MAX_PRIORITY) {
919 error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
920 NULL, "priority value too big");
/* Encode group+priority into the high 16 bits of tcm_info; the low
 * bits keep the protocol set earlier by the caller. */
923 uint16_t group = attr->group << GROUP_SHIFT;
924 uint16_t prio = group | (attr->priority + PRIORITY_OFFSET);
925 flow->msg.t.tcm_info = TC_H_MAKE(prio << 16,
926 flow->msg.t.tcm_info);
931 * If attr->ingress, the rule applies on remote ingress
932 * to match incoming packets
933 * If attr->egress, the rule applies on tap ingress (as
934 * seen from the kernel) to deal with packets going out
937 flow->msg.t.tcm_parent = TC_H_MAKE(TC_H_INGRESS, 0);
939 /* Standard rule on tap egress (kernel standpoint). */
940 flow->msg.t.tcm_parent =
941 TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0);
943 /* use flower filter type */
944 nlattr_add(&flow->msg.nh, TCA_KIND, sizeof("flower"), "flower");
945 if (nlattr_nested_start(&flow->msg, TCA_OPTIONS) < 0)
946 goto exit_item_not_supported;
/* Walk the pattern: each item must be a legal successor of the previous
 * one in the tap_flow_items graph, then validate and convert it. */
948 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
949 const struct tap_flow_items *token = NULL;
953 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
957 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
959 if (cur_item->items[i] == items->type) {
960 token = &tap_flow_items[items->type];
965 goto exit_item_not_supported;
967 err = tap_flow_item_validate(
968 items, cur_item->mask_sz,
969 (const uint8_t *)cur_item->mask,
970 (const uint8_t *)cur_item->default_mask);
972 goto exit_item_not_supported;
973 if (flow && cur_item->convert) {
/* VLAN conversion requires kernel flower VLAN support. */
974 if (!pmd->flower_vlan_support &&
975 cur_item->convert == tap_flow_create_vlan)
976 goto exit_item_not_supported;
977 err = cur_item->convert(items, &data);
979 goto exit_item_not_supported;
/* Emit the L2 ethertype key last: with a VLAN match the outer type is
 * 802.1Q and the inner type goes into VLAN_ETH_TYPE. */
983 if (pmd->flower_vlan_support && data.vlan) {
984 nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE,
986 nlattr_add16(&flow->msg.nh,
987 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
989 data.eth_type : htons(ETH_P_ALL));
990 } else if (data.eth_type) {
991 nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE,
/* Implicit remote rules override user actions with a mirred action. */
995 if (mirred && flow) {
996 uint16_t if_index = pmd->if_index;
999 * If attr->egress && mirred, then this is a special
1000 * case where the rule must be applied on the tap, to
1001 * redirect packets coming from the DPDK App, out
1002 * through the remote netdevice.
1005 if_index = pmd->remote_if_index;
1006 if (add_action_mirred(flow, if_index, mirred) < 0)
1007 goto exit_action_not_supported;
/* Regular path: exactly one action among DROP/PASSTHRU/QUEUE. */
1011 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
1014 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
1016 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
1018 goto exit_action_not_supported;
1021 err = add_action_gact(flow, TC_ACT_SHOT);
1022 } else if (actions->type == RTE_FLOW_ACTION_TYPE_PASSTHRU) {
1024 goto exit_action_not_supported;
1027 err = add_action_gact(flow, TC_ACT_UNSPEC);
1028 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
1029 const struct rte_flow_action_queue *queue =
1030 (const struct rte_flow_action_queue *)
1033 goto exit_action_not_supported;
1035 if (!queue || (queue->index >= pmd->nb_queues))
1036 goto exit_action_not_supported;
1038 err = add_action_skbedit(flow, queue->index);
1040 goto exit_action_not_supported;
1043 goto exit_action_not_supported;
1047 nlattr_nested_finish(&flow->msg); /* nested TCA_OPTIONS */
1049 exit_item_not_supported:
1050 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
1051 items, "item not supported");
1053 exit_action_not_supported:
1054 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
1055 actions, "action not supported");
1064 * @see rte_flow_validate()
/* Validation-only entry point: flow == NULL means priv_flow_process
 * checks the rule without building a netlink message. */
1068 tap_flow_validate(struct rte_eth_dev *dev,
1069 const struct rte_flow_attr *attr,
1070 const struct rte_flow_item items[],
1071 const struct rte_flow_action actions[],
1072 struct rte_flow_error *error)
1074 struct pmd_internals *pmd = dev->data->dev_private;
1076 return priv_flow_process(pmd, attr, items, actions, error, NULL, 0);
1080 * Set a unique handle in a flow.
1082 * The kernel supports TC rules with equal priority, as long as they use the
1083 * same matching fields (e.g.: dst mac and ipv4) with different values (and
1084 * full mask to ensure no collision is possible).
1085 * In those rules, the handle (uint32_t) is the part that would identify
1086 * specifically each rule.
1088 * On 32-bit architectures, the handle can simply be the flow's pointer address.
1089 * On 64-bit architectures, we rely on jhash(flow) to find a (sufficiently)
1092 * @param[in, out] flow
1093 * The flow that needs its handle set.
1096 tap_flow_set_handle(struct rte_flow *flow)
1098 uint32_t handle = 0;
/* sizeof(flow) is the POINTER size: > 4 means 64-bit, so hash the
 * pointer value down to 32 bits; otherwise use the address directly. */
1100 if (sizeof(flow) > 4)
1101 handle = rte_jhash(&flow, sizeof(flow), 1);
1103 handle = (uintptr_t)flow;
1104 /* must be at least 1 to avoid letting the kernel choose one for us */
1107 flow->msg.t.tcm_handle = handle;
1113 * @see rte_flow_create()
/*
 * Create a TC flower rule on the tap netdevice and, when a remote device
 * is configured, a matching redirect rule on that remote device. On any
 * failure both partially-created rules are released (see cleanup below).
 */
1116 static struct rte_flow *
1117 tap_flow_create(struct rte_eth_dev *dev,
1118 const struct rte_flow_attr *attr,
1119 const struct rte_flow_item items[],
1120 const struct rte_flow_action actions[],
1121 struct rte_flow_error *error)
1123 struct pmd_internals *pmd = dev->data->dev_private;
1124 struct rte_flow *remote_flow = NULL;
1125 struct rte_flow *flow = NULL;
1126 struct nlmsg *msg = NULL;
1129 if (!pmd->if_index) {
1130 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
1132 "can't create rule, ifindex not found");
1136 * No rules configured through standard rte_flow should be set on the
1137 * priorities used by implicit rules.
1139 if ((attr->group == MAX_GROUP) &&
1140 attr->priority > (MAX_PRIORITY - TAP_REMOTE_MAX_IDX)) {
1142 error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1143 NULL, "priority value too big");
1146 flow = rte_malloc(__func__, sizeof(struct rte_flow), 0);
1148 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1149 NULL, "cannot allocate memory for rte_flow");
/* NLM_F_EXCL makes the kernel reject a rule whose prio/handle already
 * exists instead of silently replacing it. */
1153 tc_init_msg(msg, pmd->if_index, RTM_NEWTFILTER,
1154 NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
1155 msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
1156 tap_flow_set_handle(flow);
1157 if (priv_flow_process(pmd, attr, items, actions, error, flow, 0))
1159 err = nl_send(pmd->nlsk_fd, &msg->nh);
1161 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
1162 NULL, "couldn't send request to kernel");
1165 err = nl_recv_ack(pmd->nlsk_fd);
1168 "Kernel refused TC filter rule creation (%d): %s\n",
1169 errno, strerror(errno));
1170 rte_flow_error_set(error, EEXIST, RTE_FLOW_ERROR_TYPE_HANDLE,
1171 NULL, "overlapping rules");
1174 LIST_INSERT_HEAD(&pmd->flows, flow, next);
1176 * If a remote device is configured, a TC rule with identical items for
1177 * matching must be set on that device, with a single action: redirect
1178 * to the local pmd->if_index.
1180 if (pmd->remote_if_index) {
1181 remote_flow = rte_malloc(__func__, sizeof(struct rte_flow), 0);
1184 error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
1185 "cannot allocate memory for rte_flow");
1188 msg = &remote_flow->msg;
1189 /* set the rule if_index for the remote netdevice */
1191 msg, pmd->remote_if_index, RTM_NEWTFILTER,
1192 NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
1193 msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
1194 tap_flow_set_handle(remote_flow);
/* actions == NULL + TCA_EGRESS_REDIR: reuse the same pattern but
 * replace user actions with a redirect to the tap. */
1195 if (priv_flow_process(pmd, attr, items, NULL,
1196 error, remote_flow, TCA_EGRESS_REDIR)) {
1198 error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1199 NULL, "rte flow rule validation failed");
1202 err = nl_send(pmd->nlsk_fd, &msg->nh);
1205 error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1206 NULL, "Failure sending nl request");
1209 err = nl_recv_ack(pmd->nlsk_fd);
1212 "Kernel refused TC filter rule creation (%d): %s\n",
1213 errno, strerror(errno));
1215 error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1216 NULL, "overlapping rules");
1219 flow->remote_flow = remote_flow;
1224 rte_free(remote_flow);
1231 * Destroy a flow using pointer to pmd_internal.
1233 * @param[in, out] pmd
1234 * Pointer to private structure.
1236 * Pointer to the flow to destroy.
1237 * @param[in, out] error
1238 * Pointer to the flow error handler
1240 * @return 0 if the flow could be destroyed, -1 otherwise.
1243 tap_flow_destroy_pmd(struct pmd_internals *pmd,
1244 struct rte_flow *flow,
1245 struct rte_flow_error *error)
1247 struct rte_flow *remote_flow = flow->remote_flow;
1250 LIST_REMOVE(flow, next);
/* Reuse the stored creation message: flip it into a DELTFILTER request. */
1251 flow->msg.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1252 flow->msg.nh.nlmsg_type = RTM_DELTFILTER;
1254 ret = nl_send(pmd->nlsk_fd, &flow->msg.nh);
1256 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
1257 NULL, "couldn't send request to kernel");
1260 ret = nl_recv_ack(pmd->nlsk_fd);
1261 /* If errno is ENOENT, the rule is already no longer in the kernel. */
1262 if (ret < 0 && errno == ENOENT)
1266 "Kernel refused TC filter rule deletion (%d): %s\n",
1267 errno, strerror(errno));
1269 error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
1270 "couldn't receive kernel ack to our request");
/* Same deletion sequence for the companion rule on the remote device. */
1274 remote_flow->msg.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1275 remote_flow->msg.nh.nlmsg_type = RTM_DELTFILTER;
1277 ret = nl_send(pmd->nlsk_fd, &remote_flow->msg.nh);
1280 error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1281 NULL, "Failure sending nl request");
1284 ret = nl_recv_ack(pmd->nlsk_fd);
1285 if (ret < 0 && errno == ENOENT)
1289 "Kernel refused TC filter rule deletion (%d): %s\n",
1290 errno, strerror(errno));
1292 error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1293 NULL, "Failure trying to receive nl ack");
1299 rte_free(remote_flow);
1307 * @see rte_flow_destroy()
1311 tap_flow_destroy(struct rte_eth_dev *dev,
1312 struct rte_flow *flow,
1313 struct rte_flow_error *error)
1315 struct pmd_internals *pmd = dev->data->dev_private;
1317 return tap_flow_destroy_pmd(pmd, flow, error);
1321 * Destroy all flows.
1323 * @see rte_flow_flush()
1327 tap_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
1329 struct pmd_internals *pmd = dev->data->dev_private;
1330 struct rte_flow *flow;
1332 while (!LIST_EMPTY(&pmd->flows)) {
1333 flow = LIST_FIRST(&pmd->flows);
1334 if (tap_flow_destroy(dev, flow, error) < 0)
1341 * Add an implicit flow rule on the remote device to make sure traffic gets to
1342 * the tap netdevice from there.
1345 * Pointer to private structure.
1347 * The idx in the implicit_rte_flows array specifying which rule to apply.
1349 * @return -1 if the rule couldn't be applied, 0 otherwise.
/*
 * Build and install one implicit TC flower rule, using the template in
 * implicit_rte_flows[idx].  TAP_REMOTE_TX rules are installed on the tap
 * netdevice itself; all other rules go on the remote netdevice.  For
 * TAP_REMOTE_LOCAL_MAC, the port MAC address (unknown at compile time) is
 * patched into a local copy of the pattern items.
 */
1351 int tap_flow_implicit_create(struct pmd_internals *pmd,
1352 enum implicit_rule_index idx)
1354 struct rte_flow_item *items = implicit_rte_flows[idx].items;
1355 struct rte_flow_attr *attr = &implicit_rte_flows[idx].attr;
1356 struct rte_flow_item_eth eth_local = { .type = 0 };
1357 uint16_t if_index = pmd->remote_if_index;
1358 struct rte_flow *remote_flow = NULL;
1359 struct nlmsg *msg = NULL;
/* Runtime copy of the pattern so eth_local can be substituted below. */
1361 struct rte_flow_item items_local[2] = {
1363 .type = items[0].type,
1365 .mask = items[0].mask,
1368 .type = items[1].type,
1372 remote_flow = rte_malloc(__func__, sizeof(struct rte_flow), 0);
1374 RTE_LOG(ERR, PMD, "Cannot allocate memory for rte_flow");
1377 msg = &remote_flow->msg;
/* TX rules target the tap ifindex; everything else the remote ifindex. */
1378 if (idx == TAP_REMOTE_TX) {
1379 if_index = pmd->if_index;
1380 } else if (idx == TAP_REMOTE_LOCAL_MAC) {
1382 * eth addr couldn't be set in implicit_rte_flows[] as it is not
1383 * known at compile time.
1385 memcpy(&eth_local.dst, &pmd->eth_addr, sizeof(pmd->eth_addr));
1386 items = items_local;
1388 tc_init_msg(msg, if_index, RTM_NEWTFILTER,
1389 NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
/* Match all ethertypes; handle/priority are encoded into tcm_info. */
1390 msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
1391 tap_flow_set_handle(remote_flow);
1392 if (priv_flow_process(pmd, attr, items, NULL, NULL,
1393 remote_flow, implicit_rte_flows[idx].mirred)) {
1394 RTE_LOG(ERR, PMD, "rte flow rule validation failed\n");
1397 err = nl_send(pmd->nlsk_fd, &msg->nh);
1399 RTE_LOG(ERR, PMD, "Failure sending nl request");
1402 err = nl_recv_ack(pmd->nlsk_fd);
1405 "Kernel refused TC filter rule creation (%d): %s\n",
1406 errno, strerror(errno));
/* Track the rule so tap_flow_implicit_destroy/flush can find it later. */
1409 LIST_INSERT_HEAD(&pmd->implicit_flows, remote_flow, next);
1413 rte_free(remote_flow);
1418 * Remove specific implicit flow rule on the remote device.
1420 * @param[in, out] pmd
1421 * Pointer to private structure.
1423 * The idx in the implicit_rte_flows array specifying which rule to remove.
1425 * @return -1 if the implicit rule couldn't be destroyed, 0 otherwise.
1427 int tap_flow_implicit_destroy(struct pmd_internals *pmd,
1428 enum implicit_rule_index idx)
1430 struct rte_flow *remote_flow;
1432 int idx_prio = implicit_rte_flows[idx].attr.priority + PRIORITY_OFFSET;
1434 for (remote_flow = LIST_FIRST(&pmd->implicit_flows);
1436 remote_flow = LIST_NEXT(remote_flow, next)) {
1437 cur_prio = (remote_flow->msg.t.tcm_info >> 16) & PRIORITY_MASK;
1438 if (cur_prio != idx_prio)
1440 return tap_flow_destroy_pmd(pmd, remote_flow, NULL);
1446 * Destroy all implicit flows.
1448 * @see rte_flow_flush()
1451 tap_flow_implicit_flush(struct pmd_internals *pmd, struct rte_flow_error *error)
1453 struct rte_flow *remote_flow;
1455 while (!LIST_EMPTY(&pmd->implicit_flows)) {
1456 remote_flow = LIST_FIRST(&pmd->implicit_flows);
1457 if (tap_flow_destroy_pmd(pmd, remote_flow, error) < 0)
1464 * Manage filter operations.
1467 * Pointer to Ethernet device structure.
1468 * @param filter_type
1471 * Operation to perform.
1473 * Pointer to operation-specific structure.
1476 * 0 on success, negative errno value on failure.
1479 tap_dev_filter_ctrl(struct rte_eth_dev *dev,
1480 enum rte_filter_type filter_type,
1481 enum rte_filter_op filter_op,
1484 struct pmd_internals *pmd = dev->data->dev_private;
1486 if (!pmd->flower_support)
1488 switch (filter_type) {
1489 case RTE_ETH_FILTER_GENERIC:
1490 if (filter_op != RTE_ETH_FILTER_GET)
1492 *(const void **)arg = &tap_flow_ops;
1495 RTE_LOG(ERR, PMD, "%p: filter type (%d) not supported",
1496 (void *)dev, filter_type);