net/mlx5: add VXLAN to flow prepare routine
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_tcf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 6WIND S.A.
3  * Copyright 2018 Mellanox Technologies, Ltd
4  */
5
6 #include <assert.h>
7 #include <errno.h>
8 #include <libmnl/libmnl.h>
9 #include <linux/gen_stats.h>
10 #include <linux/if_ether.h>
11 #include <linux/netlink.h>
12 #include <linux/pkt_cls.h>
13 #include <linux/pkt_sched.h>
14 #include <linux/rtnetlink.h>
15 #include <linux/tc_act/tc_gact.h>
16 #include <linux/tc_act/tc_mirred.h>
17 #include <netinet/in.h>
18 #include <stdalign.h>
19 #include <stdbool.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <sys/socket.h>
24
25 #include <rte_byteorder.h>
26 #include <rte_errno.h>
27 #include <rte_ether.h>
28 #include <rte_flow.h>
29 #include <rte_malloc.h>
30 #include <rte_common.h>
31
32 #include "mlx5.h"
33 #include "mlx5_flow.h"
34 #include "mlx5_autoconf.h"
35
36 #ifdef HAVE_TC_ACT_VLAN
37
38 #include <linux/tc_act/tc_vlan.h>
39
40 #else /* HAVE_TC_ACT_VLAN */
41
42 #define TCA_VLAN_ACT_POP 1
43 #define TCA_VLAN_ACT_PUSH 2
44 #define TCA_VLAN_ACT_MODIFY 3
45 #define TCA_VLAN_PARMS 2
46 #define TCA_VLAN_PUSH_VLAN_ID 3
47 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
48 #define TCA_VLAN_PAD 5
49 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
50
51 struct tc_vlan {
52         tc_gen;
53         int v_action;
54 };
55
56 #endif /* HAVE_TC_ACT_VLAN */
57
58 #ifdef HAVE_TC_ACT_PEDIT
59
60 #include <linux/tc_act/tc_pedit.h>
61
62 #else /* HAVE_TC_ACT_VLAN */
63
64 enum {
65         TCA_PEDIT_UNSPEC,
66         TCA_PEDIT_TM,
67         TCA_PEDIT_PARMS,
68         TCA_PEDIT_PAD,
69         TCA_PEDIT_PARMS_EX,
70         TCA_PEDIT_KEYS_EX,
71         TCA_PEDIT_KEY_EX,
72         __TCA_PEDIT_MAX
73 };
74
75 enum {
76         TCA_PEDIT_KEY_EX_HTYPE = 1,
77         TCA_PEDIT_KEY_EX_CMD = 2,
78         __TCA_PEDIT_KEY_EX_MAX
79 };
80
81 enum pedit_header_type {
82         TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
83         TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
84         TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
85         TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
86         TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
87         TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
88         __PEDIT_HDR_TYPE_MAX,
89 };
90
91 enum pedit_cmd {
92         TCA_PEDIT_KEY_EX_CMD_SET = 0,
93         TCA_PEDIT_KEY_EX_CMD_ADD = 1,
94         __PEDIT_CMD_MAX,
95 };
96
97 struct tc_pedit_key {
98         __u32 mask; /* AND */
99         __u32 val; /*XOR */
100         __u32 off; /*offset */
101         __u32 at;
102         __u32 offmask;
103         __u32 shift;
104 };
105
106 __extension__
107 struct tc_pedit_sel {
108         tc_gen;
109         unsigned char nkeys;
110         unsigned char flags;
111         struct tc_pedit_key keys[0];
112 };
113
114 #endif /* HAVE_TC_ACT_VLAN */
115
116 #ifdef HAVE_TC_ACT_TUNNEL_KEY
117
118 #include <linux/tc_act/tc_tunnel_key.h>
119
120 #ifndef HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT
121 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
122 #endif
123
124 #ifndef HAVE_TCA_TUNNEL_KEY_NO_CSUM
125 #define TCA_TUNNEL_KEY_NO_CSUM 10
126 #endif
127
128 #else /* HAVE_TC_ACT_TUNNEL_KEY */
129
130 #define TCA_ACT_TUNNEL_KEY 17
131 #define TCA_TUNNEL_KEY_ACT_SET 1
132 #define TCA_TUNNEL_KEY_ACT_RELEASE 2
133 #define TCA_TUNNEL_KEY_PARMS 2
134 #define TCA_TUNNEL_KEY_ENC_IPV4_SRC 3
135 #define TCA_TUNNEL_KEY_ENC_IPV4_DST 4
136 #define TCA_TUNNEL_KEY_ENC_IPV6_SRC 5
137 #define TCA_TUNNEL_KEY_ENC_IPV6_DST 6
138 #define TCA_TUNNEL_KEY_ENC_KEY_ID 7
139 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
140 #define TCA_TUNNEL_KEY_NO_CSUM 10
141
142 struct tc_tunnel_key {
143         tc_gen;
144         int t_action;
145 };
146
147 #endif /* HAVE_TC_ACT_TUNNEL_KEY */
148
149 /* Normally found in linux/netlink.h. */
150 #ifndef NETLINK_CAP_ACK
151 #define NETLINK_CAP_ACK 10
152 #endif
153
154 /* Normally found in linux/pkt_sched.h. */
155 #ifndef TC_H_MIN_INGRESS
156 #define TC_H_MIN_INGRESS 0xfff2u
157 #endif
158
159 /* Normally found in linux/pkt_cls.h. */
160 #ifndef TCA_CLS_FLAGS_SKIP_SW
161 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
162 #endif
163 #ifndef HAVE_TCA_CHAIN
164 #define TCA_CHAIN 11
165 #endif
166 #ifndef HAVE_TCA_FLOWER_ACT
167 #define TCA_FLOWER_ACT 3
168 #endif
169 #ifndef HAVE_TCA_FLOWER_FLAGS
170 #define TCA_FLOWER_FLAGS 22
171 #endif
172 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
173 #define TCA_FLOWER_KEY_ETH_TYPE 8
174 #endif
175 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
176 #define TCA_FLOWER_KEY_ETH_DST 4
177 #endif
178 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
179 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
180 #endif
181 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
182 #define TCA_FLOWER_KEY_ETH_SRC 6
183 #endif
184 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
185 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
186 #endif
187 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
188 #define TCA_FLOWER_KEY_IP_PROTO 9
189 #endif
190 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
191 #define TCA_FLOWER_KEY_IPV4_SRC 10
192 #endif
193 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
194 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
195 #endif
196 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
197 #define TCA_FLOWER_KEY_IPV4_DST 12
198 #endif
199 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
200 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
201 #endif
202 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
203 #define TCA_FLOWER_KEY_IPV6_SRC 14
204 #endif
205 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
206 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
207 #endif
208 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
209 #define TCA_FLOWER_KEY_IPV6_DST 16
210 #endif
211 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
212 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
213 #endif
214 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
215 #define TCA_FLOWER_KEY_TCP_SRC 18
216 #endif
217 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
218 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
219 #endif
220 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
221 #define TCA_FLOWER_KEY_TCP_DST 19
222 #endif
223 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
224 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
225 #endif
226 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
227 #define TCA_FLOWER_KEY_UDP_SRC 20
228 #endif
229 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
230 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
231 #endif
232 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
233 #define TCA_FLOWER_KEY_UDP_DST 21
234 #endif
235 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
236 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
237 #endif
238 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
239 #define TCA_FLOWER_KEY_VLAN_ID 23
240 #endif
241 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
242 #define TCA_FLOWER_KEY_VLAN_PRIO 24
243 #endif
244 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
245 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
246 #endif
247 #ifndef HAVE_TCA_FLOWER_KEY_ENC_KEY_ID
248 #define TCA_FLOWER_KEY_ENC_KEY_ID 26
249 #endif
250 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC
251 #define TCA_FLOWER_KEY_ENC_IPV4_SRC 27
252 #endif
253 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK
254 #define TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK 28
255 #endif
256 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST
257 #define TCA_FLOWER_KEY_ENC_IPV4_DST 29
258 #endif
259 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK
260 #define TCA_FLOWER_KEY_ENC_IPV4_DST_MASK 30
261 #endif
262 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC
263 #define TCA_FLOWER_KEY_ENC_IPV6_SRC 31
264 #endif
265 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK
266 #define TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK 32
267 #endif
268 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST
269 #define TCA_FLOWER_KEY_ENC_IPV6_DST 33
270 #endif
271 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK
272 #define TCA_FLOWER_KEY_ENC_IPV6_DST_MASK 34
273 #endif
274 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT
275 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT 43
276 #endif
277 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK
278 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK 44
279 #endif
280 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT
281 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT 45
282 #endif
283 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK
284 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK 46
285 #endif
286 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
287 #define TCA_FLOWER_KEY_TCP_FLAGS 71
288 #endif
289 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
290 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
291 #endif
292 #ifndef HAVE_TC_ACT_GOTO_CHAIN
293 #define TC_ACT_GOTO_CHAIN 0x20000000
294 #endif
295
296 #ifndef IPV6_ADDR_LEN
297 #define IPV6_ADDR_LEN 16
298 #endif
299
300 #ifndef IPV4_ADDR_LEN
301 #define IPV4_ADDR_LEN 4
302 #endif
303
304 #ifndef TP_PORT_LEN
305 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
306 #endif
307
308 #ifndef TTL_LEN
309 #define TTL_LEN 1
310 #endif
311
312 #ifndef TCA_ACT_MAX_PRIO
313 #define TCA_ACT_MAX_PRIO 32
314 #endif
315
316 /** UDP port range of VXLAN devices created by driver. */
317 #define MLX5_VXLAN_PORT_MIN 30000
318 #define MLX5_VXLAN_PORT_MAX 60000
319 #define MLX5_VXLAN_DEVICE_PFX "vmlx_"
320
321 /** Tunnel action type, used for @p type in header structure. */
322 enum flow_tcf_tunact_type {
323         FLOW_TCF_TUNACT_VXLAN_DECAP,
324         FLOW_TCF_TUNACT_VXLAN_ENCAP,
325 };
326
327 /** Flags used for @p mask in tunnel action encap descriptors. */
328 #define FLOW_TCF_ENCAP_ETH_SRC (1u << 0)
329 #define FLOW_TCF_ENCAP_ETH_DST (1u << 1)
330 #define FLOW_TCF_ENCAP_IPV4_SRC (1u << 2)
331 #define FLOW_TCF_ENCAP_IPV4_DST (1u << 3)
332 #define FLOW_TCF_ENCAP_IPV6_SRC (1u << 4)
333 #define FLOW_TCF_ENCAP_IPV6_DST (1u << 5)
334 #define FLOW_TCF_ENCAP_UDP_SRC (1u << 6)
335 #define FLOW_TCF_ENCAP_UDP_DST (1u << 7)
336 #define FLOW_TCF_ENCAP_VXLAN_VNI (1u << 8)
337
338 /**
339  * Structure for holding netlink context.
340  * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
341  * Using this (8KB) buffer size ensures that netlink messages will never be
342  * truncated.
343  */
344 struct mlx5_flow_tcf_context {
345         struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
346         uint32_t seq; /* Message sequence number. */
347         uint32_t buf_size; /* Message buffer size. */
348         uint8_t *buf; /* Message buffer. */
349 };
350
351 /**
352  * Neigh rule structure. The neigh rule is applied via Netlink to
353  * outer tunnel iface in order to provide destination MAC address
354  * for the VXLAN encapsultion. The neigh rule is implicitly related
355  * to the Flow itself and can be shared by multiple Flows.
356  */
357 struct tcf_neigh_rule {
358         LIST_ENTRY(tcf_neigh_rule) next;
359         uint32_t refcnt;
360         struct ether_addr eth;
361         uint16_t mask;
362         union {
363                 struct {
364                         rte_be32_t dst;
365                 } ipv4;
366                 struct {
367                         uint8_t dst[IPV6_ADDR_LEN];
368                 } ipv6;
369         };
370 };
371
372 /**
373  * Local rule structure. The local rule is applied via Netlink to
374  * outer tunnel iface in order to provide local and peer IP addresses
375  * of the VXLAN tunnel for encapsulation. The local rule is implicitly
376  * related to the Flow itself and can be shared by multiple Flows.
377  */
378 struct tcf_local_rule {
379         LIST_ENTRY(tcf_local_rule) next;
380         uint32_t refcnt;
381         uint16_t mask;
382         union {
383                 struct {
384                         rte_be32_t dst;
385                         rte_be32_t src;
386                 } ipv4;
387                 struct {
388                         uint8_t dst[IPV6_ADDR_LEN];
389                         uint8_t src[IPV6_ADDR_LEN];
390                 } ipv6;
391         };
392 };
393
394 /** VXLAN virtual netdev. */
395 struct tcf_vtep {
396         LIST_ENTRY(tcf_vtep) next;
397         LIST_HEAD(, tcf_neigh_rule) neigh;
398         LIST_HEAD(, tcf_local_rule) local;
399         uint32_t refcnt;
400         unsigned int ifindex; /**< Own interface index. */
401         unsigned int ifouter; /**< Index of device attached to. */
402         uint16_t port;
403         uint8_t created;
404 };
405
406 /** Tunnel descriptor header, common for all tunnel types. */
407 struct flow_tcf_tunnel_hdr {
408         uint32_t type; /**< Tunnel action type. */
409         struct tcf_vtep *vtep; /**< Virtual tunnel endpoint device. */
410         unsigned int ifindex_org; /**< Original dst/src interface */
411         unsigned int *ifindex_ptr; /**< Interface ptr in message. */
412 };
413
414 struct flow_tcf_vxlan_decap {
415         struct flow_tcf_tunnel_hdr hdr;
416         uint16_t udp_port;
417 };
418
419 struct flow_tcf_vxlan_encap {
420         struct flow_tcf_tunnel_hdr hdr;
421         uint32_t mask;
422         struct {
423                 struct ether_addr dst;
424                 struct ether_addr src;
425         } eth;
426         union {
427                 struct {
428                         rte_be32_t dst;
429                         rte_be32_t src;
430                 } ipv4;
431                 struct {
432                         uint8_t dst[IPV6_ADDR_LEN];
433                         uint8_t src[IPV6_ADDR_LEN];
434                 } ipv6;
435         };
436 struct {
437                 rte_be16_t src;
438                 rte_be16_t dst;
439         } udp;
440         struct {
441                 uint8_t vni[3];
442         } vxlan;
443 };
444
445 /** Structure used when extracting the values of a flow counters
446  * from a netlink message.
447  */
448 struct flow_tcf_stats_basic {
449         bool valid;
450         struct gnet_stats_basic counters;
451 };
452
453 /** Empty masks for known item types. */
454 static const union {
455         struct rte_flow_item_port_id port_id;
456         struct rte_flow_item_eth eth;
457         struct rte_flow_item_vlan vlan;
458         struct rte_flow_item_ipv4 ipv4;
459         struct rte_flow_item_ipv6 ipv6;
460         struct rte_flow_item_tcp tcp;
461         struct rte_flow_item_udp udp;
462         struct rte_flow_item_vxlan vxlan;
463 } flow_tcf_mask_empty;
464
465 /** Supported masks for known item types. */
466 static const struct {
467         struct rte_flow_item_port_id port_id;
468         struct rte_flow_item_eth eth;
469         struct rte_flow_item_vlan vlan;
470         struct rte_flow_item_ipv4 ipv4;
471         struct rte_flow_item_ipv6 ipv6;
472         struct rte_flow_item_tcp tcp;
473         struct rte_flow_item_udp udp;
474         struct rte_flow_item_vxlan vxlan;
475 } flow_tcf_mask_supported = {
476         .port_id = {
477                 .id = 0xffffffff,
478         },
479         .eth = {
480                 .type = RTE_BE16(0xffff),
481                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
482                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
483         },
484         .vlan = {
485                 /* PCP and VID only, no DEI. */
486                 .tci = RTE_BE16(0xefff),
487                 .inner_type = RTE_BE16(0xffff),
488         },
489         .ipv4.hdr = {
490                 .next_proto_id = 0xff,
491                 .src_addr = RTE_BE32(0xffffffff),
492                 .dst_addr = RTE_BE32(0xffffffff),
493         },
494         .ipv6.hdr = {
495                 .proto = 0xff,
496                 .src_addr =
497                         "\xff\xff\xff\xff\xff\xff\xff\xff"
498                         "\xff\xff\xff\xff\xff\xff\xff\xff",
499                 .dst_addr =
500                         "\xff\xff\xff\xff\xff\xff\xff\xff"
501                         "\xff\xff\xff\xff\xff\xff\xff\xff",
502         },
503         .tcp.hdr = {
504                 .src_port = RTE_BE16(0xffff),
505                 .dst_port = RTE_BE16(0xffff),
506                 .tcp_flags = 0xff,
507         },
508         .udp.hdr = {
509                 .src_port = RTE_BE16(0xffff),
510                 .dst_port = RTE_BE16(0xffff),
511         },
512         .vxlan = {
513                .vni = "\xff\xff\xff",
514         },
515 };
516
517 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
518 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
519 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
520 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
521 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
522
523 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
524
525 /** DPDK port to network interface index (ifindex) conversion. */
526 struct flow_tcf_ptoi {
527         uint16_t port_id; /**< DPDK port ID. */
528         unsigned int ifindex; /**< Network interface index. */
529 };
530
531 /* Due to a limitation on driver/FW. */
532 #define MLX5_TCF_GROUP_ID_MAX 3
533 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
534
535 #define MLX5_TCF_FATE_ACTIONS \
536         (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
537          MLX5_FLOW_ACTION_JUMP)
538
539 #define MLX5_TCF_VLAN_ACTIONS \
540         (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
541          MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
542
543 #define MLX5_TCF_VXLAN_ACTIONS \
544         (MLX5_FLOW_ACTION_VXLAN_ENCAP | MLX5_FLOW_ACTION_VXLAN_DECAP)
545
546 #define MLX5_TCF_PEDIT_ACTIONS \
547         (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
548          MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
549          MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
550          MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
551          MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
552
553 #define MLX5_TCF_CONFIG_ACTIONS \
554         (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
555          MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
556          MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
557          (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
558
559 #define MAX_PEDIT_KEYS 128
560 #define SZ_PEDIT_KEY_VAL 4
561
562 #define NUM_OF_PEDIT_KEYS(sz) \
563         (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
564
565 struct pedit_key_ex {
566         enum pedit_header_type htype;
567         enum pedit_cmd cmd;
568 };
569
570 struct pedit_parser {
571         struct tc_pedit_sel sel;
572         struct tc_pedit_key keys[MAX_PEDIT_KEYS];
573         struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
574 };
575
576 /**
577  * Create space for using the implicitly created TC flow counter.
578  *
579  * @param[in] dev
580  *   Pointer to the Ethernet device structure.
581  *
582  * @return
583  *   A pointer to the counter data structure, NULL otherwise and
584  *   rte_errno is set.
585  */
586 static struct mlx5_flow_counter *
587 flow_tcf_counter_new(void)
588 {
589         struct mlx5_flow_counter *cnt;
590
591         /*
592          * eswitch counter cannot be shared and its id is unknown.
593          * currently returning all with id 0.
594          * in the future maybe better to switch to unique numbers.
595          */
596         struct mlx5_flow_counter tmpl = {
597                 .ref_cnt = 1,
598         };
599         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
600         if (!cnt) {
601                 rte_errno = ENOMEM;
602                 return NULL;
603         }
604         *cnt = tmpl;
605         /* Implicit counter, do not add to list. */
606         return cnt;
607 }
608
609 /**
610  * Set pedit key of MAC address
611  *
612  * @param[in] actions
613  *   pointer to action specification
614  * @param[in,out] p_parser
615  *   pointer to pedit_parser
616  */
617 static void
618 flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
619                            struct pedit_parser *p_parser)
620 {
621         int idx = p_parser->sel.nkeys;
622         uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
623                                         offsetof(struct ether_hdr, s_addr) :
624                                         offsetof(struct ether_hdr, d_addr);
625         const struct rte_flow_action_set_mac *conf =
626                 (const struct rte_flow_action_set_mac *)actions->conf;
627
628         p_parser->keys[idx].off = off;
629         p_parser->keys[idx].mask = ~UINT32_MAX;
630         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
631         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
632         memcpy(&p_parser->keys[idx].val,
633                 conf->mac_addr, SZ_PEDIT_KEY_VAL);
634         idx++;
635         p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
636         p_parser->keys[idx].mask = 0xFFFF0000;
637         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
638         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
639         memcpy(&p_parser->keys[idx].val,
640                 conf->mac_addr + SZ_PEDIT_KEY_VAL,
641                 ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
642         p_parser->sel.nkeys = (++idx);
643 }
644
/**
 * Set pedit key of decrease/set ttl
 *
 * Appends one pedit key that either decrements the TTL/hop-limit by one
 * (DEC_TTL) or overwrites it with a given value (SET_TTL).
 *
 * @param[in] actions
 *   pointer to action specification
 * @param[in,out] p_parser
 *   pointer to pedit_parser
 * @param[in] item_flags
 *   flags of all items presented
 */
static void
flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
				struct pedit_parser *p_parser,
				uint64_t item_flags)
{
	int idx = p_parser->sel.nkeys;

	/* Preserve the other 3 bytes of the 32-bit word; only the
	 * TTL/hop-limit byte is touched.
	 */
	p_parser->keys[idx].mask = 0xFFFFFF00;
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
		p_parser->keys[idx].off =
			offsetof(struct ipv4_hdr, time_to_live);
	}
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
		/* If both L3 flags are set, IPv6 wins (written last). */
		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
		p_parser->keys[idx].off =
			offsetof(struct ipv6_hdr, hop_limits);
	}
	if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
		/* ADD of 0xFF to a single byte decrements it by one. */
		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
		p_parser->keys[idx].val = 0x000000FF;
	} else {
		/* SET_TTL: write the requested TTL value verbatim. */
		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
		p_parser->keys[idx].val =
			(__u32)((const struct rte_flow_action_set_ttl *)
			 actions->conf)->ttl_value;
	}
	/* NOTE(review): when neither L3 layer flag is set, htype/off remain
	 * zero — presumably flow validation guarantees one is present;
	 * confirm against the validate routine.
	 */
	p_parser->sel.nkeys = (++idx);
}
684
/**
 * Set pedit key of transport (TCP/UDP) port value
 *
 * Appends one pedit key that overwrites the 16-bit source or destination
 * port within the L4 header.
 *
 * @param[in] actions
 *   pointer to action specification
 * @param[in,out] p_parser
 *   pointer to pedit_parser
 * @param[in] item_flags
 *   flags of all items presented
 */
static void
flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
				struct pedit_parser *p_parser,
				uint64_t item_flags)
{
	int idx = p_parser->sel.nkeys;

	/* L4 item flags select the pedit header type; if both are set,
	 * TCP wins (written last).
	 */
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
	/* offset of src/dst port is same for TCP and UDP */
	p_parser->keys[idx].off =
		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
		offsetof(struct tcp_hdr, src_port) :
		offsetof(struct tcp_hdr, dst_port);
	/* NOTE(review): mask 0xFFFF0000 keeps the upper half of the host-order
	 * word while the big-endian port occupies the first two bytes —
	 * assumes a little-endian host memory layout; confirm on BE targets.
	 */
	p_parser->keys[idx].mask = 0xFFFF0000;
	p_parser->keys[idx].val =
		(__u32)((const struct rte_flow_action_set_tp *)
				actions->conf)->port;
	p_parser->sel.nkeys = (++idx);
}
718
719 /**
720  * Set pedit key of ipv6 address
721  *
722  * @param[in] actions
723  *   pointer to action specification
724  * @param[in,out] p_parser
725  *   pointer to pedit_parser
726  */
727 static void
728 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
729                                  struct pedit_parser *p_parser)
730 {
731         int idx = p_parser->sel.nkeys;
732         int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
733         int off_base =
734                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
735                 offsetof(struct ipv6_hdr, src_addr) :
736                 offsetof(struct ipv6_hdr, dst_addr);
737         const struct rte_flow_action_set_ipv6 *conf =
738                 (const struct rte_flow_action_set_ipv6 *)actions->conf;
739
740         for (int i = 0; i < keys; i++, idx++) {
741                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
742                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
743                 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
744                 p_parser->keys[idx].mask = ~UINT32_MAX;
745                 memcpy(&p_parser->keys[idx].val,
746                         conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
747                         SZ_PEDIT_KEY_VAL);
748         }
749         p_parser->sel.nkeys += keys;
750 }
751
752 /**
753  * Set pedit key of ipv4 address
754  *
755  * @param[in] actions
756  *   pointer to action specification
757  * @param[in,out] p_parser
758  *   pointer to pedit_parser
759  */
760 static void
761 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
762                                  struct pedit_parser *p_parser)
763 {
764         int idx = p_parser->sel.nkeys;
765
766         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
767         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
768         p_parser->keys[idx].off =
769                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
770                 offsetof(struct ipv4_hdr, src_addr) :
771                 offsetof(struct ipv4_hdr, dst_addr);
772         p_parser->keys[idx].mask = ~UINT32_MAX;
773         p_parser->keys[idx].val =
774                 ((const struct rte_flow_action_set_ipv4 *)
775                  actions->conf)->ipv4_addr;
776         p_parser->sel.nkeys = (++idx);
777 }
778
/**
 * Create the pedit's na attribute in netlink message
 * on pre-allocate message buffer
 *
 * Collects every consecutive modify-header rte_flow action into a single
 * tc-pedit action, then serializes the selector, the keys, and the
 * extended-key attributes into the message.
 *
 * @param[in,out] nl
 *   pointer to pre-allocated netlink message buffer
 * @param[in,out] actions
 *   pointer to pointer of actions specification.
 *   Advanced past the consumed modify-header actions; on return it points
 *   at the last consumed action (see the trailing decrement).
 * @param[in] item_flags
 *   flags of all item presented
 */
static void
flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
                              const struct rte_flow_action **actions,
                              uint64_t item_flags)
{
        struct pedit_parser p_parser;
        struct nlattr *na_act_options;
        struct nlattr *na_pedit_keys;

        memset(&p_parser, 0, sizeof(p_parser));
        mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
        na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
        /* all modify header actions should be in one tc-pedit action */
        for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
                switch ((*actions)->type) {
                case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
                case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
                        flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
                        break;
                case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
                case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
                        flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
                        break;
                case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
                case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
                        flow_tcf_pedit_key_set_tp_port(*actions,
                                                        &p_parser, item_flags);
                        break;
                case RTE_FLOW_ACTION_TYPE_SET_TTL:
                case RTE_FLOW_ACTION_TYPE_DEC_TTL:
                        flow_tcf_pedit_key_set_dec_ttl(*actions,
                                                        &p_parser, item_flags);
                        break;
                case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
                case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
                        flow_tcf_pedit_key_set_mac(*actions, &p_parser);
                        break;
                default:
                        /* First non-modify action ends the pedit run. */
                        goto pedit_mnl_msg_done;
                }
        }
pedit_mnl_msg_done:
        /* PIPE: continue to the next tc action after the rewrite. */
        p_parser.sel.action = TC_ACT_PIPE;
        /* Selector plus the variable-length key array in one attribute. */
        mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
                     sizeof(p_parser.sel) +
                     p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
                     &p_parser);
        /* One nested (htype, cmd) extended-key attribute per key. */
        na_pedit_keys =
                mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
        for (int i = 0; i < p_parser.sel.nkeys; i++) {
                struct nlattr *na_pedit_key =
                        mnl_attr_nest_start(nl,
                                            TCA_PEDIT_KEY_EX | NLA_F_NESTED);
                mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
                                 p_parser.keys_ex[i].htype);
                mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
                                 p_parser.keys_ex[i].cmd);
                mnl_attr_nest_end(nl, na_pedit_key);
        }
        mnl_attr_nest_end(nl, na_pedit_keys);
        mnl_attr_nest_end(nl, na_act_options);
        /* Step back so the caller's loop increment lands on the action
         * that terminated the pedit run.
         */
        (*actions)--;
}
855
856 /**
857  * Calculate max memory size of one TC-pedit actions.
858  * One TC-pedit action can contain set of keys each defining
859  * a rewrite element (rte_flow action)
860  *
861  * @param[in,out] actions
862  *   actions specification.
863  * @param[in,out] action_flags
864  *   actions flags
865  * @param[in,out] size
866  *   accumulated size
867  * @return
868  *   Max memory size of one TC-pedit action
869  */
870 static int
871 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
872                                 uint64_t *action_flags)
873 {
874         int pedit_size = 0;
875         int keys = 0;
876         uint64_t flags = 0;
877
878         pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
879                       SZ_NLATTR_STRZ_OF("pedit") +
880                       SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
881         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
882                 switch ((*actions)->type) {
883                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
884                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
885                         flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
886                         break;
887                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
888                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
889                         flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
890                         break;
891                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
892                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
893                         flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
894                         break;
895                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
896                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
897                         flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
898                         break;
899                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
900                         /* TCP is as same as UDP */
901                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
902                         flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
903                         break;
904                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
905                         /* TCP is as same as UDP */
906                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
907                         flags |= MLX5_FLOW_ACTION_SET_TP_DST;
908                         break;
909                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
910                         keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
911                         flags |= MLX5_FLOW_ACTION_SET_TTL;
912                         break;
913                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
914                         keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
915                         flags |= MLX5_FLOW_ACTION_DEC_TTL;
916                         break;
917                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
918                         keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
919                         flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
920                         break;
921                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
922                         keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
923                         flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
924                         break;
925                 default:
926                         goto get_pedit_action_size_done;
927                 }
928         }
929 get_pedit_action_size_done:
930         /* TCA_PEDIT_PARAMS_EX */
931         pedit_size +=
932                 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
933                                   keys * sizeof(struct tc_pedit_key));
934         pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
935         pedit_size += keys *
936                       /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
937                       (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
938                        SZ_NLATTR_DATA_OF(2));
939         (*action_flags) |= flags;
940         (*actions)--;
941         return pedit_size;
942 }
943
944 /**
945  * Retrieve mask for pattern item.
946  *
947  * This function does basic sanity checks on a pattern item in order to
948  * return the most appropriate mask for it.
949  *
950  * @param[in] item
951  *   Item specification.
952  * @param[in] mask_default
953  *   Default mask for pattern item as specified by the flow API.
954  * @param[in] mask_supported
955  *   Mask fields supported by the implementation.
956  * @param[in] mask_empty
957  *   Empty mask to return when there is no specification.
958  * @param[out] error
959  *   Perform verbose error reporting if not NULL.
960  *
961  * @return
962  *   Either @p item->mask or one of the mask parameters on success, NULL
963  *   otherwise and rte_errno is set.
964  */
965 static const void *
966 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
967                    const void *mask_supported, const void *mask_empty,
968                    size_t mask_size, struct rte_flow_error *error)
969 {
970         const uint8_t *mask;
971         size_t i;
972
973         /* item->last and item->mask cannot exist without item->spec. */
974         if (!item->spec && (item->mask || item->last)) {
975                 rte_flow_error_set(error, EINVAL,
976                                    RTE_FLOW_ERROR_TYPE_ITEM, item,
977                                    "\"mask\" or \"last\" field provided without"
978                                    " a corresponding \"spec\"");
979                 return NULL;
980         }
981         /* No spec, no mask, no problem. */
982         if (!item->spec)
983                 return mask_empty;
984         mask = item->mask ? item->mask : mask_default;
985         assert(mask);
986         /*
987          * Single-pass check to make sure that:
988          * - Mask is supported, no bits are set outside mask_supported.
989          * - Both item->spec and item->last are included in mask.
990          */
991         for (i = 0; i != mask_size; ++i) {
992                 if (!mask[i])
993                         continue;
994                 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
995                     ((const uint8_t *)mask_supported)[i]) {
996                         rte_flow_error_set(error, ENOTSUP,
997                                            RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
998                                            "unsupported field found"
999                                            " in \"mask\"");
1000                         return NULL;
1001                 }
1002                 if (item->last &&
1003                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
1004                     (((const uint8_t *)item->last)[i] & mask[i])) {
1005                         rte_flow_error_set(error, EINVAL,
1006                                            RTE_FLOW_ERROR_TYPE_ITEM_LAST,
1007                                            item->last,
1008                                            "range between \"spec\" and \"last\""
1009                                            " not comprised in \"mask\"");
1010                         return NULL;
1011                 }
1012         }
1013         return mask;
1014 }
1015
1016 /**
1017  * Build a conversion table between port ID and ifindex.
1018  *
1019  * @param[in] dev
1020  *   Pointer to Ethernet device.
1021  * @param[out] ptoi
1022  *   Pointer to ptoi table.
1023  * @param[in] len
1024  *   Size of ptoi table provided.
1025  *
1026  * @return
1027  *   Size of ptoi table filled.
1028  */
1029 static unsigned int
1030 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
1031                           unsigned int len)
1032 {
1033         unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
1034         uint16_t port_id[n + 1];
1035         unsigned int i;
1036         unsigned int own = 0;
1037
1038         /* At least one port is needed when no switch domain is present. */
1039         if (!n) {
1040                 n = 1;
1041                 port_id[0] = dev->data->port_id;
1042         } else {
1043                 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
1044         }
1045         if (n > len)
1046                 return 0;
1047         for (i = 0; i != n; ++i) {
1048                 struct rte_eth_dev_info dev_info;
1049
1050                 rte_eth_dev_info_get(port_id[i], &dev_info);
1051                 if (port_id[i] == dev->data->port_id)
1052                         own = i;
1053                 ptoi[i].port_id = port_id[i];
1054                 ptoi[i].ifindex = dev_info.if_index;
1055         }
1056         /* Ensure first entry of ptoi[] is the current device. */
1057         if (own) {
1058                 ptoi[n] = ptoi[0];
1059                 ptoi[0] = ptoi[own];
1060                 ptoi[own] = ptoi[n];
1061         }
1062         /* An entry with zero ifindex terminates ptoi[]. */
1063         ptoi[n].port_id = 0;
1064         ptoi[n].ifindex = 0;
1065         return n;
1066 }
1067
1068 /**
1069  * Verify the @p attr will be correctly understood by the E-switch.
1070  *
1071  * @param[in] attr
1072  *   Pointer to flow attributes
1073  * @param[out] error
1074  *   Pointer to error structure.
1075  *
1076  * @return
1077  *   0 on success, a negative errno value otherwise and rte_errno is set.
1078  */
1079 static int
1080 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
1081                              struct rte_flow_error *error)
1082 {
1083         /*
1084          * Supported attributes: groups, some priorities and ingress only.
1085          * group is supported only if kernel supports chain. Don't care about
1086          * transfer as it is the caller's problem.
1087          */
1088         if (attr->group > MLX5_TCF_GROUP_ID_MAX)
1089                 return rte_flow_error_set(error, ENOTSUP,
1090                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
1091                                           "group ID larger than "
1092                                           RTE_STR(MLX5_TCF_GROUP_ID_MAX)
1093                                           " isn't supported");
1094         else if (attr->group > 0 &&
1095                  attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
1096                 return rte_flow_error_set(error, ENOTSUP,
1097                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1098                                           attr,
1099                                           "lowest priority level is "
1100                                           RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
1101                                           " when group is configured");
1102         else if (attr->priority > 0xfffe)
1103                 return rte_flow_error_set(error, ENOTSUP,
1104                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1105                                           attr,
1106                                           "lowest priority level is 0xfffe");
1107         if (!attr->ingress)
1108                 return rte_flow_error_set(error, EINVAL,
1109                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1110                                           attr, "only ingress is supported");
1111         if (attr->egress)
1112                 return rte_flow_error_set(error, ENOTSUP,
1113                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1114                                           attr, "egress is not supported");
1115         return 0;
1116 }
1117
1118 /**
1119  * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_ETH item for E-Switch.
1120  * The routine checks the L2 fields to be used in encapsulation header.
1121  *
1122  * @param[in] item
1123  *   Pointer to the item structure.
1124  * @param[out] error
1125  *   Pointer to the error structure.
1126  *
1127  * @return
1128  *   0 on success, a negative errno value otherwise and rte_errno is set.
1129  **/
1130 static int
1131 flow_tcf_validate_vxlan_encap_eth(const struct rte_flow_item *item,
1132                                   struct rte_flow_error *error)
1133 {
1134         const struct rte_flow_item_eth *spec = item->spec;
1135         const struct rte_flow_item_eth *mask = item->mask;
1136
1137         if (!spec) {
1138                 /*
1139                  * Specification for L2 addresses can be empty
1140                  * because these ones are optional and not
1141                  * required directly by tc rule. Kernel tries
1142                  * to resolve these ones on its own
1143                  */
1144                 return 0;
1145         }
1146         if (!mask) {
1147                 /* If mask is not specified use the default one. */
1148                 mask = &rte_flow_item_eth_mask;
1149         }
1150         if (memcmp(&mask->dst,
1151                    &flow_tcf_mask_empty.eth.dst,
1152                    sizeof(flow_tcf_mask_empty.eth.dst))) {
1153                 if (memcmp(&mask->dst,
1154                            &rte_flow_item_eth_mask.dst,
1155                            sizeof(rte_flow_item_eth_mask.dst)))
1156                         return rte_flow_error_set
1157                                 (error, ENOTSUP,
1158                                  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1159                                  "no support for partial mask on"
1160                                  " \"eth.dst\" field");
1161         }
1162         if (memcmp(&mask->src,
1163                    &flow_tcf_mask_empty.eth.src,
1164                    sizeof(flow_tcf_mask_empty.eth.src))) {
1165                 if (memcmp(&mask->src,
1166                            &rte_flow_item_eth_mask.src,
1167                            sizeof(rte_flow_item_eth_mask.src)))
1168                         return rte_flow_error_set
1169                                 (error, ENOTSUP,
1170                                  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1171                                  "no support for partial mask on"
1172                                  " \"eth.src\" field");
1173         }
1174         if (mask->type != RTE_BE16(0x0000)) {
1175                 if (mask->type != RTE_BE16(0xffff))
1176                         return rte_flow_error_set
1177                                 (error, ENOTSUP,
1178                                  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1179                                  "no support for partial mask on"
1180                                  " \"eth.type\" field");
1181                 DRV_LOG(WARNING,
1182                         "outer ethernet type field"
1183                         " cannot be forced for vxlan"
1184                         " encapsulation, parameter ignored");
1185         }
1186         return 0;
1187 }
1188
1189 /**
1190  * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV4 item for E-Switch.
1191  * The routine checks the IPv4 fields to be used in encapsulation header.
1192  *
1193  * @param[in] item
1194  *   Pointer to the item structure.
1195  * @param[out] error
1196  *   Pointer to the error structure.
1197  *
1198  * @return
1199  *   0 on success, a negative errno value otherwise and rte_errno is set.
1200  **/
1201 static int
1202 flow_tcf_validate_vxlan_encap_ipv4(const struct rte_flow_item *item,
1203                                    struct rte_flow_error *error)
1204 {
1205         const struct rte_flow_item_ipv4 *spec = item->spec;
1206         const struct rte_flow_item_ipv4 *mask = item->mask;
1207
1208         if (!spec) {
1209                 /*
1210                  * Specification for IP addresses cannot be empty
1211                  * because it is required by tunnel_key parameter.
1212                  */
1213                 return rte_flow_error_set(error, EINVAL,
1214                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1215                                           "NULL outer ipv4 address"
1216                                           " specification for vxlan"
1217                                           " encapsulation");
1218         }
1219         if (!mask)
1220                 mask = &rte_flow_item_ipv4_mask;
1221         if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
1222                 if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
1223                         return rte_flow_error_set
1224                                 (error, ENOTSUP,
1225                                  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1226                                  "no support for partial mask on"
1227                                  " \"ipv4.hdr.dst_addr\" field"
1228                                  " for vxlan encapsulation");
1229                 /* More IPv4 address validations can be put here. */
1230         } else {
1231                 /*
1232                  * Kernel uses the destination IP address to determine
1233                  * the routing path and obtain the MAC destination
1234                  * address, so IP destination address must be
1235                  * specified in the tc rule.
1236                  */
1237                 return rte_flow_error_set(error, EINVAL,
1238                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1239                                           "outer ipv4 destination address"
1240                                           " must be specified for"
1241                                           " vxlan encapsulation");
1242         }
1243         if (mask->hdr.src_addr != RTE_BE32(0x00000000)) {
1244                 if (mask->hdr.src_addr != RTE_BE32(0xffffffff))
1245                         return rte_flow_error_set
1246                                 (error, ENOTSUP,
1247                                  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1248                                  "no support for partial mask on"
1249                                  " \"ipv4.hdr.src_addr\" field"
1250                                  " for vxlan encapsulation");
1251                 /* More IPv4 address validations can be put here. */
1252         } else {
1253                 /*
1254                  * Kernel uses the source IP address to select the
1255                  * interface for egress encapsulated traffic, so
1256                  * it must be specified in the tc rule.
1257                  */
1258                 return rte_flow_error_set(error, EINVAL,
1259                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1260                                           "outer ipv4 source address"
1261                                           " must be specified for"
1262                                           " vxlan encapsulation");
1263         }
1264         return 0;
1265 }
1266
1267 /**
1268  * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV6 item for E-Switch.
1269  * The routine checks the IPv6 fields to be used in encapsulation header.
1270  *
1271  * @param[in] item
1272  *   Pointer to the item structure.
1273  * @param[out] error
1274  *   Pointer to the error structure.
1275  *
1276  * @return
1277  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1278  **/
1279 static int
1280 flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item,
1281                                    struct rte_flow_error *error)
1282 {
1283         const struct rte_flow_item_ipv6 *spec = item->spec;
1284         const struct rte_flow_item_ipv6 *mask = item->mask;
1285
1286         if (!spec) {
1287                 /*
1288                  * Specification for IP addresses cannot be empty
1289                  * because it is required by tunnel_key parameter.
1290                  */
1291                 return rte_flow_error_set(error, EINVAL,
1292                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1293                                           "NULL outer ipv6 address"
1294                                           " specification for"
1295                                           " vxlan encapsulation");
1296         }
1297         if (!mask)
1298                 mask = &rte_flow_item_ipv6_mask;
1299         if (memcmp(&mask->hdr.dst_addr,
1300                    &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
1301                    IPV6_ADDR_LEN)) {
1302                 if (memcmp(&mask->hdr.dst_addr,
1303                            &rte_flow_item_ipv6_mask.hdr.dst_addr,
1304                            IPV6_ADDR_LEN))
1305                         return rte_flow_error_set
1306                                         (error, ENOTSUP,
1307                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1308                                          "no support for partial mask on"
1309                                          " \"ipv6.hdr.dst_addr\" field"
1310                                          " for vxlan encapsulation");
1311                 /* More IPv6 address validations can be put here. */
1312         } else {
1313                 /*
1314                  * Kernel uses the destination IP address to determine
1315                  * the routing path and obtain the MAC destination
1316                  * address (heigh or gate), so IP destination address
1317                  * must be specified within the tc rule.
1318                  */
1319                 return rte_flow_error_set(error, EINVAL,
1320                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1321                                           "outer ipv6 destination address"
1322                                           " must be specified for"
1323                                           " vxlan encapsulation");
1324         }
1325         if (memcmp(&mask->hdr.src_addr,
1326                    &flow_tcf_mask_empty.ipv6.hdr.src_addr,
1327                    IPV6_ADDR_LEN)) {
1328                 if (memcmp(&mask->hdr.src_addr,
1329                            &rte_flow_item_ipv6_mask.hdr.src_addr,
1330                            IPV6_ADDR_LEN))
1331                         return rte_flow_error_set
1332                                         (error, ENOTSUP,
1333                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1334                                          "no support for partial mask on"
1335                                          " \"ipv6.hdr.src_addr\" field"
1336                                          " for vxlan encapsulation");
1337                 /* More L3 address validation can be put here. */
1338         } else {
1339                 /*
1340                  * Kernel uses the source IP address to select the
1341                  * interface for egress encapsulated traffic, so
1342                  * it must be specified in the tc rule.
1343                  */
1344                 return rte_flow_error_set(error, EINVAL,
1345                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1346                                           "outer L3 source address"
1347                                           " must be specified for"
1348                                           " vxlan encapsulation");
1349         }
1350         return 0;
1351 }
1352
1353 /**
1354  * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_UDP item for E-Switch.
1355  * The routine checks the UDP fields to be used in encapsulation header.
1356  *
1357  * @param[in] item
1358  *   Pointer to the item structure.
1359  * @param[out] error
1360  *   Pointer to the error structure.
1361  *
1362  * @return
1363  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1364  **/
1365 static int
1366 flow_tcf_validate_vxlan_encap_udp(const struct rte_flow_item *item,
1367                                   struct rte_flow_error *error)
1368 {
1369         const struct rte_flow_item_udp *spec = item->spec;
1370         const struct rte_flow_item_udp *mask = item->mask;
1371
1372         if (!spec) {
1373                 /*
1374                  * Specification for UDP ports cannot be empty
1375                  * because it is required by tunnel_key parameter.
1376                  */
1377                 return rte_flow_error_set(error, EINVAL,
1378                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1379                                           "NULL UDP port specification "
1380                                           " for vxlan encapsulation");
1381         }
1382         if (!mask)
1383                 mask = &rte_flow_item_udp_mask;
1384         if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
1385                 if (mask->hdr.dst_port != RTE_BE16(0xffff))
1386                         return rte_flow_error_set
1387                                         (error, ENOTSUP,
1388                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1389                                          "no support for partial mask on"
1390                                          " \"udp.hdr.dst_port\" field"
1391                                          " for vxlan encapsulation");
1392                 if (!spec->hdr.dst_port)
1393                         return rte_flow_error_set
1394                                         (error, EINVAL,
1395                                          RTE_FLOW_ERROR_TYPE_ITEM, item,
1396                                          "outer UDP remote port cannot be"
1397                                          " 0 for vxlan encapsulation");
1398         } else {
1399                 return rte_flow_error_set(error, EINVAL,
1400                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1401                                           "outer UDP remote port"
1402                                           " must be specified for"
1403                                           " vxlan encapsulation");
1404         }
1405         if (mask->hdr.src_port != RTE_BE16(0x0000)) {
1406                 if (mask->hdr.src_port != RTE_BE16(0xffff))
1407                         return rte_flow_error_set
1408                                         (error, ENOTSUP,
1409                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1410                                          "no support for partial mask on"
1411                                          " \"udp.hdr.src_port\" field"
1412                                          " for vxlan encapsulation");
1413                 DRV_LOG(WARNING,
1414                         "outer UDP source port cannot be"
1415                         " forced for vxlan encapsulation,"
1416                         " parameter ignored");
1417         }
1418         return 0;
1419 }
1420
1421 /**
1422  * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_VXLAN item for E-Switch.
1423  * The routine checks the VNIP fields to be used in encapsulation header.
1424  *
1425  * @param[in] item
1426  *   Pointer to the item structure.
1427  * @param[out] error
1428  *   Pointer to the error structure.
1429  *
1430  * @return
1431  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1432  **/
1433 static int
1434 flow_tcf_validate_vxlan_encap_vni(const struct rte_flow_item *item,
1435                                   struct rte_flow_error *error)
1436 {
1437         const struct rte_flow_item_vxlan *spec = item->spec;
1438         const struct rte_flow_item_vxlan *mask = item->mask;
1439
1440         if (!spec) {
1441                 /* Outer VNI is required by tunnel_key parameter. */
1442                 return rte_flow_error_set(error, EINVAL,
1443                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1444                                           "NULL VNI specification"
1445                                           " for vxlan encapsulation");
1446         }
1447         if (!mask)
1448                 mask = &rte_flow_item_vxlan_mask;
1449         if (!mask->vni[0] && !mask->vni[1] && !mask->vni[2])
1450                 return rte_flow_error_set(error, EINVAL,
1451                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1452                                           "outer VNI must be specified "
1453                                           "for vxlan encapsulation");
1454         if (mask->vni[0] != 0xff ||
1455             mask->vni[1] != 0xff ||
1456             mask->vni[2] != 0xff)
1457                 return rte_flow_error_set(error, ENOTSUP,
1458                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1459                                           "no support for partial mask on"
1460                                           " \"vxlan.vni\" field");
1461
1462         if (!spec->vni[0] && !spec->vni[1] && !spec->vni[2])
1463                 return rte_flow_error_set(error, EINVAL,
1464                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1465                                           "vxlan vni cannot be 0");
1466         return 0;
1467 }
1468
1469 /**
1470  * Validate VXLAN_ENCAP action item list for E-Switch.
1471  * The routine checks items to be used in encapsulation header.
1472  *
1473  * @param[in] action
1474  *   Pointer to the VXLAN_ENCAP action structure.
1475  * @param[out] error
1476  *   Pointer to the error structure.
1477  *
1478  * @return
1479  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1480  **/
1481 static int
1482 flow_tcf_validate_vxlan_encap(const struct rte_flow_action *action,
1483                               struct rte_flow_error *error)
1484 {
1485         const struct rte_flow_item *items;
1486         int ret;
1487         uint32_t item_flags = 0;
1488
1489         if (!action->conf)
1490                 return rte_flow_error_set(error, EINVAL,
1491                                           RTE_FLOW_ERROR_TYPE_ACTION, action,
1492                                           "Missing vxlan tunnel"
1493                                           " action configuration");
1494         items = ((const struct rte_flow_action_vxlan_encap *)
1495                                         action->conf)->definition;
1496         if (!items)
1497                 return rte_flow_error_set(error, EINVAL,
1498                                           RTE_FLOW_ERROR_TYPE_ACTION, action,
1499                                           "Missing vxlan tunnel"
1500                                           " encapsulation parameters");
1501         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1502                 switch (items->type) {
1503                 case RTE_FLOW_ITEM_TYPE_VOID:
1504                         break;
1505                 case RTE_FLOW_ITEM_TYPE_ETH:
1506                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1507                                                           error);
1508                         if (ret < 0)
1509                                 return ret;
1510                         ret = flow_tcf_validate_vxlan_encap_eth(items, error);
1511                         if (ret < 0)
1512                                 return ret;
1513                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1514                         break;
1515                 break;
1516                 case RTE_FLOW_ITEM_TYPE_IPV4:
1517                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1518                                                            error);
1519                         if (ret < 0)
1520                                 return ret;
1521                         ret = flow_tcf_validate_vxlan_encap_ipv4(items, error);
1522                         if (ret < 0)
1523                                 return ret;
1524                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1525                         break;
1526                 case RTE_FLOW_ITEM_TYPE_IPV6:
1527                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1528                                                            error);
1529                         if (ret < 0)
1530                                 return ret;
1531                         ret = flow_tcf_validate_vxlan_encap_ipv6(items, error);
1532                         if (ret < 0)
1533                                 return ret;
1534                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1535                         break;
1536                 case RTE_FLOW_ITEM_TYPE_UDP:
1537                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1538                                                            0xFF, error);
1539                         if (ret < 0)
1540                                 return ret;
1541                         ret = flow_tcf_validate_vxlan_encap_udp(items, error);
1542                         if (ret < 0)
1543                                 return ret;
1544                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1545                         break;
1546                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1547                         ret = mlx5_flow_validate_item_vxlan(items,
1548                                                             item_flags, error);
1549                         if (ret < 0)
1550                                 return ret;
1551                         ret = flow_tcf_validate_vxlan_encap_vni(items, error);
1552                         if (ret < 0)
1553                                 return ret;
1554                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1555                         break;
1556                 default:
1557                         return rte_flow_error_set
1558                                         (error, ENOTSUP,
1559                                          RTE_FLOW_ERROR_TYPE_ITEM, items,
1560                                          "vxlan encap item not supported");
1561                 }
1562         }
1563         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
1564                 return rte_flow_error_set(error, EINVAL,
1565                                           RTE_FLOW_ERROR_TYPE_ACTION, action,
1566                                           "no outer IP layer found"
1567                                           " for vxlan encapsulation");
1568         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1569                 return rte_flow_error_set(error, EINVAL,
1570                                           RTE_FLOW_ERROR_TYPE_ACTION, action,
1571                                           "no outer UDP layer found"
1572                                           " for vxlan encapsulation");
1573         if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
1574                 return rte_flow_error_set(error, EINVAL,
1575                                           RTE_FLOW_ERROR_TYPE_ACTION, action,
1576                                           "no VXLAN VNI found"
1577                                           " for vxlan encapsulation");
1578         return 0;
1579 }
1580
1581 /**
1582  * Validate RTE_FLOW_ITEM_TYPE_IPV4 item if VXLAN_DECAP action
1583  * is present in actions list.
1584  *
1585  * @param[in] ipv4
1586  *   Outer IPv4 address item (if any, NULL otherwise).
1587  * @param[out] error
1588  *   Pointer to the error structure.
1589  *
1590  * @return
1591  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1592  **/
1593 static int
1594 flow_tcf_validate_vxlan_decap_ipv4(const struct rte_flow_item *ipv4,
1595                                    struct rte_flow_error *error)
1596 {
1597         const struct rte_flow_item_ipv4 *spec = ipv4->spec;
1598         const struct rte_flow_item_ipv4 *mask = ipv4->mask;
1599
1600         if (!spec) {
1601                 /*
1602                  * Specification for IP addresses cannot be empty
1603                  * because it is required as decap parameter.
1604                  */
1605                 return rte_flow_error_set(error, EINVAL,
1606                                           RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
1607                                           "NULL outer ipv4 address"
1608                                           " specification for vxlan"
1609                                           " for vxlan decapsulation");
1610         }
1611         if (!mask)
1612                 mask = &rte_flow_item_ipv4_mask;
1613         if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
1614                 if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
1615                         return rte_flow_error_set
1616                                         (error, ENOTSUP,
1617                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1618                                          "no support for partial mask on"
1619                                          " \"ipv4.hdr.dst_addr\" field");
1620                 /* More IP address validations can be put here. */
1621         } else {
1622                 /*
1623                  * Kernel uses the destination IP address
1624                  * to determine the ingress network interface
1625                  * for traffic being decapsulated.
1626                  */
1627                 return rte_flow_error_set(error, EINVAL,
1628                                           RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
1629                                           "outer ipv4 destination address"
1630                                           " must be specified for"
1631                                           " vxlan decapsulation");
1632         }
1633         /* Source IP address is optional for decap. */
1634         if (mask->hdr.src_addr != RTE_BE32(0x00000000) &&
1635             mask->hdr.src_addr != RTE_BE32(0xffffffff))
1636                 return rte_flow_error_set(error, ENOTSUP,
1637                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1638                                           "no support for partial mask on"
1639                                           " \"ipv4.hdr.src_addr\" field");
1640         return 0;
1641 }
1642
1643 /**
1644  * Validate RTE_FLOW_ITEM_TYPE_IPV6 item if VXLAN_DECAP action
1645  * is present in actions list.
1646  *
1647  * @param[in] ipv6
1648  *   Outer IPv6 address item (if any, NULL otherwise).
1649  * @param[out] error
1650  *   Pointer to the error structure.
1651  *
1652  * @return
1653  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1654  **/
1655 static int
1656 flow_tcf_validate_vxlan_decap_ipv6(const struct rte_flow_item *ipv6,
1657                                    struct rte_flow_error *error)
1658 {
1659         const struct rte_flow_item_ipv6 *spec = ipv6->spec;
1660         const struct rte_flow_item_ipv6 *mask = ipv6->mask;
1661
1662         if (!spec) {
1663                 /*
1664                  * Specification for IP addresses cannot be empty
1665                  * because it is required as decap parameter.
1666                  */
1667                 return rte_flow_error_set(error, EINVAL,
1668                                           RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
1669                                           "NULL outer ipv6 address"
1670                                           " specification for vxlan"
1671                                           " decapsulation");
1672         }
1673         if (!mask)
1674                 mask = &rte_flow_item_ipv6_mask;
1675         if (memcmp(&mask->hdr.dst_addr,
1676                    &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
1677                    IPV6_ADDR_LEN)) {
1678                 if (memcmp(&mask->hdr.dst_addr,
1679                         &rte_flow_item_ipv6_mask.hdr.dst_addr,
1680                         IPV6_ADDR_LEN))
1681                         return rte_flow_error_set
1682                                         (error, ENOTSUP,
1683                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1684                                          "no support for partial mask on"
1685                                          " \"ipv6.hdr.dst_addr\" field");
1686                 /* More IP address validations can be put here. */
1687         } else {
1688                 /*
1689                  * Kernel uses the destination IP address
1690                  * to determine the ingress network interface
1691                  * for traffic being decapsulated.
1692                  */
1693                 return rte_flow_error_set(error, EINVAL,
1694                                           RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
1695                                           "outer ipv6 destination address must be "
1696                                           "specified for vxlan decapsulation");
1697         }
1698         /* Source IP address is optional for decap. */
1699         if (memcmp(&mask->hdr.src_addr,
1700                    &flow_tcf_mask_empty.ipv6.hdr.src_addr,
1701                    IPV6_ADDR_LEN)) {
1702                 if (memcmp(&mask->hdr.src_addr,
1703                            &rte_flow_item_ipv6_mask.hdr.src_addr,
1704                            IPV6_ADDR_LEN))
1705                         return rte_flow_error_set
1706                                         (error, ENOTSUP,
1707                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1708                                          "no support for partial mask on"
1709                                          " \"ipv6.hdr.src_addr\" field");
1710         }
1711         return 0;
1712 }
1713
1714 /**
1715  * Validate RTE_FLOW_ITEM_TYPE_UDP item if VXLAN_DECAP action
1716  * is present in actions list.
1717  *
1718  * @param[in] udp
1719  *   Outer UDP layer item (if any, NULL otherwise).
1720  * @param[out] error
1721  *   Pointer to the error structure.
1722  *
1723  * @return
1724  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1725  **/
1726 static int
1727 flow_tcf_validate_vxlan_decap_udp(const struct rte_flow_item *udp,
1728                                   struct rte_flow_error *error)
1729 {
1730         const struct rte_flow_item_udp *spec = udp->spec;
1731         const struct rte_flow_item_udp *mask = udp->mask;
1732
1733         if (!spec)
1734                 /*
1735                  * Specification for UDP ports cannot be empty
1736                  * because it is required as decap parameter.
1737                  */
1738                 return rte_flow_error_set(error, EINVAL,
1739                                           RTE_FLOW_ERROR_TYPE_ITEM, udp,
1740                                           "NULL UDP port specification"
1741                                           " for VXLAN decapsulation");
1742         if (!mask)
1743                 mask = &rte_flow_item_udp_mask;
1744         if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
1745                 if (mask->hdr.dst_port != RTE_BE16(0xffff))
1746                         return rte_flow_error_set
1747                                         (error, ENOTSUP,
1748                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1749                                          "no support for partial mask on"
1750                                          " \"udp.hdr.dst_port\" field");
1751                 if (!spec->hdr.dst_port)
1752                         return rte_flow_error_set
1753                                         (error, EINVAL,
1754                                          RTE_FLOW_ERROR_TYPE_ITEM, udp,
1755                                          "zero decap local UDP port");
1756         } else {
1757                 return rte_flow_error_set(error, EINVAL,
1758                                           RTE_FLOW_ERROR_TYPE_ITEM, udp,
1759                                           "outer UDP destination port must be "
1760                                           "specified for vxlan decapsulation");
1761         }
1762         if (mask->hdr.src_port != RTE_BE16(0x0000)) {
1763                 if (mask->hdr.src_port != RTE_BE16(0xffff))
1764                         return rte_flow_error_set
1765                                         (error, ENOTSUP,
1766                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1767                                          "no support for partial mask on"
1768                                          " \"udp.hdr.src_port\" field");
1769                 DRV_LOG(WARNING,
1770                         "outer UDP local port cannot be "
1771                         "forced for VXLAN encapsulation, "
1772                         "parameter ignored");
1773         }
1774         return 0;
1775 }
1776
1777 /**
1778  * Validate flow for E-Switch.
1779  *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
1782  * @param[in] attr
1783  *   Pointer to the flow attributes.
1784  * @param[in] items
1785  *   Pointer to the list of items.
1786  * @param[in] actions
1787  *   Pointer to the list of actions.
1788  * @param[out] error
1789  *   Pointer to the error structure.
1790  *
1791  * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
1793  */
1794 static int
1795 flow_tcf_validate(struct rte_eth_dev *dev,
1796                   const struct rte_flow_attr *attr,
1797                   const struct rte_flow_item items[],
1798                   const struct rte_flow_action actions[],
1799                   struct rte_flow_error *error)
1800 {
1801         union {
1802                 const struct rte_flow_item_port_id *port_id;
1803                 const struct rte_flow_item_eth *eth;
1804                 const struct rte_flow_item_vlan *vlan;
1805                 const struct rte_flow_item_ipv4 *ipv4;
1806                 const struct rte_flow_item_ipv6 *ipv6;
1807                 const struct rte_flow_item_tcp *tcp;
1808                 const struct rte_flow_item_udp *udp;
1809                 const struct rte_flow_item_vxlan *vxlan;
1810         } spec, mask;
1811         union {
1812                 const struct rte_flow_action_port_id *port_id;
1813                 const struct rte_flow_action_jump *jump;
1814                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1815                 const struct rte_flow_action_of_set_vlan_vid *
1816                         of_set_vlan_vid;
1817                 const struct rte_flow_action_of_set_vlan_pcp *
1818                         of_set_vlan_pcp;
1819                 const struct rte_flow_action_vxlan_encap *vxlan_encap;
1820                 const struct rte_flow_action_set_ipv4 *set_ipv4;
1821                 const struct rte_flow_action_set_ipv6 *set_ipv6;
1822         } conf;
1823         uint64_t item_flags = 0;
1824         uint64_t action_flags = 0;
1825         uint8_t next_protocol = -1;
1826         unsigned int tcm_ifindex = 0;
1827         uint8_t pedit_validated = 0;
1828         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1829         struct rte_eth_dev *port_id_dev = NULL;
1830         bool in_port_id_set;
1831         int ret;
1832
1833         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1834                                                 PTOI_TABLE_SZ_MAX(dev)));
1835         ret = flow_tcf_validate_attributes(attr, error);
1836         if (ret < 0)
1837                 return ret;
1838         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1839                 unsigned int i;
1840                 uint64_t current_action_flag = 0;
1841
1842                 switch (actions->type) {
1843                 case RTE_FLOW_ACTION_TYPE_VOID:
1844                         break;
1845                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1846                         current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1847                         if (!actions->conf)
1848                                 break;
1849                         conf.port_id = actions->conf;
1850                         if (conf.port_id->original)
1851                                 i = 0;
1852                         else
1853                                 for (i = 0; ptoi[i].ifindex; ++i)
1854                                         if (ptoi[i].port_id == conf.port_id->id)
1855                                                 break;
1856                         if (!ptoi[i].ifindex)
1857                                 return rte_flow_error_set
1858                                         (error, ENODEV,
1859                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1860                                          conf.port_id,
1861                                          "missing data to convert port ID to"
1862                                          " ifindex");
1863                         port_id_dev = &rte_eth_devices[conf.port_id->id];
1864                         break;
1865                 case RTE_FLOW_ACTION_TYPE_JUMP:
1866                         current_action_flag = MLX5_FLOW_ACTION_JUMP;
1867                         if (!actions->conf)
1868                                 break;
1869                         conf.jump = actions->conf;
1870                         if (attr->group >= conf.jump->group)
1871                                 return rte_flow_error_set
1872                                         (error, ENOTSUP,
1873                                          RTE_FLOW_ERROR_TYPE_ACTION,
1874                                          actions,
1875                                          "can jump only to a group forward");
1876                         break;
1877                 case RTE_FLOW_ACTION_TYPE_DROP:
1878                         current_action_flag = MLX5_FLOW_ACTION_DROP;
1879                         break;
1880                 case RTE_FLOW_ACTION_TYPE_COUNT:
1881                         break;
1882                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1883                         current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1884                         break;
1885                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1886                         current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1887                         break;
1888                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1889                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1890                                 return rte_flow_error_set
1891                                         (error, ENOTSUP,
1892                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1893                                          "vlan modify is not supported,"
1894                                          " set action must follow push action");
1895                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1896                         break;
1897                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1898                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1899                                 return rte_flow_error_set
1900                                         (error, ENOTSUP,
1901                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1902                                          "vlan modify is not supported,"
1903                                          " set action must follow push action");
1904                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1905                         break;
1906                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
1907                         current_action_flag = MLX5_FLOW_ACTION_VXLAN_DECAP;
1908                         break;
1909                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
1910                         ret = flow_tcf_validate_vxlan_encap(actions, error);
1911                         if (ret < 0)
1912                                 return ret;
1913                         current_action_flag = MLX5_FLOW_ACTION_VXLAN_ENCAP;
1914                         break;
1915                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1916                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1917                         break;
1918                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1919                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1920                         break;
1921                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1922                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1923                         break;
1924                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1925                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1926                         break;
1927                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1928                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1929                         break;
1930                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1931                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1932                         break;
1933                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1934                         current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1935                         break;
1936                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1937                         current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1938                         break;
1939                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1940                         current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
1941                         break;
1942                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1943                         current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
1944                         break;
1945                 default:
1946                         return rte_flow_error_set(error, ENOTSUP,
1947                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1948                                                   actions,
1949                                                   "action not supported");
1950                 }
1951                 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1952                         if (!actions->conf)
1953                                 return rte_flow_error_set
1954                                         (error, EINVAL,
1955                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1956                                          actions,
1957                                          "action configuration not set");
1958                 }
1959                 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1960                     pedit_validated)
1961                         return rte_flow_error_set(error, ENOTSUP,
1962                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1963                                                   actions,
1964                                                   "set actions should be "
1965                                                   "listed successively");
1966                 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1967                     (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1968                         pedit_validated = 1;
1969                 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1970                     (action_flags & MLX5_TCF_FATE_ACTIONS))
1971                         return rte_flow_error_set(error, EINVAL,
1972                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1973                                                   actions,
1974                                                   "can't have multiple fate"
1975                                                   " actions");
1976                 if ((current_action_flag & MLX5_TCF_VXLAN_ACTIONS) &&
1977                     (action_flags & MLX5_TCF_VXLAN_ACTIONS))
1978                         return rte_flow_error_set(error, EINVAL,
1979                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1980                                                   actions,
1981                                                   "can't have multiple vxlan"
1982                                                   " actions");
1983                 if ((current_action_flag & MLX5_TCF_VXLAN_ACTIONS) &&
1984                     (action_flags & MLX5_TCF_VLAN_ACTIONS))
1985                         return rte_flow_error_set(error, ENOTSUP,
1986                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1987                                                   actions,
1988                                                   "can't have vxlan and vlan"
1989                                                   " actions in the same rule");
1990                 action_flags |= current_action_flag;
1991         }
1992         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1993                 unsigned int i;
1994
1995                 if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
1996                     items->type != RTE_FLOW_ITEM_TYPE_ETH)
1997                         return rte_flow_error_set(error, ENOTSUP,
1998                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1999                                                   items,
2000                                                   "only L2 inner item"
2001                                                   " is supported");
2002                 switch (items->type) {
2003                 case RTE_FLOW_ITEM_TYPE_VOID:
2004                         break;
2005                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
2006                         mask.port_id = flow_tcf_item_mask
2007                                 (items, &rte_flow_item_port_id_mask,
2008                                  &flow_tcf_mask_supported.port_id,
2009                                  &flow_tcf_mask_empty.port_id,
2010                                  sizeof(flow_tcf_mask_supported.port_id),
2011                                  error);
2012                         if (!mask.port_id)
2013                                 return -rte_errno;
2014                         if (mask.port_id == &flow_tcf_mask_empty.port_id) {
2015                                 in_port_id_set = 1;
2016                                 break;
2017                         }
2018                         spec.port_id = items->spec;
2019                         if (mask.port_id->id && mask.port_id->id != 0xffffffff)
2020                                 return rte_flow_error_set
2021                                         (error, ENOTSUP,
2022                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2023                                          mask.port_id,
2024                                          "no support for partial mask on"
2025                                          " \"id\" field");
2026                         if (!mask.port_id->id)
2027                                 i = 0;
2028                         else
2029                                 for (i = 0; ptoi[i].ifindex; ++i)
2030                                         if (ptoi[i].port_id == spec.port_id->id)
2031                                                 break;
2032                         if (!ptoi[i].ifindex)
2033                                 return rte_flow_error_set
2034                                         (error, ENODEV,
2035                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2036                                          spec.port_id,
2037                                          "missing data to convert port ID to"
2038                                          " ifindex");
2039                         if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
2040                                 return rte_flow_error_set
2041                                         (error, ENOTSUP,
2042                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2043                                          spec.port_id,
2044                                          "cannot match traffic for"
2045                                          " several port IDs through"
2046                                          " a single flow rule");
2047                         tcm_ifindex = ptoi[i].ifindex;
2048                         in_port_id_set = 1;
2049                         break;
2050                 case RTE_FLOW_ITEM_TYPE_ETH:
2051                         ret = mlx5_flow_validate_item_eth(items, item_flags,
2052                                                           error);
2053                         if (ret < 0)
2054                                 return ret;
2055                         item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
2056                                         MLX5_FLOW_LAYER_INNER_L2 :
2057                                         MLX5_FLOW_LAYER_OUTER_L2;
2058                         /* TODO:
2059                          * Redundant check due to different supported mask.
2060                          * Same for the rest of items.
2061                          */
2062                         mask.eth = flow_tcf_item_mask
2063                                 (items, &rte_flow_item_eth_mask,
2064                                  &flow_tcf_mask_supported.eth,
2065                                  &flow_tcf_mask_empty.eth,
2066                                  sizeof(flow_tcf_mask_supported.eth),
2067                                  error);
2068                         if (!mask.eth)
2069                                 return -rte_errno;
2070                         if (mask.eth->type && mask.eth->type !=
2071                             RTE_BE16(0xffff))
2072                                 return rte_flow_error_set
2073                                         (error, ENOTSUP,
2074                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2075                                          mask.eth,
2076                                          "no support for partial mask on"
2077                                          " \"type\" field");
2078                         break;
2079                 case RTE_FLOW_ITEM_TYPE_VLAN:
2080                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
2081                                                            error);
2082                         if (ret < 0)
2083                                 return ret;
2084                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
2085                         mask.vlan = flow_tcf_item_mask
2086                                 (items, &rte_flow_item_vlan_mask,
2087                                  &flow_tcf_mask_supported.vlan,
2088                                  &flow_tcf_mask_empty.vlan,
2089                                  sizeof(flow_tcf_mask_supported.vlan),
2090                                  error);
2091                         if (!mask.vlan)
2092                                 return -rte_errno;
2093                         if ((mask.vlan->tci & RTE_BE16(0xe000) &&
2094                              (mask.vlan->tci & RTE_BE16(0xe000)) !=
2095                               RTE_BE16(0xe000)) ||
2096                             (mask.vlan->tci & RTE_BE16(0x0fff) &&
2097                              (mask.vlan->tci & RTE_BE16(0x0fff)) !=
2098                               RTE_BE16(0x0fff)) ||
2099                             (mask.vlan->inner_type &&
2100                              mask.vlan->inner_type != RTE_BE16(0xffff)))
2101                                 return rte_flow_error_set
2102                                         (error, ENOTSUP,
2103                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2104                                          mask.vlan,
2105                                          "no support for partial masks on"
2106                                          " \"tci\" (PCP and VID parts) and"
2107                                          " \"inner_type\" fields");
2108                         break;
2109                 case RTE_FLOW_ITEM_TYPE_IPV4:
2110                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
2111                                                            error);
2112                         if (ret < 0)
2113                                 return ret;
2114                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2115                         mask.ipv4 = flow_tcf_item_mask
2116                                 (items, &rte_flow_item_ipv4_mask,
2117                                  &flow_tcf_mask_supported.ipv4,
2118                                  &flow_tcf_mask_empty.ipv4,
2119                                  sizeof(flow_tcf_mask_supported.ipv4),
2120                                  error);
2121                         if (!mask.ipv4)
2122                                 return -rte_errno;
2123                         if (mask.ipv4->hdr.next_proto_id &&
2124                             mask.ipv4->hdr.next_proto_id != 0xff)
2125                                 return rte_flow_error_set
2126                                         (error, ENOTSUP,
2127                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2128                                          mask.ipv4,
2129                                          "no support for partial mask on"
2130                                          " \"hdr.next_proto_id\" field");
2131                         else if (mask.ipv4->hdr.next_proto_id)
2132                                 next_protocol =
2133                                         ((const struct rte_flow_item_ipv4 *)
2134                                          (items->spec))->hdr.next_proto_id;
2135                         if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
2136                                 ret = flow_tcf_validate_vxlan_decap_ipv4
2137                                                                 (items, error);
2138                                 if (ret < 0)
2139                                         return ret;
2140                         }
2141                         break;
2142                 case RTE_FLOW_ITEM_TYPE_IPV6:
2143                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
2144                                                            error);
2145                         if (ret < 0)
2146                                 return ret;
2147                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2148                         mask.ipv6 = flow_tcf_item_mask
2149                                 (items, &rte_flow_item_ipv6_mask,
2150                                  &flow_tcf_mask_supported.ipv6,
2151                                  &flow_tcf_mask_empty.ipv6,
2152                                  sizeof(flow_tcf_mask_supported.ipv6),
2153                                  error);
2154                         if (!mask.ipv6)
2155                                 return -rte_errno;
2156                         if (mask.ipv6->hdr.proto &&
2157                             mask.ipv6->hdr.proto != 0xff)
2158                                 return rte_flow_error_set
2159                                         (error, ENOTSUP,
2160                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2161                                          mask.ipv6,
2162                                          "no support for partial mask on"
2163                                          " \"hdr.proto\" field");
2164                         else if (mask.ipv6->hdr.proto)
2165                                 next_protocol =
2166                                         ((const struct rte_flow_item_ipv6 *)
2167                                          (items->spec))->hdr.proto;
2168                         if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
2169                                 ret = flow_tcf_validate_vxlan_decap_ipv6
2170                                                                 (items, error);
2171                                 if (ret < 0)
2172                                         return ret;
2173                         }
2174                         break;
2175                 case RTE_FLOW_ITEM_TYPE_UDP:
2176                         ret = mlx5_flow_validate_item_udp(items, item_flags,
2177                                                           next_protocol, error);
2178                         if (ret < 0)
2179                                 return ret;
2180                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
2181                         mask.udp = flow_tcf_item_mask
2182                                 (items, &rte_flow_item_udp_mask,
2183                                  &flow_tcf_mask_supported.udp,
2184                                  &flow_tcf_mask_empty.udp,
2185                                  sizeof(flow_tcf_mask_supported.udp),
2186                                  error);
2187                         if (!mask.udp)
2188                                 return -rte_errno;
2189                         if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
2190                                 ret = flow_tcf_validate_vxlan_decap_udp
2191                                                                 (items, error);
2192                                 if (ret < 0)
2193                                         return ret;
2194                         }
2195                         break;
2196                 case RTE_FLOW_ITEM_TYPE_TCP:
2197                         ret = mlx5_flow_validate_item_tcp
2198                                              (items, item_flags,
2199                                               next_protocol,
2200                                               &flow_tcf_mask_supported.tcp,
2201                                               error);
2202                         if (ret < 0)
2203                                 return ret;
2204                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
2205                         mask.tcp = flow_tcf_item_mask
2206                                 (items, &rte_flow_item_tcp_mask,
2207                                  &flow_tcf_mask_supported.tcp,
2208                                  &flow_tcf_mask_empty.tcp,
2209                                  sizeof(flow_tcf_mask_supported.tcp),
2210                                  error);
2211                         if (!mask.tcp)
2212                                 return -rte_errno;
2213                         break;
2214                 case RTE_FLOW_ITEM_TYPE_VXLAN:
2215                         if (!(action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP))
2216                                 return rte_flow_error_set
2217                                         (error, ENOTSUP,
2218                                          RTE_FLOW_ERROR_TYPE_ITEM,
2219                                          items,
2220                                          "vni pattern should be followed by"
2221                                          " vxlan decapsulation action");
2222                         ret = mlx5_flow_validate_item_vxlan(items,
2223                                                             item_flags, error);
2224                         if (ret < 0)
2225                                 return ret;
2226                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
2227                         mask.vxlan = flow_tcf_item_mask
2228                                 (items, &rte_flow_item_vxlan_mask,
2229                                  &flow_tcf_mask_supported.vxlan,
2230                                  &flow_tcf_mask_empty.vxlan,
2231                                  sizeof(flow_tcf_mask_supported.vxlan), error);
2232                         if (!mask.vxlan)
2233                                 return -rte_errno;
2234                         if (mask.vxlan->vni[0] != 0xff ||
2235                             mask.vxlan->vni[1] != 0xff ||
2236                             mask.vxlan->vni[2] != 0xff)
2237                                 return rte_flow_error_set
2238                                         (error, ENOTSUP,
2239                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2240                                          mask.vxlan,
2241                                          "no support for partial or "
2242                                          "empty mask on \"vxlan.vni\" field");
2243                         break;
2244                 default:
2245                         return rte_flow_error_set(error, ENOTSUP,
2246                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2247                                                   items, "item not supported");
2248                 }
2249         }
2250         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
2251             (action_flags & MLX5_FLOW_ACTION_DROP))
2252                 return rte_flow_error_set(error, ENOTSUP,
2253                                           RTE_FLOW_ERROR_TYPE_ACTION,
2254                                           actions,
2255                                           "set action is not compatible with "
2256                                           "drop action");
2257         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
2258             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
2259                 return rte_flow_error_set(error, ENOTSUP,
2260                                           RTE_FLOW_ERROR_TYPE_ACTION,
2261                                           actions,
2262                                           "set action must be followed by "
2263                                           "port_id action");
2264         if (action_flags &
2265            (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
2266                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
2267                         return rte_flow_error_set(error, EINVAL,
2268                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2269                                                   actions,
2270                                                   "no ipv4 item found in"
2271                                                   " pattern");
2272         }
2273         if (action_flags &
2274            (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
2275                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
2276                         return rte_flow_error_set(error, EINVAL,
2277                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2278                                                   actions,
2279                                                   "no ipv6 item found in"
2280                                                   " pattern");
2281         }
2282         if (action_flags &
2283            (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
2284                 if (!(item_flags &
2285                      (MLX5_FLOW_LAYER_OUTER_L4_UDP |
2286                       MLX5_FLOW_LAYER_OUTER_L4_TCP)))
2287                         return rte_flow_error_set(error, EINVAL,
2288                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2289                                                   actions,
2290                                                   "no TCP/UDP item found in"
2291                                                   " pattern");
2292         }
2293         /*
2294          * FW syndrome (0xA9C090):
2295          *     set_flow_table_entry: push vlan action fte in fdb can ONLY be
2296          *     forward to the uplink.
2297          */
2298         if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
2299             (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
2300             ((struct priv *)port_id_dev->data->dev_private)->representor)
2301                 return rte_flow_error_set(error, ENOTSUP,
2302                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
2303                                           "vlan push can only be applied"
2304                                           " when forwarding to uplink port");
2305         /*
2306          * FW syndrome (0x294609):
2307          *     set_flow_table_entry: modify/pop/push actions in fdb flow table
2308          *     are supported only while forwarding to vport.
2309          */
2310         if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
2311             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
2312                 return rte_flow_error_set(error, ENOTSUP,
2313                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
2314                                           "vlan actions are supported"
2315                                           " only with port_id action");
2316         if ((action_flags & MLX5_TCF_VXLAN_ACTIONS) &&
2317             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
2318                 return rte_flow_error_set(error, ENOTSUP,
2319                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2320                                           "vxlan actions are supported"
2321                                           " only with port_id action");
2322         if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
2323                 return rte_flow_error_set(error, EINVAL,
2324                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
2325                                           "no fate action is found");
2326         if (action_flags &
2327            (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
2328                 if (!(item_flags &
2329                      (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
2330                       MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
2331                         return rte_flow_error_set(error, EINVAL,
2332                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2333                                                   actions,
2334                                                   "no IP found in pattern");
2335         }
2336         if (action_flags &
2337             (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
2338                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
2339                         return rte_flow_error_set(error, ENOTSUP,
2340                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2341                                                   actions,
2342                                                   "no ethernet found in"
2343                                                   " pattern");
2344         }
2345         if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
2346                 if (!(item_flags &
2347                      (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
2348                       MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
2349                         return rte_flow_error_set(error, EINVAL,
2350                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2351                                                   NULL,
2352                                                   "no outer IP pattern found"
2353                                                   " for vxlan decap action");
2354                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2355                         return rte_flow_error_set(error, EINVAL,
2356                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2357                                                   NULL,
2358                                                   "no outer UDP pattern found"
2359                                                   " for vxlan decap action");
2360                 if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
2361                         return rte_flow_error_set(error, EINVAL,
2362                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2363                                                   NULL,
2364                                                   "no VNI pattern found"
2365                                                   " for vxlan decap action");
2366         }
2367         return 0;
2368 }
2369
2370 /**
2371  * Calculate maximum size of memory for flow items of Linux TC flower and
2372  * extract specified items.
 *
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[out] item_flags
 *   Pointer to the detected items.
2378  *
2379  * @return
2380  *   Maximum size of memory for items.
2381  */
static int
flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
			    const struct rte_flow_item items[],
			    uint64_t *item_flags)
{
	int size = 0; /* Accumulated worst-case Netlink attribute size. */
	uint64_t flags = 0; /* MLX5_FLOW_LAYER_* bits of detected items. */

	size += SZ_NLATTR_STRZ_OF("flower") +
		SZ_NLATTR_NEST + /* TCA_OPTIONS. */
		SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
	if (attr->group > 0)
		size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_PORT_ID:
			/* Consumes no room in the flower options. */
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
				/* dst/src MAC addr and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L2;
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_TYPE_OF(uint16_t) +
				/* VLAN Ether type. */
				SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
				SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
			flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_TYPE_OF(uint32_t) * 4;
				/* dst/src IP addr and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 4;
				/* dst/src IP addr and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_TYPE_OF(uint16_t) * 4;
				/* dst/src port and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_TYPE_OF(uint16_t) * 4;
				/* dst/src port and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			size += SZ_NLATTR_TYPE_OF(uint32_t); /* VNI attribute. */
			flags |= MLX5_FLOW_LAYER_VXLAN;
			break;
		default:
			/* Unexpected items must have been rejected by prior
			 * validation; warn and keep going rather than fail.
			 */
			DRV_LOG(WARNING,
				"unsupported item %p type %d,"
				" items must be validated before flow creation",
				(const void *)items, items->type);
			break;
		}
	}
	*item_flags = flags;
	return size;
}
2456
2457 /**
 * Calculate size of memory to store the VXLAN encapsulation
2459  * related items in the Netlink message buffer. Items list
2460  * is specified by RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action.
2461  * The item list should be validated.
2462  *
2463  * @param[in] action
2464  *   RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action object.
2465  *   List of pattern items to scan data from.
2466  *
2467  * @return
2468  *   The size the part of Netlink message buffer to store the
2469  *   VXLAN encapsulation item attributes.
2470  */
2471 static int
2472 flow_tcf_vxlan_encap_size(const struct rte_flow_action *action)
2473 {
2474         const struct rte_flow_item *items;
2475         int size = 0;
2476
2477         assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP);
2478         assert(action->conf);
2479
2480         items = ((const struct rte_flow_action_vxlan_encap *)
2481                                         action->conf)->definition;
2482         assert(items);
2483         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
2484                 switch (items->type) {
2485                 case RTE_FLOW_ITEM_TYPE_VOID:
2486                         break;
2487                 case RTE_FLOW_ITEM_TYPE_ETH:
2488                         /* This item does not require message buffer. */
2489                         break;
2490                 case RTE_FLOW_ITEM_TYPE_IPV4:
2491                         size += SZ_NLATTR_DATA_OF(IPV4_ADDR_LEN) * 2;
2492                         break;
2493                 case RTE_FLOW_ITEM_TYPE_IPV6:
2494                         size += SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 2;
2495                         break;
2496                 case RTE_FLOW_ITEM_TYPE_UDP: {
2497                         const struct rte_flow_item_udp *udp = items->mask;
2498
2499                         size += SZ_NLATTR_TYPE_OF(uint16_t);
2500                         if (!udp || udp->hdr.src_port != RTE_BE16(0x0000))
2501                                 size += SZ_NLATTR_TYPE_OF(uint16_t);
2502                         break;
2503                 }
2504                 case RTE_FLOW_ITEM_TYPE_VXLAN:
2505                         size += SZ_NLATTR_TYPE_OF(uint32_t);
2506                         break;
2507                 default:
2508                         assert(false);
2509                         DRV_LOG(WARNING,
2510                                 "unsupported item %p type %d,"
2511                                 " items must be validated"
2512                                 " before flow creation",
2513                                 (const void *)items, items->type);
2514                         return 0;
2515                 }
2516         }
2517         return size;
2518 }
2519
2520 /**
2521  * Calculate maximum size of memory for flow actions of Linux TC flower and
2522  * extract specified actions.
2523  *
2524  * @param[in] actions
2525  *   Pointer to the list of actions.
2526  * @param[out] action_flags
2527  *   Pointer to the detected actions.
2528  *
2529  * @return
2530  *   Maximum size of memory for actions.
2531  */
static int
flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
			      uint64_t *action_flags)
{
	int size = 0; /* Accumulated worst-case Netlink attribute size. */
	uint64_t flags = 0; /* MLX5_FLOW_ACTION_* bits of detected actions. */

	size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_PORT_ID:
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("mirred") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(struct tc_mirred);
			flags |= MLX5_FLOW_ACTION_PORT_ID;
			break;
		case RTE_FLOW_ACTION_TYPE_JUMP:
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("gact") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(struct tc_gact);
			flags |= MLX5_FLOW_ACTION_JUMP;
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("gact") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(struct tc_gact);
			flags |= MLX5_FLOW_ACTION_DROP;
			break;
		case RTE_FLOW_ACTION_TYPE_COUNT:
			/* Consumes no room in the request message. */
			break;
		/*
		 * All VLAN modifications are encoded as a single TC "vlan"
		 * action; record the specific flag and fall to the shared
		 * size accounting below.
		 */
		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
			flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
			flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
			flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
			flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
			goto action_of_vlan;
action_of_vlan:
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("vlan") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(struct tc_vlan) +
				SZ_NLATTR_TYPE_OF(uint16_t) +
				/* VLAN protocol. */
				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
			break;
		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("tunnel_key") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(uint8_t);
			size += SZ_NLATTR_TYPE_OF(struct tc_tunnel_key);
			/* The encapsulation parameters block is placed in
			 * front of the Netlink message, MNL_ALIGNTO-aligned
			 * (see flow_tcf_prepare()), so account for it here.
			 */
			size += flow_tcf_vxlan_encap_size(actions) +
				RTE_ALIGN_CEIL /* preceding encap params. */
				(sizeof(struct flow_tcf_vxlan_encap),
				MNL_ALIGNTO);
			flags |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
			break;
		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
			size += SZ_NLATTR_NEST + /* na_act_index. */
				SZ_NLATTR_STRZ_OF("tunnel_key") +
				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
				SZ_NLATTR_TYPE_OF(uint8_t);
			size += SZ_NLATTR_TYPE_OF(struct tc_tunnel_key);
			size += RTE_ALIGN_CEIL /* preceding decap params. */
				(sizeof(struct flow_tcf_vxlan_decap),
				MNL_ALIGNTO);
			flags |= MLX5_FLOW_ACTION_VXLAN_DECAP;
			break;
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
		case RTE_FLOW_ACTION_TYPE_SET_TTL:
		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
			/* Header rewrites are sized by the pedit helper,
			 * which may also advance the actions cursor.
			 */
			size += flow_tcf_get_pedit_actions_size(&actions,
								&flags);
			break;
		default:
			/* Unexpected actions must have been rejected by prior
			 * validation; warn and keep going rather than fail.
			 */
			DRV_LOG(WARNING,
				"unsupported action %p type %d,"
				" items must be validated before flow creation",
				(const void *)actions, actions->type);
			break;
		}
	}
	*action_flags = flags;
	return size;
}
2636
2637 /**
2638  * Brand rtnetlink buffer with unique handle.
2639  *
2640  * This handle should be unique for a given network interface to avoid
2641  * collisions.
2642  *
2643  * @param nlh
2644  *   Pointer to Netlink message.
2645  * @param handle
2646  *   Unique 32-bit handle to use.
2647  */
2648 static void
2649 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
2650 {
2651         struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
2652
2653         tcm->tcm_handle = handle;
2654         DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
2655                 (void *)nlh, handle);
2656 }
2657
2658 /**
2659  * Prepare a flow object for Linux TC flower. It calculates the maximum size of
2660  * memory required, allocates the memory, initializes Netlink message headers
2661  * and set unique TC message handle.
2662  *
2663  * @param[in] attr
2664  *   Pointer to the flow attributes.
2665  * @param[in] items
2666  *   Pointer to the list of items.
2667  * @param[in] actions
2668  *   Pointer to the list of actions.
2669  * @param[out] item_flags
2670  *   Pointer to bit mask of all items detected.
2671  * @param[out] action_flags
2672  *   Pointer to bit mask of all actions detected.
2673  * @param[out] error
2674  *   Pointer to the error structure.
2675  *
2676  * @return
2677  *   Pointer to mlx5_flow object on success,
 *   otherwise NULL and rte_errno is set.
2679  */
static struct mlx5_flow *
flow_tcf_prepare(const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 uint64_t *item_flags, uint64_t *action_flags,
		 struct rte_flow_error *error)
{
	/* Fixed part of the buffer: the mlx5_flow object itself (padded so
	 * an optional tunnel parameters block can follow with the proper
	 * alignment), then the Netlink and TC message headers.
	 */
	size_t size = RTE_ALIGN_CEIL
			(sizeof(struct mlx5_flow),
			 alignof(struct flow_tcf_tunnel_hdr)) +
		      MNL_ALIGN(sizeof(struct nlmsghdr)) +
		      MNL_ALIGN(sizeof(struct tcmsg));
	struct mlx5_flow *dev_flow;
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	uint8_t *sp, *tun = NULL; /* Layout cursor / tunnel params block. */

	size += flow_tcf_get_items_and_size(attr, items, item_flags);
	size += flow_tcf_get_actions_and_size(actions, action_flags);
	dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
	if (!dev_flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
				   "not enough memory to create E-Switch flow");
		return NULL;
	}
	/* Carve the optional tunnel parameters block right after the flow
	 * object; the Netlink message starts after that block (if any).
	 */
	sp = (uint8_t *)(dev_flow + 1);
	if (*action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) {
		sp = RTE_PTR_ALIGN
			(sp, alignof(struct flow_tcf_tunnel_hdr));
		tun = sp;
		sp += RTE_ALIGN_CEIL
			(sizeof(struct flow_tcf_vxlan_encap),
			MNL_ALIGNTO);
#ifndef NDEBUG
		/* Tunnel parameters are not part of the Netlink message;
		 * exclude them from the debug-only nlsize accounting below.
		 */
		size -= RTE_ALIGN_CEIL
			(sizeof(struct flow_tcf_vxlan_encap),
			MNL_ALIGNTO);
#endif
	} else if (*action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
		sp = RTE_PTR_ALIGN
			(sp, alignof(struct flow_tcf_tunnel_hdr));
		tun = sp;
		sp += RTE_ALIGN_CEIL
			(sizeof(struct flow_tcf_vxlan_decap),
			MNL_ALIGNTO);
#ifndef NDEBUG
		/* Same exclusion as the encap branch above. */
		size -= RTE_ALIGN_CEIL
			(sizeof(struct flow_tcf_vxlan_decap),
			MNL_ALIGNTO);
#endif
	} else {
		sp = RTE_PTR_ALIGN(sp, MNL_ALIGNTO);
	}
	nlh = mnl_nlmsg_put_header(sp);
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	*dev_flow = (struct mlx5_flow){
		.tcf = (struct mlx5_flow_tcf){
#ifndef NDEBUG
			/* Size of the Netlink message buffer only,
			 * presumably checked while building the message -
			 * confirm at the nlsize use sites.
			 */
			.nlsize = size - RTE_ALIGN_CEIL
				(sizeof(struct mlx5_flow),
				 alignof(struct flow_tcf_tunnel_hdr)),
#endif
			.tunnel = (struct flow_tcf_tunnel_hdr *)tun,
			.nlh = nlh,
			.tcm = tcm,
		},
	};
	/* tunnel is non-NULL exactly when one of these flags is set. */
	if (*action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP)
		dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_DECAP;
	else if (*action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
		dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_ENCAP;
	/*
	 * Generate a reasonably unique handle based on the address of the
	 * target buffer.
	 *
	 * This is straightforward on 32-bit systems where the flow pointer can
	 * be used directly. Otherwise, its least significant part is taken
	 * after shifting it by the previous power of two of the pointed buffer
	 * size.
	 */
	if (sizeof(dev_flow) <= 4)
		flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
	else
		flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
				       rte_log2_u32(rte_align32prevpow2(size)));
	return dev_flow;
}
2768
2769 /**
2770  * Make adjustments for supporting count actions.
2771  *
2772  * @param[in] dev
2773  *   Pointer to the Ethernet device structure.
2774  * @param[in] dev_flow
2775  *   Pointer to mlx5_flow.
2776  * @param[out] error
2777  *   Pointer to error structure.
2778  *
2779  * @return
2780  *   0 On success else a negative errno value is returned and rte_errno is set.
2781  */
2782 static int
2783 flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
2784                                   struct mlx5_flow *dev_flow,
2785                                   struct rte_flow_error *error)
2786 {
2787         struct rte_flow *flow = dev_flow->flow;
2788
2789         if (!flow->counter) {
2790                 flow->counter = flow_tcf_counter_new();
2791                 if (!flow->counter)
2792                         return rte_flow_error_set(error, rte_errno,
2793                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2794                                                   NULL,
2795                                                   "cannot get counter"
2796                                                   " context.");
2797         }
2798         return 0;
2799 }
2800
2801 /**
2802  * Translate flow for Linux TC flower and construct Netlink message.
2803  *
2804  * @param[in] priv
2805  *   Pointer to the priv structure.
2806  * @param[in, out] flow
2807  *   Pointer to the sub flow.
2808  * @param[in] attr
2809  *   Pointer to the flow attributes.
2810  * @param[in] items
2811  *   Pointer to the list of items.
2812  * @param[in] actions
2813  *   Pointer to the list of actions.
2814  * @param[out] error
2815  *   Pointer to the error structure.
2816  *
2817  * @return
2818  *   0 on success, a negative errno value otherwise and rte_ernno is set.
2819  */
static int
flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	/*
	 * Scratch pointers for the current item/action configuration.
	 * Unions are safe here: only one member is live per switch case.
	 */
	union {
		const struct rte_flow_item_port_id *port_id;
		const struct rte_flow_item_eth *eth;
		const struct rte_flow_item_vlan *vlan;
		const struct rte_flow_item_ipv4 *ipv4;
		const struct rte_flow_item_ipv6 *ipv6;
		const struct rte_flow_item_tcp *tcp;
		const struct rte_flow_item_udp *udp;
	} spec, mask;
	union {
		const struct rte_flow_action_port_id *port_id;
		const struct rte_flow_action_jump *jump;
		const struct rte_flow_action_of_push_vlan *of_push_vlan;
		const struct rte_flow_action_of_set_vlan_vid *
			of_set_vlan_vid;
		const struct rte_flow_action_of_set_vlan_pcp *
			of_set_vlan_pcp;
	} conf;
	/* Port ID to network interface index translation table. */
	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
	struct nlmsghdr *nlh = dev_flow->tcf.nlh;
	struct tcmsg *tcm = dev_flow->tcf.tcm;
	uint32_t na_act_index_cur;
	/* Track which keys were already emitted to avoid duplicates. */
	bool eth_type_set = 0;
	bool vlan_present = 0;
	bool vlan_eth_type_set = 0;
	bool ip_proto_set = 0;
	struct nlattr *na_flower;
	struct nlattr *na_flower_act;
	/*
	 * Placeholder attributes for VLAN VID/priority, emitted as
	 * TCA_VLAN_PAD and retyped in place once the real value is known
	 * (see override_na_vlan_id/override_na_vlan_priority below).
	 */
	struct nlattr *na_vlan_id = NULL;
	struct nlattr *na_vlan_priority = NULL;
	uint64_t item_flags = 0;
	int ret;

	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
						PTOI_TABLE_SZ_MAX(dev)));
	nlh = dev_flow->tcf.nlh;
	tcm = dev_flow->tcf.tcm;
	/* Prepare API must have been called beforehand. */
	assert(nlh != NULL && tcm != NULL);
	tcm->tcm_family = AF_UNSPEC;
	/* Default to the first (PF) interface unless a PORT_ID item says
	 * otherwise. */
	tcm->tcm_ifindex = ptoi[0].ifindex;
	tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
	/*
	 * Priority cannot be zero to prevent the kernel from picking one
	 * automatically.
	 */
	tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
				  RTE_BE16(ETH_P_ALL));
	/* rte_flow groups map to TC chains. */
	if (attr->group > 0)
		mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
	mnl_attr_put_strz(nlh, TCA_KIND, "flower");
	na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
	/* Hardware-only offload; refuse software fallback in the kernel. */
	mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
	/*
	 * First pass: translate pattern items into flower match keys.
	 * flow_tcf_item_mask() returns the effective mask (item mask,
	 * default mask, or the empty mask when nothing is to be matched).
	 */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		unsigned int i;

		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_PORT_ID:
			mask.port_id = flow_tcf_item_mask
				(items, &rte_flow_item_port_id_mask,
				 &flow_tcf_mask_supported.port_id,
				 &flow_tcf_mask_empty.port_id,
				 sizeof(flow_tcf_mask_supported.port_id),
				 error);
			assert(mask.port_id);
			if (mask.port_id == &flow_tcf_mask_empty.port_id)
				break;
			spec.port_id = items->spec;
			/* Zero mask on the ID means "any port" -> entry 0. */
			if (!mask.port_id->id)
				i = 0;
			else
				for (i = 0; ptoi[i].ifindex; ++i)
					if (ptoi[i].port_id == spec.port_id->id)
						break;
			/* Validate pass guarantees the port is in the table. */
			assert(ptoi[i].ifindex);
			tcm->tcm_ifindex = ptoi[i].ifindex;
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
			mask.eth = flow_tcf_item_mask
				(items, &rte_flow_item_eth_mask,
				 &flow_tcf_mask_supported.eth,
				 &flow_tcf_mask_empty.eth,
				 sizeof(flow_tcf_mask_supported.eth),
				 error);
			assert(mask.eth);
			if (mask.eth == &flow_tcf_mask_empty.eth)
				break;
			spec.eth = items->spec;
			if (mask.eth->type) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
						 spec.eth->type);
				eth_type_set = 1;
			}
			if (!is_zero_ether_addr(&mask.eth->dst)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
					     ETHER_ADDR_LEN,
					     spec.eth->dst.addr_bytes);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
					     ETHER_ADDR_LEN,
					     mask.eth->dst.addr_bytes);
			}
			if (!is_zero_ether_addr(&mask.eth->src)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
					     ETHER_ADDR_LEN,
					     spec.eth->src.addr_bytes);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
					     ETHER_ADDR_LEN,
					     mask.eth->src.addr_bytes);
			}
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
			mask.vlan = flow_tcf_item_mask
				(items, &rte_flow_item_vlan_mask,
				 &flow_tcf_mask_supported.vlan,
				 &flow_tcf_mask_empty.vlan,
				 sizeof(flow_tcf_mask_supported.vlan),
				 error);
			assert(mask.vlan);
			/* Flower requires ETH_TYPE=802.1Q for VLAN keys. */
			if (!eth_type_set)
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
						 RTE_BE16(ETH_P_8021Q));
			eth_type_set = 1;
			vlan_present = 1;
			if (mask.vlan == &flow_tcf_mask_empty.vlan)
				break;
			spec.vlan = items->spec;
			if (mask.vlan->inner_type) {
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
						 spec.vlan->inner_type);
				vlan_eth_type_set = 1;
			}
			/* TCI: bits 15-13 = PCP, bits 11-0 = VID. */
			if (mask.vlan->tci & RTE_BE16(0xe000))
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
						(rte_be_to_cpu_16
						 (spec.vlan->tci) >> 13) & 0x7);
			if (mask.vlan->tci & RTE_BE16(0x0fff))
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
						 rte_be_to_cpu_16
						 (spec.vlan->tci &
						  RTE_BE16(0x0fff)));
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			mask.ipv4 = flow_tcf_item_mask
				(items, &rte_flow_item_ipv4_mask,
				 &flow_tcf_mask_supported.ipv4,
				 &flow_tcf_mask_empty.ipv4,
				 sizeof(flow_tcf_mask_supported.ipv4),
				 error);
			assert(mask.ipv4);
			/*
			 * Emit the ethertype key (or the inner one when a
			 * VLAN tag is matched) if not already provided by a
			 * preceding item.
			 */
			if (!eth_type_set || !vlan_eth_type_set)
				mnl_attr_put_u16(nlh,
						 vlan_present ?
						 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
						 TCA_FLOWER_KEY_ETH_TYPE,
						 RTE_BE16(ETH_P_IP));
			eth_type_set = 1;
			vlan_eth_type_set = 1;
			if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
				break;
			spec.ipv4 = items->spec;
			if (mask.ipv4->hdr.next_proto_id) {
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
						spec.ipv4->hdr.next_proto_id);
				ip_proto_set = 1;
			}
			if (mask.ipv4->hdr.src_addr) {
				mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
						 spec.ipv4->hdr.src_addr);
				mnl_attr_put_u32(nlh,
						 TCA_FLOWER_KEY_IPV4_SRC_MASK,
						 mask.ipv4->hdr.src_addr);
			}
			if (mask.ipv4->hdr.dst_addr) {
				mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
						 spec.ipv4->hdr.dst_addr);
				mnl_attr_put_u32(nlh,
						 TCA_FLOWER_KEY_IPV4_DST_MASK,
						 mask.ipv4->hdr.dst_addr);
			}
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			mask.ipv6 = flow_tcf_item_mask
				(items, &rte_flow_item_ipv6_mask,
				 &flow_tcf_mask_supported.ipv6,
				 &flow_tcf_mask_empty.ipv6,
				 sizeof(flow_tcf_mask_supported.ipv6),
				 error);
			assert(mask.ipv6);
			if (!eth_type_set || !vlan_eth_type_set)
				mnl_attr_put_u16(nlh,
						 vlan_present ?
						 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
						 TCA_FLOWER_KEY_ETH_TYPE,
						 RTE_BE16(ETH_P_IPV6));
			eth_type_set = 1;
			vlan_eth_type_set = 1;
			if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
				break;
			spec.ipv6 = items->spec;
			if (mask.ipv6->hdr.proto) {
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
						spec.ipv6->hdr.proto);
				ip_proto_set = 1;
			}
			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
					     sizeof(spec.ipv6->hdr.src_addr),
					     spec.ipv6->hdr.src_addr);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
					     sizeof(mask.ipv6->hdr.src_addr),
					     mask.ipv6->hdr.src_addr);
			}
			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
					     sizeof(spec.ipv6->hdr.dst_addr),
					     spec.ipv6->hdr.dst_addr);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
					     sizeof(mask.ipv6->hdr.dst_addr),
					     mask.ipv6->hdr.dst_addr);
			}
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
			mask.udp = flow_tcf_item_mask
				(items, &rte_flow_item_udp_mask,
				 &flow_tcf_mask_supported.udp,
				 &flow_tcf_mask_empty.udp,
				 sizeof(flow_tcf_mask_supported.udp),
				 error);
			assert(mask.udp);
			/* L4 keys need IP_PROTO; emit it if L3 did not. */
			if (!ip_proto_set)
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
						IPPROTO_UDP);
			if (mask.udp == &flow_tcf_mask_empty.udp)
				break;
			spec.udp = items->spec;
			if (mask.udp->hdr.src_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
						 spec.udp->hdr.src_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_UDP_SRC_MASK,
						 mask.udp->hdr.src_port);
			}
			if (mask.udp->hdr.dst_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
						 spec.udp->hdr.dst_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_UDP_DST_MASK,
						 mask.udp->hdr.dst_port);
			}
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
			mask.tcp = flow_tcf_item_mask
				(items, &rte_flow_item_tcp_mask,
				 &flow_tcf_mask_supported.tcp,
				 &flow_tcf_mask_empty.tcp,
				 sizeof(flow_tcf_mask_supported.tcp),
				 error);
			assert(mask.tcp);
			if (!ip_proto_set)
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
						IPPROTO_TCP);
			if (mask.tcp == &flow_tcf_mask_empty.tcp)
				break;
			spec.tcp = items->spec;
			if (mask.tcp->hdr.src_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
						 spec.tcp->hdr.src_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_TCP_SRC_MASK,
						 mask.tcp->hdr.src_port);
			}
			if (mask.tcp->hdr.dst_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
						 spec.tcp->hdr.dst_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_TCP_DST_MASK,
						 mask.tcp->hdr.dst_port);
			}
			/* Flower carries the 8-bit TCP flags in a be16 key. */
			if (mask.tcp->hdr.tcp_flags) {
				mnl_attr_put_u16
					(nlh,
					 TCA_FLOWER_KEY_TCP_FLAGS,
					 rte_cpu_to_be_16
						(spec.tcp->hdr.tcp_flags));
				mnl_attr_put_u16
					(nlh,
					 TCA_FLOWER_KEY_TCP_FLAGS_MASK,
					 rte_cpu_to_be_16
						(mask.tcp->hdr.tcp_flags));
			}
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "item not supported");
		}
	}
	/*
	 * Second pass: translate actions. Each action occupies its own
	 * 1-based index nest inside TCA_FLOWER_ACT.
	 */
	na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
	na_act_index_cur = 1;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		struct nlattr *na_act_index;
		struct nlattr *na_act;
		unsigned int vlan_act;
		unsigned int i;

		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_PORT_ID:
			/* Redirect to another port via TC "mirred". */
			conf.port_id = actions->conf;
			if (conf.port_id->original)
				i = 0;
			else
				for (i = 0; ptoi[i].ifindex; ++i)
					if (ptoi[i].port_id == conf.port_id->id)
						break;
			assert(ptoi[i].ifindex);
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_MIRRED_PARMS,
				     sizeof(struct tc_mirred),
				     &(struct tc_mirred){
					.action = TC_ACT_STOLEN,
					.eaction = TCA_EGRESS_REDIR,
					.ifindex = ptoi[i].ifindex,
				     });
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			break;
		case RTE_FLOW_ACTION_TYPE_JUMP:
			/* Jump to group == TC "gact" goto chain. */
			conf.jump = actions->conf;
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_GACT_PARMS,
				     sizeof(struct tc_gact),
				     &(struct tc_gact){
					.action = TC_ACT_GOTO_CHAIN |
						  conf.jump->group,
				     });
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_GACT_PARMS,
				     sizeof(struct tc_gact),
				     &(struct tc_gact){
					.action = TC_ACT_SHOT,
				     });
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			break;
		case RTE_FLOW_ACTION_TYPE_COUNT:
			/*
			 * Driver adds the count action implicitly for
			 * each rule it creates.
			 */
			ret = flow_tcf_translate_action_count(dev,
							      dev_flow, error);
			if (ret < 0)
				return ret;
			break;
		/*
		 * All VLAN manipulations funnel into the shared
		 * action_of_vlan code below; only vlan_act and conf differ.
		 */
		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
			conf.of_push_vlan = NULL;
			vlan_act = TCA_VLAN_ACT_POP;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
			conf.of_push_vlan = actions->conf;
			vlan_act = TCA_VLAN_ACT_PUSH;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
			conf.of_set_vlan_vid = actions->conf;
			/* Patch the placeholder left by a prior VLAN action. */
			if (na_vlan_id)
				goto override_na_vlan_id;
			vlan_act = TCA_VLAN_ACT_MODIFY;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
			conf.of_set_vlan_pcp = actions->conf;
			if (na_vlan_priority)
				goto override_na_vlan_priority;
			vlan_act = TCA_VLAN_ACT_MODIFY;
			goto action_of_vlan;
action_of_vlan:
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_VLAN_PARMS,
				     sizeof(struct tc_vlan),
				     &(struct tc_vlan){
					.action = TC_ACT_PIPE,
					.v_action = vlan_act,
				     });
			if (vlan_act == TCA_VLAN_ACT_POP) {
				mnl_attr_nest_end(nlh, na_act);
				mnl_attr_nest_end(nlh, na_act_index);
				break;
			}
			if (vlan_act == TCA_VLAN_ACT_PUSH)
				mnl_attr_put_u16(nlh,
						 TCA_VLAN_PUSH_VLAN_PROTOCOL,
						 conf.of_push_vlan->ethertype);
			/*
			 * Reserve VID and priority slots as TCA_VLAN_PAD so
			 * they can be retyped in place if a later
			 * SET_VLAN_VID/SET_VLAN_PCP action supplies values.
			 */
			na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
			mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
			na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
			mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			if (actions->type ==
			    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
override_na_vlan_id:
				na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
				*(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
					rte_be_to_cpu_16
					(conf.of_set_vlan_vid->vlan_vid);
			} else if (actions->type ==
				   RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
override_na_vlan_priority:
				na_vlan_priority->nla_type =
					TCA_VLAN_PUSH_VLAN_PRIORITY;
				*(uint8_t *)mnl_attr_get_payload
					(na_vlan_priority) =
					conf.of_set_vlan_pcp->vlan_pcp;
			}
			break;
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
		case RTE_FLOW_ACTION_TYPE_SET_TTL:
		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
			/*
			 * Header rewrites are grouped into a single TC
			 * "pedit" action; the helper may consume several
			 * consecutive actions (it advances the cursor).
			 */
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			flow_tcf_create_pedit_mnl_msg(nlh,
						      &actions, item_flags);
			mnl_attr_nest_end(nlh, na_act_index);
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
	}
	assert(na_flower);
	assert(na_flower_act);
	mnl_attr_nest_end(nlh, na_flower_act);
	mnl_attr_nest_end(nlh, na_flower);
	return 0;
}
3305
/**
 * Send Netlink message with acknowledgment.
 *
 * The request is stamped with a fresh sequence number from the context and
 * sent with NLM_F_ACK raised; a single reply is then read and fed to
 * mnl_cb_run(), which matches the kernel acknowledgment against that
 * sequence number.
 *
 * @param ctx
 *   Flow context to use.
 * @param nlh
 *   Message to send. This function always raises the NLM_F_ACK flag before
 *   sending.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
{
	/*
	 * Reply buffer sized for an error/ACK message that may carry a
	 * copy of the request payload back (hence the request length is
	 * added on top of the nlmsgerr header).
	 */
	alignas(struct nlmsghdr)
	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
		    nlh->nlmsg_len - sizeof(*nlh)];
	uint32_t seq = ctx->seq++;
	struct mnl_socket *nl = ctx->nl;
	int ret;

	nlh->nlmsg_flags |= NLM_F_ACK;
	nlh->nlmsg_seq = seq;
	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
	if (ret != -1)
		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
	if (ret != -1)
		ret = mnl_cb_run
			(ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
	/* A positive return from mnl_cb_run() means the ACK parsed clean. */
	if (ret > 0)
		return 0;
	/* NOTE(review): errno is assumed still valid from the failing call. */
	rte_errno = errno;
	return -rte_errno;
}
3341
3342 /**
3343  * Apply flow to E-Switch by sending Netlink message.
3344  *
3345  * @param[in] dev
3346  *   Pointer to Ethernet device.
3347  * @param[in, out] flow
3348  *   Pointer to the sub flow.
3349  * @param[out] error
3350  *   Pointer to the error structure.
3351  *
3352  * @return
3353  *   0 on success, a negative errno value otherwise and rte_ernno is set.
3354  */
3355 static int
3356 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3357                struct rte_flow_error *error)
3358 {
3359         struct priv *priv = dev->data->dev_private;
3360         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
3361         struct mlx5_flow *dev_flow;
3362         struct nlmsghdr *nlh;
3363
3364         dev_flow = LIST_FIRST(&flow->dev_flows);
3365         /* E-Switch flow can't be expanded. */
3366         assert(!LIST_NEXT(dev_flow, next));
3367         nlh = dev_flow->tcf.nlh;
3368         nlh->nlmsg_type = RTM_NEWTFILTER;
3369         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
3370         if (!flow_tcf_nl_ack(ctx, nlh))
3371                 return 0;
3372         return rte_flow_error_set(error, rte_errno,
3373                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
3374                                   "netlink: failed to create TC flow rule");
3375 }
3376
3377 /**
3378  * Remove flow from E-Switch by sending Netlink message.
3379  *
3380  * @param[in] dev
3381  *   Pointer to Ethernet device.
3382  * @param[in, out] flow
3383  *   Pointer to the sub flow.
3384  */
3385 static void
3386 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
3387 {
3388         struct priv *priv = dev->data->dev_private;
3389         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
3390         struct mlx5_flow *dev_flow;
3391         struct nlmsghdr *nlh;
3392
3393         if (!flow)
3394                 return;
3395         if (flow->counter) {
3396                 if (--flow->counter->ref_cnt == 0) {
3397                         rte_free(flow->counter);
3398                         flow->counter = NULL;
3399                 }
3400         }
3401         dev_flow = LIST_FIRST(&flow->dev_flows);
3402         if (!dev_flow)
3403                 return;
3404         /* E-Switch flow can't be expanded. */
3405         assert(!LIST_NEXT(dev_flow, next));
3406         nlh = dev_flow->tcf.nlh;
3407         nlh->nlmsg_type = RTM_DELTFILTER;
3408         nlh->nlmsg_flags = NLM_F_REQUEST;
3409         flow_tcf_nl_ack(ctx, nlh);
3410 }
3411
3412 /**
3413  * Remove flow from E-Switch and release resources of the device flow.
3414  *
3415  * @param[in] dev
3416  *   Pointer to Ethernet device.
3417  * @param[in, out] flow
3418  *   Pointer to the sub flow.
3419  */
3420 static void
3421 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3422 {
3423         struct mlx5_flow *dev_flow;
3424
3425         if (!flow)
3426                 return;
3427         flow_tcf_remove(dev, flow);
3428         dev_flow = LIST_FIRST(&flow->dev_flows);
3429         if (!dev_flow)
3430                 return;
3431         /* E-Switch flow can't be expanded. */
3432         assert(!LIST_NEXT(dev_flow, next));
3433         LIST_REMOVE(dev_flow, next);
3434         rte_free(dev_flow);
3435 }
3436
/**
 * Helper routine for figuring the space size required for a parse buffer.
 *
 * @param array
 *   array of values to use.
 * @param idx
 *   Current location in array; a negative index means "no entry".
 * @param value
 *   Value to compare with.
 *
 * @return
 *   The maximum between the given value and the array value on index,
 *   or @p value alone when @p idx is negative.
 */
static uint16_t
flow_tcf_arr_val_max(uint16_t array[], int idx, uint16_t value)
{
	if (idx < 0)
		return value;
	return array[idx] > value ? array[idx] : value;
}
3455
/**
 * Parse rtnetlink message attributes filling the attribute table with the info
 * retrieved.
 *
 * @param tb
 *   Attribute table to be filled; must hold @p max + 1 entries.
 * @param max
 *   Maximum attribute type accepted into the table.
 * @param rta
 *   The attributes section in the message to be parsed.
 * @param len
 *   The length of the attributes section in the message.
 */
static void
flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
			 struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short type = rta->rta_type;

		/* Only the first occurrence of each type is recorded. */
		if (type <= max && !tb[type])
			tb[type] = rta;
	}
}
3482
/**
 * Extract flow counters from flower action.
 *
 * @param rta
 *   flower action stats properties in the Netlink message received.
 * @param rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   we need to traverse in order to get to the requested object.
 * @param idx
 *   Current location in rta_type table.
 * @param[out] data
 *   data holding the count statistics of the rte_flow retrieved from
 *   the message.
 *
 * @return
 *   0 if data was found and retrieved, -1 otherwise.
 */
static int
flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
				       uint16_t rta_type[], int idx,
				       struct gnet_stats_basic *data)
{
	int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
						 TCA_STATS_BASIC);
	struct rtattr *tb[tca_stats_max + 1];

	if (rta == NULL || idx < 0)
		return -1;
	flow_tcf_nl_parse_rtattr(tb, tca_stats_max,
				 RTA_DATA(rta), RTA_PAYLOAD(rta));
	if (rta_type[idx] != TCA_STATS_BASIC || !tb[TCA_STATS_BASIC])
		return -1;
	/* Copy no more than the payload actually provides. */
	memcpy(data, RTA_DATA(tb[TCA_STATS_BASIC]),
	       RTE_MIN(RTA_PAYLOAD(tb[TCA_STATS_BASIC]), sizeof(*data)));
	return 0;
}
3527
3528 /**
3529  * Parse flower single action retrieving the requested action attribute,
3530  * if found.
3531  *
3532  * @param arg
3533  *   flower action properties in the Netlink message received.
3534  * @param rta_type
3535  *   The backward sequence of rta_types, as written in the attribute table,
3536  *   we need to traverse in order to get to the requested object.
3537  * @param idx
3538  *   Current location in rta_type table.
3539  * @param[out] data
3540  *   Count statistics retrieved from the message query.
3541  *
3542  * @return
3543  *   0 if data was found and retrieved, -1 otherwise.
3544  */
3545 static int
3546 flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
3547                                      uint16_t rta_type[], int idx, void *data)
3548 {
3549         int tca_act_max = flow_tcf_arr_val_max(rta_type, idx, TCA_ACT_STATS);
3550         struct rtattr *tb[tca_act_max + 1];
3551
3552         if (arg == NULL || idx < 0)
3553                 return -1;
3554         flow_tcf_nl_parse_rtattr(tb, tca_act_max,
3555                                  RTA_DATA(arg), RTA_PAYLOAD(arg));
3556         if (tb[TCA_ACT_KIND] == NULL)
3557                 return -1;
3558         switch (rta_type[idx]) {
3559         case TCA_ACT_STATS:
3560                 if (tb[TCA_ACT_STATS])
3561                         return flow_tcf_nl_action_stats_parse_and_get
3562                                         (tb[TCA_ACT_STATS],
3563                                          rta_type, --idx,
3564                                          (struct gnet_stats_basic *)data);
3565                 break;
3566         default:
3567                 break;
3568         }
3569         return -1;
3570 }
3571
3572 /**
3573  * Parse flower action section in the message retrieving the requested
3574  * attribute from the first action that provides it.
3575  *
3576  * @param opt
3577  *   flower section in the Netlink message received.
3578  * @param rta_type
3579  *   The backward sequence of rta_types, as written in the attribute table,
3580  *   we need to traverse in order to get to the requested object.
3581  * @param idx
3582  *   Current location in rta_type table.
3583  * @param[out] data
3584  *   data retrieved from the message query.
3585  *
3586  * @return
3587  *   0 if data was found and retrieved, -1 otherwise.
3588  */
3589 static int
3590 flow_tcf_nl_action_parse_and_get(struct rtattr *arg,
3591                                  uint16_t rta_type[], int idx, void *data)
3592 {
3593         struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
3594         int i;
3595
3596         if (arg == NULL || idx < 0)
3597                 return -1;
3598         flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
3599                                  RTA_DATA(arg), RTA_PAYLOAD(arg));
3600         switch (rta_type[idx]) {
3601         /*
3602          * flow counters are stored in the actions defined by the flow
3603          * and not in the flow itself, therefore we need to traverse the
3604          * flower chain of actions in search for them.
3605          *
3606          * Note that the index is not decremented here.
3607          */
3608         case TCA_ACT_STATS:
3609                 for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
3610                         if (tb[i] &&
3611                         !flow_tcf_nl_parse_one_action_and_get(tb[i],
3612                                                               rta_type,
3613                                                               idx, data))
3614                                 return 0;
3615                 }
3616                 break;
3617         default:
3618                 break;
3619         }
3620         return -1;
3621 }
3622
/**
 * Parse flower classifier options in the message, retrieving the requested
 * attribute if found.
 *
 * @param opt
 *   flower section in the Netlink message received.
 * @param rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   we need to traverse in order to get to the requested object.
 * @param idx
 *   Current location in rta_type table.
 * @param[out] data
 *   data retrieved from the message query.
 *
 * @return
 *   0 if data was found and retrieved, -1 otherwise.
 */
static int
flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
			       uint16_t rta_type[], int idx, void *data)
{
	int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
						  TCA_FLOWER_ACT);
	struct rtattr *tb[tca_flower_max + 1];

	if (!opt || idx < 0)
		return -1;
	flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
				 RTA_DATA(opt), RTA_PAYLOAD(opt));
	if (rta_type[idx] == TCA_FLOWER_ACT && tb[TCA_FLOWER_ACT])
		return flow_tcf_nl_action_parse_and_get(tb[TCA_FLOWER_ACT],
							rta_type, idx - 1,
							data);
	return -1;
}
3664
/**
 * Parse Netlink reply on filter query, retrieving the flow counters.
 *
 * @param cnlh
 *   Message received from Netlink.
 * @param rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   we need to traverse in order to get to the requested object.
 * @param idx
 *   Current location in rta_type table.
 * @param[out] data
 *   data retrieved from the message query.
 *
 * @return
 *   0 if data was found and retrieved, -1 otherwise.
 */
static int
flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
				 uint16_t rta_type[], int idx, void *data)
{
	struct tcmsg *t = NLMSG_DATA(cnlh);
	int len = cnlh->nlmsg_len;
	int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
	struct rtattr *tb[tca_max + 1];

	if (idx < 0)
		return -1;
	/* Only filter-related message types are meaningful here. */
	switch (cnlh->nlmsg_type) {
	case RTM_NEWTFILTER:
	case RTM_GETTFILTER:
	case RTM_DELTFILTER:
		break;
	default:
		return -1;
	}
	len -= NLMSG_LENGTH(sizeof(*t));
	if (len < 0)
		return -1;
	flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
	/* Not a TC flower flow - bail out */
	if (!tb[TCA_KIND] ||
	    strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
		return -1;
	if (rta_type[idx] == TCA_OPTIONS && tb[TCA_OPTIONS])
		return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
						      rta_type, idx - 1, data);
	return -1;
}
3717
/**
 * A callback to parse Netlink reply on TC flower query.
 *
 * @param nlh
 *   Message received from Netlink.
 * @param[out] data
 *   Pointer to data area to be filled by the parsing routine.
 *   Assumed to be a pointer to struct flow_tcf_stats_basic.
 *
 * @return
 *   MNL_CB_OK value.
 */
static int
flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
{
	/*
	 * The backward sequence of rta_types to pass in order to get
	 *  to the counters.
	 */
	uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
				TCA_FLOWER_ACT, TCA_OPTIONS };
	struct flow_tcf_stats_basic *sb_data = data;
	/* Union drops the const qualifier without triggering a cast warning. */
	union {
		const struct nlmsghdr *c;
		struct nlmsghdr *nc;
	} tnlh = { .c = nlh };

	/* Start traversal at the outermost type (last array entry). */
	if (!flow_tcf_nl_filter_parse_and_get(tnlh.nc, rta_type,
					      RTE_DIM(rta_type) - 1,
					      (void *)&sb_data->counters))
		sb_data->valid = true;
	/* Always MNL_CB_OK so the caller can keep draining the reply. */
	return MNL_CB_OK;
}
3751
3752 /**
3753  * Query a TC flower rule for its statistics via netlink.
3754  *
3755  * @param[in] dev
3756  *   Pointer to Ethernet device.
3757  * @param[in] flow
3758  *   Pointer to the sub flow.
3759  * @param[out] data
3760  *   data retrieved by the query.
3761  * @param[out] error
3762  *   Perform verbose error reporting if not NULL.
3763  *
3764  * @return
3765  *   0 on success, a negative errno value otherwise and rte_errno is set.
3766  */
3767 static int
3768 flow_tcf_query_count(struct rte_eth_dev *dev,
3769                           struct rte_flow *flow,
3770                           void *data,
3771                           struct rte_flow_error *error)
3772 {
3773         struct flow_tcf_stats_basic sb_data = { 0 };
3774         struct rte_flow_query_count *qc = data;
3775         struct priv *priv = dev->data->dev_private;
3776         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
3777         struct mnl_socket *nl = ctx->nl;
3778         struct mlx5_flow *dev_flow;
3779         struct nlmsghdr *nlh;
3780         uint32_t seq = priv->tcf_context->seq++;
3781         ssize_t ret;
3782         assert(qc);
3783
3784         dev_flow = LIST_FIRST(&flow->dev_flows);
3785         /* E-Switch flow can't be expanded. */
3786         assert(!LIST_NEXT(dev_flow, next));
3787         if (!dev_flow->flow->counter)
3788                 goto notsup_exit;
3789         nlh = dev_flow->tcf.nlh;
3790         nlh->nlmsg_type = RTM_GETTFILTER;
3791         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
3792         nlh->nlmsg_seq = seq;
3793         if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
3794                 goto error_exit;
3795         do {
3796                 ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
3797                 if (ret <= 0)
3798                         break;
3799                 ret = mnl_cb_run(ctx->buf, ret, seq,
3800                                  mnl_socket_get_portid(nl),
3801                                  flow_tcf_nl_message_get_stats_basic,
3802                                  (void *)&sb_data);
3803         } while (ret > 0);
3804         /* Return the delta from last reset. */
3805         if (sb_data.valid) {
3806                 /* Return the delta from last reset. */
3807                 qc->hits_set = 1;
3808                 qc->bytes_set = 1;
3809                 qc->hits = sb_data.counters.packets - flow->counter->hits;
3810                 qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
3811                 if (qc->reset) {
3812                         flow->counter->hits = sb_data.counters.packets;
3813                         flow->counter->bytes = sb_data.counters.bytes;
3814                 }
3815                 return 0;
3816         }
3817         return rte_flow_error_set(error, EINVAL,
3818                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3819                                   NULL,
3820                                   "flow does not have counter");
3821 error_exit:
3822         return rte_flow_error_set
3823                         (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3824                          NULL, "netlink: failed to read flow rule counters");
3825 notsup_exit:
3826         return rte_flow_error_set
3827                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3828                          NULL, "counters are not available.");
3829 }
3830
3831 /**
3832  * Query a flow.
3833  *
3834  * @see rte_flow_query()
3835  * @see rte_flow_ops
3836  */
3837 static int
3838 flow_tcf_query(struct rte_eth_dev *dev,
3839                struct rte_flow *flow,
3840                const struct rte_flow_action *actions,
3841                void *data,
3842                struct rte_flow_error *error)
3843 {
3844         int ret = -EINVAL;
3845
3846         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3847                 switch (actions->type) {
3848                 case RTE_FLOW_ACTION_TYPE_VOID:
3849                         break;
3850                 case RTE_FLOW_ACTION_TYPE_COUNT:
3851                         ret = flow_tcf_query_count(dev, flow, data, error);
3852                         break;
3853                 default:
3854                         return rte_flow_error_set(error, ENOTSUP,
3855                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3856                                                   actions,
3857                                                   "action not supported");
3858                 }
3859         }
3860         return ret;
3861 }
3862
/* E-Switch (TC flower over Netlink) flow driver callbacks. */
const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
	.validate = flow_tcf_validate,
	.prepare = flow_tcf_prepare,
	.translate = flow_tcf_translate,
	.apply = flow_tcf_apply,
	.remove = flow_tcf_remove,
	.destroy = flow_tcf_destroy,
	.query = flow_tcf_query,
};
3872
3873 /**
3874  * Create and configure a libmnl socket for Netlink flow rules.
3875  *
3876  * @return
3877  *   A valid libmnl socket object pointer on success, NULL otherwise and
3878  *   rte_errno is set.
3879  */
3880 static struct mnl_socket *
3881 flow_tcf_mnl_socket_create(void)
3882 {
3883         struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
3884
3885         if (nl) {
3886                 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
3887                                       sizeof(int));
3888                 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
3889                         return nl;
3890         }
3891         rte_errno = errno;
3892         if (nl)
3893                 mnl_socket_close(nl);
3894         return NULL;
3895 }
3896
/**
 * Destroy a libmnl socket.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind. NULL is accepted and
 *   makes the call a no-op.
 */
static void
flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
{
	if (nl)
		mnl_socket_close(nl);
}
3909
/**
 * Initialize ingress qdisc of a given network interface.
 *
 * Any pre-existing ingress qdisc (with all filters attached to it) is
 * deleted first, then a fresh one is created, so the interface starts
 * from a clean TC state.
 *
 * @param ctx
 *   Pointer to tc-flower context to use.
 * @param ifindex
 *   Index of network interface to initialize.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
		   unsigned int ifindex, struct rte_flow_error *error)
{
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	/* 128 spare bytes leave room for the TCA_KIND attribute below. */
	alignas(struct nlmsghdr)
	uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];

	/* Destroy existing ingress qdisc and everything attached to it. */
	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_DELQDISC;
	nlh->nlmsg_flags = NLM_F_REQUEST;
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ifindex;
	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
	tcm->tcm_parent = TC_H_INGRESS;
	/* Ignore errors when qdisc is already absent. */
	if (flow_tcf_nl_ack(ctx, nlh) &&
	    rte_errno != EINVAL && rte_errno != ENOENT)
		return rte_flow_error_set(error, rte_errno,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					  "netlink: failed to remove ingress"
					  " qdisc");
	/* Create fresh ingress qdisc. */
	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_NEWQDISC;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ifindex;
	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
	tcm->tcm_parent = TC_H_INGRESS;
	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
	if (flow_tcf_nl_ack(ctx, nlh))
		return rte_flow_error_set(error, rte_errno,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					  "netlink: failed to create ingress"
					  " qdisc");
	return 0;
}
3965
3966 /**
3967  * Create libmnl context for Netlink flow rules.
3968  *
3969  * @return
3970  *   A valid libmnl socket object pointer on success, NULL otherwise and
3971  *   rte_errno is set.
3972  */
3973 struct mlx5_flow_tcf_context *
3974 mlx5_flow_tcf_context_create(void)
3975 {
3976         struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
3977                                                         sizeof(*ctx),
3978                                                         sizeof(uint32_t));
3979         if (!ctx)
3980                 goto error;
3981         ctx->nl = flow_tcf_mnl_socket_create();
3982         if (!ctx->nl)
3983                 goto error;
3984         ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
3985         ctx->buf = rte_zmalloc(__func__,
3986                                ctx->buf_size, sizeof(uint32_t));
3987         if (!ctx->buf)
3988                 goto error;
3989         ctx->seq = random();
3990         return ctx;
3991 error:
3992         mlx5_flow_tcf_context_destroy(ctx);
3993         return NULL;
3994 }
3995
/**
 * Destroy a libmnl context.
 *
 * Safe to call with NULL or with a partially-initialized context: the
 * socket destroy handles a NULL socket and rte_free() accepts NULL.
 *
 * @param ctx
 *   Libmnl socket of the @p NETLINK_ROUTE kind.
 */
void
mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
{
	if (!ctx)
		return;
	flow_tcf_mnl_socket_destroy(ctx->nl);
	rte_free(ctx->buf);
	rte_free(ctx);
}