net/mlx5: add E-Switch VXLAN to validation routine
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_tcf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 6WIND S.A.
3  * Copyright 2018 Mellanox Technologies, Ltd
4  */
5
6 #include <assert.h>
7 #include <errno.h>
8 #include <libmnl/libmnl.h>
9 #include <linux/gen_stats.h>
10 #include <linux/if_ether.h>
11 #include <linux/netlink.h>
12 #include <linux/pkt_cls.h>
13 #include <linux/pkt_sched.h>
14 #include <linux/rtnetlink.h>
15 #include <linux/tc_act/tc_gact.h>
16 #include <linux/tc_act/tc_mirred.h>
17 #include <netinet/in.h>
18 #include <stdalign.h>
19 #include <stdbool.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <sys/socket.h>
24
25 #include <rte_byteorder.h>
26 #include <rte_errno.h>
27 #include <rte_ether.h>
28 #include <rte_flow.h>
29 #include <rte_malloc.h>
30 #include <rte_common.h>
31
32 #include "mlx5.h"
33 #include "mlx5_flow.h"
34 #include "mlx5_autoconf.h"
35
36 #ifdef HAVE_TC_ACT_VLAN
37
38 #include <linux/tc_act/tc_vlan.h>
39
40 #else /* HAVE_TC_ACT_VLAN */
41
42 #define TCA_VLAN_ACT_POP 1
43 #define TCA_VLAN_ACT_PUSH 2
44 #define TCA_VLAN_ACT_MODIFY 3
45 #define TCA_VLAN_PARMS 2
46 #define TCA_VLAN_PUSH_VLAN_ID 3
47 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
48 #define TCA_VLAN_PAD 5
49 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
50
51 struct tc_vlan {
52         tc_gen;
53         int v_action;
54 };
55
56 #endif /* HAVE_TC_ACT_VLAN */
57
#ifdef HAVE_TC_ACT_PEDIT

#include <linux/tc_act/tc_pedit.h>

#else /* HAVE_TC_ACT_PEDIT */

/*
 * Fallback definitions, normally provided by linux/tc_act/tc_pedit.h
 * on kernels that support the tc-pedit action. Values must match the
 * kernel UAPI.
 */
enum {
	TCA_PEDIT_UNSPEC,
	TCA_PEDIT_TM,
	TCA_PEDIT_PARMS,
	TCA_PEDIT_PAD,
	TCA_PEDIT_PARMS_EX,
	TCA_PEDIT_KEYS_EX,
	TCA_PEDIT_KEY_EX,
	__TCA_PEDIT_MAX
};

enum {
	TCA_PEDIT_KEY_EX_HTYPE = 1,
	TCA_PEDIT_KEY_EX_CMD = 2,
	__TCA_PEDIT_KEY_EX_MAX
};

enum pedit_header_type {
	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
	__PEDIT_HDR_TYPE_MAX,
};

enum pedit_cmd {
	TCA_PEDIT_KEY_EX_CMD_SET = 0,
	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
	__PEDIT_CMD_MAX,
};

struct tc_pedit_key {
	__u32 mask; /* AND */
	__u32 val; /* XOR */
	__u32 off; /* offset */
	__u32 at;
	__u32 offmask;
	__u32 shift;
};

__extension__
struct tc_pedit_sel {
	tc_gen;
	unsigned char nkeys;
	unsigned char flags;
	struct tc_pedit_key keys[0];
};

#endif /* HAVE_TC_ACT_PEDIT */
115
116 #ifdef HAVE_TC_ACT_TUNNEL_KEY
117
118 #include <linux/tc_act/tc_tunnel_key.h>
119
120 #ifndef HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT
121 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
122 #endif
123
124 #ifndef HAVE_TCA_TUNNEL_KEY_NO_CSUM
125 #define TCA_TUNNEL_KEY_NO_CSUM 10
126 #endif
127
128 #else /* HAVE_TC_ACT_TUNNEL_KEY */
129
130 #define TCA_ACT_TUNNEL_KEY 17
131 #define TCA_TUNNEL_KEY_ACT_SET 1
132 #define TCA_TUNNEL_KEY_ACT_RELEASE 2
133 #define TCA_TUNNEL_KEY_PARMS 2
134 #define TCA_TUNNEL_KEY_ENC_IPV4_SRC 3
135 #define TCA_TUNNEL_KEY_ENC_IPV4_DST 4
136 #define TCA_TUNNEL_KEY_ENC_IPV6_SRC 5
137 #define TCA_TUNNEL_KEY_ENC_IPV6_DST 6
138 #define TCA_TUNNEL_KEY_ENC_KEY_ID 7
139 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
140 #define TCA_TUNNEL_KEY_NO_CSUM 10
141
142 struct tc_tunnel_key {
143         tc_gen;
144         int t_action;
145 };
146
147 #endif /* HAVE_TC_ACT_TUNNEL_KEY */
148
149 /* Normally found in linux/netlink.h. */
150 #ifndef NETLINK_CAP_ACK
151 #define NETLINK_CAP_ACK 10
152 #endif
153
154 /* Normally found in linux/pkt_sched.h. */
155 #ifndef TC_H_MIN_INGRESS
156 #define TC_H_MIN_INGRESS 0xfff2u
157 #endif
158
159 /* Normally found in linux/pkt_cls.h. */
160 #ifndef TCA_CLS_FLAGS_SKIP_SW
161 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
162 #endif
163 #ifndef HAVE_TCA_CHAIN
164 #define TCA_CHAIN 11
165 #endif
166 #ifndef HAVE_TCA_FLOWER_ACT
167 #define TCA_FLOWER_ACT 3
168 #endif
169 #ifndef HAVE_TCA_FLOWER_FLAGS
170 #define TCA_FLOWER_FLAGS 22
171 #endif
172 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
173 #define TCA_FLOWER_KEY_ETH_TYPE 8
174 #endif
175 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
176 #define TCA_FLOWER_KEY_ETH_DST 4
177 #endif
178 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
179 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
180 #endif
181 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
182 #define TCA_FLOWER_KEY_ETH_SRC 6
183 #endif
184 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
185 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
186 #endif
187 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
188 #define TCA_FLOWER_KEY_IP_PROTO 9
189 #endif
190 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
191 #define TCA_FLOWER_KEY_IPV4_SRC 10
192 #endif
193 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
194 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
195 #endif
196 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
197 #define TCA_FLOWER_KEY_IPV4_DST 12
198 #endif
199 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
200 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
201 #endif
202 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
203 #define TCA_FLOWER_KEY_IPV6_SRC 14
204 #endif
205 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
206 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
207 #endif
208 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
209 #define TCA_FLOWER_KEY_IPV6_DST 16
210 #endif
211 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
212 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
213 #endif
214 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
215 #define TCA_FLOWER_KEY_TCP_SRC 18
216 #endif
217 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
218 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
219 #endif
220 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
221 #define TCA_FLOWER_KEY_TCP_DST 19
222 #endif
223 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
224 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
225 #endif
226 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
227 #define TCA_FLOWER_KEY_UDP_SRC 20
228 #endif
229 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
230 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
231 #endif
232 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
233 #define TCA_FLOWER_KEY_UDP_DST 21
234 #endif
235 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
236 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
237 #endif
238 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
239 #define TCA_FLOWER_KEY_VLAN_ID 23
240 #endif
241 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
242 #define TCA_FLOWER_KEY_VLAN_PRIO 24
243 #endif
244 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
245 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
246 #endif
247 #ifndef HAVE_TCA_FLOWER_KEY_ENC_KEY_ID
248 #define TCA_FLOWER_KEY_ENC_KEY_ID 26
249 #endif
250 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC
251 #define TCA_FLOWER_KEY_ENC_IPV4_SRC 27
252 #endif
253 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK
254 #define TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK 28
255 #endif
256 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST
257 #define TCA_FLOWER_KEY_ENC_IPV4_DST 29
258 #endif
259 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK
260 #define TCA_FLOWER_KEY_ENC_IPV4_DST_MASK 30
261 #endif
262 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC
263 #define TCA_FLOWER_KEY_ENC_IPV6_SRC 31
264 #endif
265 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK
266 #define TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK 32
267 #endif
268 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST
269 #define TCA_FLOWER_KEY_ENC_IPV6_DST 33
270 #endif
271 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK
272 #define TCA_FLOWER_KEY_ENC_IPV6_DST_MASK 34
273 #endif
274 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT
275 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT 43
276 #endif
277 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK
278 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK 44
279 #endif
280 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT
281 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT 45
282 #endif
283 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK
284 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK 46
285 #endif
286 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
287 #define TCA_FLOWER_KEY_TCP_FLAGS 71
288 #endif
289 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
290 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
291 #endif
292 #ifndef HAVE_TC_ACT_GOTO_CHAIN
293 #define TC_ACT_GOTO_CHAIN 0x20000000
294 #endif
295
296 #ifndef IPV6_ADDR_LEN
297 #define IPV6_ADDR_LEN 16
298 #endif
299
300 #ifndef IPV4_ADDR_LEN
301 #define IPV4_ADDR_LEN 4
302 #endif
303
304 #ifndef TP_PORT_LEN
305 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
306 #endif
307
308 #ifndef TTL_LEN
309 #define TTL_LEN 1
310 #endif
311
312 #ifndef TCA_ACT_MAX_PRIO
313 #define TCA_ACT_MAX_PRIO 32
314 #endif
315
316 /** UDP port range of VXLAN devices created by driver. */
317 #define MLX5_VXLAN_PORT_MIN 30000
318 #define MLX5_VXLAN_PORT_MAX 60000
319 #define MLX5_VXLAN_DEVICE_PFX "vmlx_"
320
321 /** Tunnel action type, used for @p type in header structure. */
322 enum flow_tcf_tunact_type {
323         FLOW_TCF_TUNACT_VXLAN_DECAP,
324         FLOW_TCF_TUNACT_VXLAN_ENCAP,
325 };
326
327 /** Flags used for @p mask in tunnel action encap descriptors. */
328 #define FLOW_TCF_ENCAP_ETH_SRC (1u << 0)
329 #define FLOW_TCF_ENCAP_ETH_DST (1u << 1)
330 #define FLOW_TCF_ENCAP_IPV4_SRC (1u << 2)
331 #define FLOW_TCF_ENCAP_IPV4_DST (1u << 3)
332 #define FLOW_TCF_ENCAP_IPV6_SRC (1u << 4)
333 #define FLOW_TCF_ENCAP_IPV6_DST (1u << 5)
334 #define FLOW_TCF_ENCAP_UDP_SRC (1u << 6)
335 #define FLOW_TCF_ENCAP_UDP_DST (1u << 7)
336 #define FLOW_TCF_ENCAP_VXLAN_VNI (1u << 8)
337
338 /**
339  * Structure for holding netlink context.
340  * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
341  * Using this (8KB) buffer size ensures that netlink messages will never be
342  * truncated.
343  */
344 struct mlx5_flow_tcf_context {
345         struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
346         uint32_t seq; /* Message sequence number. */
347         uint32_t buf_size; /* Message buffer size. */
348         uint8_t *buf; /* Message buffer. */
349 };
350
351 /**
352  * Neigh rule structure. The neigh rule is applied via Netlink to
353  * outer tunnel iface in order to provide destination MAC address
354  * for the VXLAN encapsultion. The neigh rule is implicitly related
355  * to the Flow itself and can be shared by multiple Flows.
356  */
357 struct tcf_neigh_rule {
358         LIST_ENTRY(tcf_neigh_rule) next;
359         uint32_t refcnt;
360         struct ether_addr eth;
361         uint16_t mask;
362         union {
363                 struct {
364                         rte_be32_t dst;
365                 } ipv4;
366                 struct {
367                         uint8_t dst[IPV6_ADDR_LEN];
368                 } ipv6;
369         };
370 };
371
372 /**
373  * Local rule structure. The local rule is applied via Netlink to
374  * outer tunnel iface in order to provide local and peer IP addresses
375  * of the VXLAN tunnel for encapsulation. The local rule is implicitly
376  * related to the Flow itself and can be shared by multiple Flows.
377  */
378 struct tcf_local_rule {
379         LIST_ENTRY(tcf_local_rule) next;
380         uint32_t refcnt;
381         uint16_t mask;
382         union {
383                 struct {
384                         rte_be32_t dst;
385                         rte_be32_t src;
386                 } ipv4;
387                 struct {
388                         uint8_t dst[IPV6_ADDR_LEN];
389                         uint8_t src[IPV6_ADDR_LEN];
390                 } ipv6;
391         };
392 };
393
394 /** VXLAN virtual netdev. */
395 struct tcf_vtep {
396         LIST_ENTRY(tcf_vtep) next;
397         LIST_HEAD(, tcf_neigh_rule) neigh;
398         LIST_HEAD(, tcf_local_rule) local;
399         uint32_t refcnt;
400         unsigned int ifindex; /**< Own interface index. */
401         unsigned int ifouter; /**< Index of device attached to. */
402         uint16_t port;
403         uint8_t created;
404 };
405
406 /** Tunnel descriptor header, common for all tunnel types. */
407 struct flow_tcf_tunnel_hdr {
408         uint32_t type; /**< Tunnel action type. */
409         struct tcf_vtep *vtep; /**< Virtual tunnel endpoint device. */
410         unsigned int ifindex_org; /**< Original dst/src interface */
411         unsigned int *ifindex_ptr; /**< Interface ptr in message. */
412 };
413
414 struct flow_tcf_vxlan_decap {
415         struct flow_tcf_tunnel_hdr hdr;
416         uint16_t udp_port;
417 };
418
/** VXLAN encap parameters: outer headers to be built by the tunnel_key action. */
struct flow_tcf_vxlan_encap {
	struct flow_tcf_tunnel_hdr hdr; /**< Common tunnel descriptor header. */
	uint32_t mask; /**< FLOW_TCF_ENCAP_* flags marking which fields below are set. */
	struct {
		struct ether_addr dst; /**< Outer destination MAC. */
		struct ether_addr src; /**< Outer source MAC. */
	} eth;
	union {
		struct {
			rte_be32_t dst; /**< Outer IPv4 destination address. */
			rte_be32_t src; /**< Outer IPv4 source address. */
		} ipv4;
		struct {
			uint8_t dst[IPV6_ADDR_LEN]; /**< Outer IPv6 destination address. */
			uint8_t src[IPV6_ADDR_LEN]; /**< Outer IPv6 source address. */
		} ipv6;
	};
	struct {
		rte_be16_t src; /**< Outer UDP source port. */
		rte_be16_t dst; /**< Outer UDP destination port. */
	} udp;
	struct {
		uint8_t vni[3]; /**< 24-bit VXLAN network identifier. */
	} vxlan;
};
444
445 /** Structure used when extracting the values of a flow counters
446  * from a netlink message.
447  */
448 struct flow_tcf_stats_basic {
449         bool valid;
450         struct gnet_stats_basic counters;
451 };
452
453 /** Empty masks for known item types. */
454 static const union {
455         struct rte_flow_item_port_id port_id;
456         struct rte_flow_item_eth eth;
457         struct rte_flow_item_vlan vlan;
458         struct rte_flow_item_ipv4 ipv4;
459         struct rte_flow_item_ipv6 ipv6;
460         struct rte_flow_item_tcp tcp;
461         struct rte_flow_item_udp udp;
462         struct rte_flow_item_vxlan vxlan;
463 } flow_tcf_mask_empty;
464
465 /** Supported masks for known item types. */
466 static const struct {
467         struct rte_flow_item_port_id port_id;
468         struct rte_flow_item_eth eth;
469         struct rte_flow_item_vlan vlan;
470         struct rte_flow_item_ipv4 ipv4;
471         struct rte_flow_item_ipv6 ipv6;
472         struct rte_flow_item_tcp tcp;
473         struct rte_flow_item_udp udp;
474         struct rte_flow_item_vxlan vxlan;
475 } flow_tcf_mask_supported = {
476         .port_id = {
477                 .id = 0xffffffff,
478         },
479         .eth = {
480                 .type = RTE_BE16(0xffff),
481                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
482                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
483         },
484         .vlan = {
485                 /* PCP and VID only, no DEI. */
486                 .tci = RTE_BE16(0xefff),
487                 .inner_type = RTE_BE16(0xffff),
488         },
489         .ipv4.hdr = {
490                 .next_proto_id = 0xff,
491                 .src_addr = RTE_BE32(0xffffffff),
492                 .dst_addr = RTE_BE32(0xffffffff),
493         },
494         .ipv6.hdr = {
495                 .proto = 0xff,
496                 .src_addr =
497                         "\xff\xff\xff\xff\xff\xff\xff\xff"
498                         "\xff\xff\xff\xff\xff\xff\xff\xff",
499                 .dst_addr =
500                         "\xff\xff\xff\xff\xff\xff\xff\xff"
501                         "\xff\xff\xff\xff\xff\xff\xff\xff",
502         },
503         .tcp.hdr = {
504                 .src_port = RTE_BE16(0xffff),
505                 .dst_port = RTE_BE16(0xffff),
506                 .tcp_flags = 0xff,
507         },
508         .udp.hdr = {
509                 .src_port = RTE_BE16(0xffff),
510                 .dst_port = RTE_BE16(0xffff),
511         },
512         .vxlan = {
513                .vni = "\xff\xff\xff",
514         },
515 };
516
517 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
518 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
519 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
520 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
521 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
522
523 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
524
525 /** DPDK port to network interface index (ifindex) conversion. */
526 struct flow_tcf_ptoi {
527         uint16_t port_id; /**< DPDK port ID. */
528         unsigned int ifindex; /**< Network interface index. */
529 };
530
531 /* Due to a limitation on driver/FW. */
532 #define MLX5_TCF_GROUP_ID_MAX 3
533 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
534
535 #define MLX5_TCF_FATE_ACTIONS \
536         (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
537          MLX5_FLOW_ACTION_JUMP)
538
539 #define MLX5_TCF_VLAN_ACTIONS \
540         (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
541          MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
542
543 #define MLX5_TCF_VXLAN_ACTIONS \
544         (MLX5_FLOW_ACTION_VXLAN_ENCAP | MLX5_FLOW_ACTION_VXLAN_DECAP)
545
546 #define MLX5_TCF_PEDIT_ACTIONS \
547         (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
548          MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
549          MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
550          MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
551          MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
552
553 #define MLX5_TCF_CONFIG_ACTIONS \
554         (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
555          MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
556          MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
557          (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
558
559 #define MAX_PEDIT_KEYS 128
560 #define SZ_PEDIT_KEY_VAL 4
561
562 #define NUM_OF_PEDIT_KEYS(sz) \
563         (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
564
565 struct pedit_key_ex {
566         enum pedit_header_type htype;
567         enum pedit_cmd cmd;
568 };
569
570 struct pedit_parser {
571         struct tc_pedit_sel sel;
572         struct tc_pedit_key keys[MAX_PEDIT_KEYS];
573         struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
574 };
575
/**
 * Create space for using the implicitly created TC flow counter.
 *
 * Takes no parameters: the E-Switch counter is anonymous (id 0) and is
 * not bound to a particular Ethernet device here.
 *
 * @return
 *   A pointer to the counter data structure, NULL otherwise and
 *   rte_errno is set.
 */
static struct mlx5_flow_counter *
flow_tcf_counter_new(void)
{
	struct mlx5_flow_counter *cnt;

	/*
	 * eswitch counter cannot be shared and its id is unknown.
	 * currently returning all with id 0.
	 * in the future maybe better to switch to unique numbers.
	 */
	struct mlx5_flow_counter tmpl = {
		.ref_cnt = 1,
	};
	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
	if (!cnt) {
		rte_errno = ENOMEM;
		return NULL;
	}
	*cnt = tmpl;
	/* Implicit counter, do not add to list. */
	return cnt;
}
608
609 /**
610  * Set pedit key of MAC address
611  *
612  * @param[in] actions
613  *   pointer to action specification
614  * @param[in,out] p_parser
615  *   pointer to pedit_parser
616  */
617 static void
618 flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
619                            struct pedit_parser *p_parser)
620 {
621         int idx = p_parser->sel.nkeys;
622         uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
623                                         offsetof(struct ether_hdr, s_addr) :
624                                         offsetof(struct ether_hdr, d_addr);
625         const struct rte_flow_action_set_mac *conf =
626                 (const struct rte_flow_action_set_mac *)actions->conf;
627
628         p_parser->keys[idx].off = off;
629         p_parser->keys[idx].mask = ~UINT32_MAX;
630         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
631         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
632         memcpy(&p_parser->keys[idx].val,
633                 conf->mac_addr, SZ_PEDIT_KEY_VAL);
634         idx++;
635         p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
636         p_parser->keys[idx].mask = 0xFFFF0000;
637         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
638         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
639         memcpy(&p_parser->keys[idx].val,
640                 conf->mac_addr + SZ_PEDIT_KEY_VAL,
641                 ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
642         p_parser->sel.nkeys = (++idx);
643 }
644
645 /**
646  * Set pedit key of decrease/set ttl
647  *
648  * @param[in] actions
649  *   pointer to action specification
650  * @param[in,out] p_parser
651  *   pointer to pedit_parser
652  * @param[in] item_flags
653  *   flags of all items presented
654  */
655 static void
656 flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
657                                 struct pedit_parser *p_parser,
658                                 uint64_t item_flags)
659 {
660         int idx = p_parser->sel.nkeys;
661
662         p_parser->keys[idx].mask = 0xFFFFFF00;
663         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
664                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
665                 p_parser->keys[idx].off =
666                         offsetof(struct ipv4_hdr, time_to_live);
667         }
668         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
669                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
670                 p_parser->keys[idx].off =
671                         offsetof(struct ipv6_hdr, hop_limits);
672         }
673         if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
674                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
675                 p_parser->keys[idx].val = 0x000000FF;
676         } else {
677                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
678                 p_parser->keys[idx].val =
679                         (__u32)((const struct rte_flow_action_set_ttl *)
680                          actions->conf)->ttl_value;
681         }
682         p_parser->sel.nkeys = (++idx);
683 }
684
685 /**
686  * Set pedit key of transport (TCP/UDP) port value
687  *
688  * @param[in] actions
689  *   pointer to action specification
690  * @param[in,out] p_parser
691  *   pointer to pedit_parser
692  * @param[in] item_flags
693  *   flags of all items presented
694  */
695 static void
696 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
697                                 struct pedit_parser *p_parser,
698                                 uint64_t item_flags)
699 {
700         int idx = p_parser->sel.nkeys;
701
702         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
703                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
704         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
705                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
706         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
707         /* offset of src/dst port is same for TCP and UDP */
708         p_parser->keys[idx].off =
709                 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
710                 offsetof(struct tcp_hdr, src_port) :
711                 offsetof(struct tcp_hdr, dst_port);
712         p_parser->keys[idx].mask = 0xFFFF0000;
713         p_parser->keys[idx].val =
714                 (__u32)((const struct rte_flow_action_set_tp *)
715                                 actions->conf)->port;
716         p_parser->sel.nkeys = (++idx);
717 }
718
719 /**
720  * Set pedit key of ipv6 address
721  *
722  * @param[in] actions
723  *   pointer to action specification
724  * @param[in,out] p_parser
725  *   pointer to pedit_parser
726  */
727 static void
728 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
729                                  struct pedit_parser *p_parser)
730 {
731         int idx = p_parser->sel.nkeys;
732         int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
733         int off_base =
734                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
735                 offsetof(struct ipv6_hdr, src_addr) :
736                 offsetof(struct ipv6_hdr, dst_addr);
737         const struct rte_flow_action_set_ipv6 *conf =
738                 (const struct rte_flow_action_set_ipv6 *)actions->conf;
739
740         for (int i = 0; i < keys; i++, idx++) {
741                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
742                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
743                 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
744                 p_parser->keys[idx].mask = ~UINT32_MAX;
745                 memcpy(&p_parser->keys[idx].val,
746                         conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
747                         SZ_PEDIT_KEY_VAL);
748         }
749         p_parser->sel.nkeys += keys;
750 }
751
752 /**
753  * Set pedit key of ipv4 address
754  *
755  * @param[in] actions
756  *   pointer to action specification
757  * @param[in,out] p_parser
758  *   pointer to pedit_parser
759  */
760 static void
761 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
762                                  struct pedit_parser *p_parser)
763 {
764         int idx = p_parser->sel.nkeys;
765
766         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
767         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
768         p_parser->keys[idx].off =
769                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
770                 offsetof(struct ipv4_hdr, src_addr) :
771                 offsetof(struct ipv4_hdr, dst_addr);
772         p_parser->keys[idx].mask = ~UINT32_MAX;
773         p_parser->keys[idx].val =
774                 ((const struct rte_flow_action_set_ipv4 *)
775                  actions->conf)->ipv4_addr;
776         p_parser->sel.nkeys = (++idx);
777 }
778
/**
 * Create the pedit's na attribute in netlink message
 * on pre-allocate message buffer
 *
 * Consumes every consecutive rewrite action starting at @p *actions and
 * folds them into a single tc-pedit action in the message. On return
 * @p *actions points to the last action consumed, so the caller's loop
 * increment lands on the first non-pedit action.
 *
 * @param[in,out] nl
 *   pointer to pre-allocated netlink message buffer
 * @param[in,out] actions
 *   pointer to pointer of actions specification.
 * @param[in] item_flags
 *   flags of all item presented
 */
static void
flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
			      const struct rte_flow_action **actions,
			      uint64_t item_flags)
{
	struct pedit_parser p_parser;
	struct nlattr *na_act_options;
	struct nlattr *na_pedit_keys;

	memset(&p_parser, 0, sizeof(p_parser));
	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
	na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
	/* all modify header actions should be in one tc-pedit action */
	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
		switch ((*actions)->type) {
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
			break;
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
			break;
		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
			flow_tcf_pedit_key_set_tp_port(*actions,
							&p_parser, item_flags);
			break;
		case RTE_FLOW_ACTION_TYPE_SET_TTL:
		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
			flow_tcf_pedit_key_set_dec_ttl(*actions,
							&p_parser, item_flags);
			break;
		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
			flow_tcf_pedit_key_set_mac(*actions, &p_parser);
			break;
		default:
			/* First non-rewrite action ends the pedit run. */
			goto pedit_mnl_msg_done;
		}
	}
pedit_mnl_msg_done:
	p_parser.sel.action = TC_ACT_PIPE;
	/* Selector plus the plain key array go into one attribute. */
	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
		     sizeof(p_parser.sel) +
		     p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
		     &p_parser);
	/* Extended key info (htype/cmd) is emitted as nested attributes. */
	na_pedit_keys =
		mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
	for (int i = 0; i < p_parser.sel.nkeys; i++) {
		struct nlattr *na_pedit_key =
			mnl_attr_nest_start(nl,
					    TCA_PEDIT_KEY_EX | NLA_F_NESTED);
		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
				 p_parser.keys_ex[i].htype);
		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
				 p_parser.keys_ex[i].cmd);
		mnl_attr_nest_end(nl, na_pedit_key);
	}
	mnl_attr_nest_end(nl, na_pedit_keys);
	mnl_attr_nest_end(nl, na_act_options);
	/* Step back so the caller's loop increment skips nothing. */
	(*actions)--;
}
855
856 /**
857  * Calculate max memory size of one TC-pedit actions.
858  * One TC-pedit action can contain set of keys each defining
859  * a rewrite element (rte_flow action)
860  *
861  * @param[in,out] actions
862  *   actions specification.
863  * @param[in,out] action_flags
864  *   actions flags
865  * @param[in,out] size
866  *   accumulated size
867  * @return
868  *   Max memory size of one TC-pedit action
869  */
870 static int
871 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
872                                 uint64_t *action_flags)
873 {
874         int pedit_size = 0;
875         int keys = 0;
876         uint64_t flags = 0;
877
878         pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
879                       SZ_NLATTR_STRZ_OF("pedit") +
880                       SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
881         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
882                 switch ((*actions)->type) {
883                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
884                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
885                         flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
886                         break;
887                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
888                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
889                         flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
890                         break;
891                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
892                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
893                         flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
894                         break;
895                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
896                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
897                         flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
898                         break;
899                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
900                         /* TCP is as same as UDP */
901                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
902                         flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
903                         break;
904                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
905                         /* TCP is as same as UDP */
906                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
907                         flags |= MLX5_FLOW_ACTION_SET_TP_DST;
908                         break;
909                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
910                         keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
911                         flags |= MLX5_FLOW_ACTION_SET_TTL;
912                         break;
913                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
914                         keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
915                         flags |= MLX5_FLOW_ACTION_DEC_TTL;
916                         break;
917                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
918                         keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
919                         flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
920                         break;
921                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
922                         keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
923                         flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
924                         break;
925                 default:
926                         goto get_pedit_action_size_done;
927                 }
928         }
929 get_pedit_action_size_done:
930         /* TCA_PEDIT_PARAMS_EX */
931         pedit_size +=
932                 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
933                                   keys * sizeof(struct tc_pedit_key));
934         pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
935         pedit_size += keys *
936                       /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
937                       (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
938                        SZ_NLATTR_DATA_OF(2));
939         (*action_flags) |= flags;
940         (*actions)--;
941         return pedit_size;
942 }
943
944 /**
945  * Retrieve mask for pattern item.
946  *
947  * This function does basic sanity checks on a pattern item in order to
948  * return the most appropriate mask for it.
949  *
950  * @param[in] item
951  *   Item specification.
952  * @param[in] mask_default
953  *   Default mask for pattern item as specified by the flow API.
954  * @param[in] mask_supported
955  *   Mask fields supported by the implementation.
956  * @param[in] mask_empty
957  *   Empty mask to return when there is no specification.
958  * @param[out] error
959  *   Perform verbose error reporting if not NULL.
960  *
961  * @return
962  *   Either @p item->mask or one of the mask parameters on success, NULL
963  *   otherwise and rte_errno is set.
964  */
965 static const void *
966 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
967                    const void *mask_supported, const void *mask_empty,
968                    size_t mask_size, struct rte_flow_error *error)
969 {
970         const uint8_t *mask;
971         size_t i;
972
973         /* item->last and item->mask cannot exist without item->spec. */
974         if (!item->spec && (item->mask || item->last)) {
975                 rte_flow_error_set(error, EINVAL,
976                                    RTE_FLOW_ERROR_TYPE_ITEM, item,
977                                    "\"mask\" or \"last\" field provided without"
978                                    " a corresponding \"spec\"");
979                 return NULL;
980         }
981         /* No spec, no mask, no problem. */
982         if (!item->spec)
983                 return mask_empty;
984         mask = item->mask ? item->mask : mask_default;
985         assert(mask);
986         /*
987          * Single-pass check to make sure that:
988          * - Mask is supported, no bits are set outside mask_supported.
989          * - Both item->spec and item->last are included in mask.
990          */
991         for (i = 0; i != mask_size; ++i) {
992                 if (!mask[i])
993                         continue;
994                 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
995                     ((const uint8_t *)mask_supported)[i]) {
996                         rte_flow_error_set(error, ENOTSUP,
997                                            RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
998                                            "unsupported field found"
999                                            " in \"mask\"");
1000                         return NULL;
1001                 }
1002                 if (item->last &&
1003                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
1004                     (((const uint8_t *)item->last)[i] & mask[i])) {
1005                         rte_flow_error_set(error, EINVAL,
1006                                            RTE_FLOW_ERROR_TYPE_ITEM_LAST,
1007                                            item->last,
1008                                            "range between \"spec\" and \"last\""
1009                                            " not comprised in \"mask\"");
1010                         return NULL;
1011                 }
1012         }
1013         return mask;
1014 }
1015
1016 /**
1017  * Build a conversion table between port ID and ifindex.
1018  *
1019  * @param[in] dev
1020  *   Pointer to Ethernet device.
1021  * @param[out] ptoi
1022  *   Pointer to ptoi table.
1023  * @param[in] len
1024  *   Size of ptoi table provided.
1025  *
1026  * @return
1027  *   Size of ptoi table filled.
1028  */
1029 static unsigned int
1030 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
1031                           unsigned int len)
1032 {
1033         unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
1034         uint16_t port_id[n + 1];
1035         unsigned int i;
1036         unsigned int own = 0;
1037
1038         /* At least one port is needed when no switch domain is present. */
1039         if (!n) {
1040                 n = 1;
1041                 port_id[0] = dev->data->port_id;
1042         } else {
1043                 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
1044         }
1045         if (n > len)
1046                 return 0;
1047         for (i = 0; i != n; ++i) {
1048                 struct rte_eth_dev_info dev_info;
1049
1050                 rte_eth_dev_info_get(port_id[i], &dev_info);
1051                 if (port_id[i] == dev->data->port_id)
1052                         own = i;
1053                 ptoi[i].port_id = port_id[i];
1054                 ptoi[i].ifindex = dev_info.if_index;
1055         }
1056         /* Ensure first entry of ptoi[] is the current device. */
1057         if (own) {
1058                 ptoi[n] = ptoi[0];
1059                 ptoi[0] = ptoi[own];
1060                 ptoi[own] = ptoi[n];
1061         }
1062         /* An entry with zero ifindex terminates ptoi[]. */
1063         ptoi[n].port_id = 0;
1064         ptoi[n].ifindex = 0;
1065         return n;
1066 }
1067
1068 /**
1069  * Verify the @p attr will be correctly understood by the E-switch.
1070  *
1071  * @param[in] attr
1072  *   Pointer to flow attributes
1073  * @param[out] error
1074  *   Pointer to error structure.
1075  *
1076  * @return
1077  *   0 on success, a negative errno value otherwise and rte_errno is set.
1078  */
1079 static int
1080 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
1081                              struct rte_flow_error *error)
1082 {
1083         /*
1084          * Supported attributes: groups, some priorities and ingress only.
1085          * group is supported only if kernel supports chain. Don't care about
1086          * transfer as it is the caller's problem.
1087          */
1088         if (attr->group > MLX5_TCF_GROUP_ID_MAX)
1089                 return rte_flow_error_set(error, ENOTSUP,
1090                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
1091                                           "group ID larger than "
1092                                           RTE_STR(MLX5_TCF_GROUP_ID_MAX)
1093                                           " isn't supported");
1094         else if (attr->group > 0 &&
1095                  attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
1096                 return rte_flow_error_set(error, ENOTSUP,
1097                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1098                                           attr,
1099                                           "lowest priority level is "
1100                                           RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
1101                                           " when group is configured");
1102         else if (attr->priority > 0xfffe)
1103                 return rte_flow_error_set(error, ENOTSUP,
1104                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1105                                           attr,
1106                                           "lowest priority level is 0xfffe");
1107         if (!attr->ingress)
1108                 return rte_flow_error_set(error, EINVAL,
1109                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1110                                           attr, "only ingress is supported");
1111         if (attr->egress)
1112                 return rte_flow_error_set(error, ENOTSUP,
1113                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1114                                           attr, "egress is not supported");
1115         return 0;
1116 }
1117
1118 /**
1119  * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_ETH item for E-Switch.
1120  * The routine checks the L2 fields to be used in encapsulation header.
1121  *
1122  * @param[in] item
1123  *   Pointer to the item structure.
1124  * @param[out] error
1125  *   Pointer to the error structure.
1126  *
1127  * @return
1128  *   0 on success, a negative errno value otherwise and rte_errno is set.
1129  **/
1130 static int
1131 flow_tcf_validate_vxlan_encap_eth(const struct rte_flow_item *item,
1132                                   struct rte_flow_error *error)
1133 {
1134         const struct rte_flow_item_eth *spec = item->spec;
1135         const struct rte_flow_item_eth *mask = item->mask;
1136
1137         if (!spec) {
1138                 /*
1139                  * Specification for L2 addresses can be empty
1140                  * because these ones are optional and not
1141                  * required directly by tc rule. Kernel tries
1142                  * to resolve these ones on its own
1143                  */
1144                 return 0;
1145         }
1146         if (!mask) {
1147                 /* If mask is not specified use the default one. */
1148                 mask = &rte_flow_item_eth_mask;
1149         }
1150         if (memcmp(&mask->dst,
1151                    &flow_tcf_mask_empty.eth.dst,
1152                    sizeof(flow_tcf_mask_empty.eth.dst))) {
1153                 if (memcmp(&mask->dst,
1154                            &rte_flow_item_eth_mask.dst,
1155                            sizeof(rte_flow_item_eth_mask.dst)))
1156                         return rte_flow_error_set
1157                                 (error, ENOTSUP,
1158                                  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1159                                  "no support for partial mask on"
1160                                  " \"eth.dst\" field");
1161         }
1162         if (memcmp(&mask->src,
1163                    &flow_tcf_mask_empty.eth.src,
1164                    sizeof(flow_tcf_mask_empty.eth.src))) {
1165                 if (memcmp(&mask->src,
1166                            &rte_flow_item_eth_mask.src,
1167                            sizeof(rte_flow_item_eth_mask.src)))
1168                         return rte_flow_error_set
1169                                 (error, ENOTSUP,
1170                                  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1171                                  "no support for partial mask on"
1172                                  " \"eth.src\" field");
1173         }
1174         if (mask->type != RTE_BE16(0x0000)) {
1175                 if (mask->type != RTE_BE16(0xffff))
1176                         return rte_flow_error_set
1177                                 (error, ENOTSUP,
1178                                  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1179                                  "no support for partial mask on"
1180                                  " \"eth.type\" field");
1181                 DRV_LOG(WARNING,
1182                         "outer ethernet type field"
1183                         " cannot be forced for vxlan"
1184                         " encapsulation, parameter ignored");
1185         }
1186         return 0;
1187 }
1188
1189 /**
1190  * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV4 item for E-Switch.
1191  * The routine checks the IPv4 fields to be used in encapsulation header.
1192  *
1193  * @param[in] item
1194  *   Pointer to the item structure.
1195  * @param[out] error
1196  *   Pointer to the error structure.
1197  *
1198  * @return
1199  *   0 on success, a negative errno value otherwise and rte_errno is set.
1200  **/
1201 static int
1202 flow_tcf_validate_vxlan_encap_ipv4(const struct rte_flow_item *item,
1203                                    struct rte_flow_error *error)
1204 {
1205         const struct rte_flow_item_ipv4 *spec = item->spec;
1206         const struct rte_flow_item_ipv4 *mask = item->mask;
1207
1208         if (!spec) {
1209                 /*
1210                  * Specification for IP addresses cannot be empty
1211                  * because it is required by tunnel_key parameter.
1212                  */
1213                 return rte_flow_error_set(error, EINVAL,
1214                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1215                                           "NULL outer ipv4 address"
1216                                           " specification for vxlan"
1217                                           " encapsulation");
1218         }
1219         if (!mask)
1220                 mask = &rte_flow_item_ipv4_mask;
1221         if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
1222                 if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
1223                         return rte_flow_error_set
1224                                 (error, ENOTSUP,
1225                                  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1226                                  "no support for partial mask on"
1227                                  " \"ipv4.hdr.dst_addr\" field"
1228                                  " for vxlan encapsulation");
1229                 /* More IPv4 address validations can be put here. */
1230         } else {
1231                 /*
1232                  * Kernel uses the destination IP address to determine
1233                  * the routing path and obtain the MAC destination
1234                  * address, so IP destination address must be
1235                  * specified in the tc rule.
1236                  */
1237                 return rte_flow_error_set(error, EINVAL,
1238                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1239                                           "outer ipv4 destination address"
1240                                           " must be specified for"
1241                                           " vxlan encapsulation");
1242         }
1243         if (mask->hdr.src_addr != RTE_BE32(0x00000000)) {
1244                 if (mask->hdr.src_addr != RTE_BE32(0xffffffff))
1245                         return rte_flow_error_set
1246                                 (error, ENOTSUP,
1247                                  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1248                                  "no support for partial mask on"
1249                                  " \"ipv4.hdr.src_addr\" field"
1250                                  " for vxlan encapsulation");
1251                 /* More IPv4 address validations can be put here. */
1252         } else {
1253                 /*
1254                  * Kernel uses the source IP address to select the
1255                  * interface for egress encapsulated traffic, so
1256                  * it must be specified in the tc rule.
1257                  */
1258                 return rte_flow_error_set(error, EINVAL,
1259                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1260                                           "outer ipv4 source address"
1261                                           " must be specified for"
1262                                           " vxlan encapsulation");
1263         }
1264         return 0;
1265 }
1266
1267 /**
1268  * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV6 item for E-Switch.
1269  * The routine checks the IPv6 fields to be used in encapsulation header.
1270  *
1271  * @param[in] item
1272  *   Pointer to the item structure.
1273  * @param[out] error
1274  *   Pointer to the error structure.
1275  *
1276  * @return
1277  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1278  **/
1279 static int
1280 flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item,
1281                                    struct rte_flow_error *error)
1282 {
1283         const struct rte_flow_item_ipv6 *spec = item->spec;
1284         const struct rte_flow_item_ipv6 *mask = item->mask;
1285
1286         if (!spec) {
1287                 /*
1288                  * Specification for IP addresses cannot be empty
1289                  * because it is required by tunnel_key parameter.
1290                  */
1291                 return rte_flow_error_set(error, EINVAL,
1292                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1293                                           "NULL outer ipv6 address"
1294                                           " specification for"
1295                                           " vxlan encapsulation");
1296         }
1297         if (!mask)
1298                 mask = &rte_flow_item_ipv6_mask;
1299         if (memcmp(&mask->hdr.dst_addr,
1300                    &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
1301                    IPV6_ADDR_LEN)) {
1302                 if (memcmp(&mask->hdr.dst_addr,
1303                            &rte_flow_item_ipv6_mask.hdr.dst_addr,
1304                            IPV6_ADDR_LEN))
1305                         return rte_flow_error_set
1306                                         (error, ENOTSUP,
1307                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1308                                          "no support for partial mask on"
1309                                          " \"ipv6.hdr.dst_addr\" field"
1310                                          " for vxlan encapsulation");
1311                 /* More IPv6 address validations can be put here. */
1312         } else {
1313                 /*
1314                  * Kernel uses the destination IP address to determine
1315                  * the routing path and obtain the MAC destination
1316                  * address (heigh or gate), so IP destination address
1317                  * must be specified within the tc rule.
1318                  */
1319                 return rte_flow_error_set(error, EINVAL,
1320                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1321                                           "outer ipv6 destination address"
1322                                           " must be specified for"
1323                                           " vxlan encapsulation");
1324         }
1325         if (memcmp(&mask->hdr.src_addr,
1326                    &flow_tcf_mask_empty.ipv6.hdr.src_addr,
1327                    IPV6_ADDR_LEN)) {
1328                 if (memcmp(&mask->hdr.src_addr,
1329                            &rte_flow_item_ipv6_mask.hdr.src_addr,
1330                            IPV6_ADDR_LEN))
1331                         return rte_flow_error_set
1332                                         (error, ENOTSUP,
1333                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1334                                          "no support for partial mask on"
1335                                          " \"ipv6.hdr.src_addr\" field"
1336                                          " for vxlan encapsulation");
1337                 /* More L3 address validation can be put here. */
1338         } else {
1339                 /*
1340                  * Kernel uses the source IP address to select the
1341                  * interface for egress encapsulated traffic, so
1342                  * it must be specified in the tc rule.
1343                  */
1344                 return rte_flow_error_set(error, EINVAL,
1345                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1346                                           "outer L3 source address"
1347                                           " must be specified for"
1348                                           " vxlan encapsulation");
1349         }
1350         return 0;
1351 }
1352
1353 /**
1354  * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_UDP item for E-Switch.
1355  * The routine checks the UDP fields to be used in encapsulation header.
1356  *
1357  * @param[in] item
1358  *   Pointer to the item structure.
1359  * @param[out] error
1360  *   Pointer to the error structure.
1361  *
1362  * @return
1363  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1364  **/
1365 static int
1366 flow_tcf_validate_vxlan_encap_udp(const struct rte_flow_item *item,
1367                                   struct rte_flow_error *error)
1368 {
1369         const struct rte_flow_item_udp *spec = item->spec;
1370         const struct rte_flow_item_udp *mask = item->mask;
1371
1372         if (!spec) {
1373                 /*
1374                  * Specification for UDP ports cannot be empty
1375                  * because it is required by tunnel_key parameter.
1376                  */
1377                 return rte_flow_error_set(error, EINVAL,
1378                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1379                                           "NULL UDP port specification "
1380                                           " for vxlan encapsulation");
1381         }
1382         if (!mask)
1383                 mask = &rte_flow_item_udp_mask;
1384         if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
1385                 if (mask->hdr.dst_port != RTE_BE16(0xffff))
1386                         return rte_flow_error_set
1387                                         (error, ENOTSUP,
1388                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1389                                          "no support for partial mask on"
1390                                          " \"udp.hdr.dst_port\" field"
1391                                          " for vxlan encapsulation");
1392                 if (!spec->hdr.dst_port)
1393                         return rte_flow_error_set
1394                                         (error, EINVAL,
1395                                          RTE_FLOW_ERROR_TYPE_ITEM, item,
1396                                          "outer UDP remote port cannot be"
1397                                          " 0 for vxlan encapsulation");
1398         } else {
1399                 return rte_flow_error_set(error, EINVAL,
1400                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1401                                           "outer UDP remote port"
1402                                           " must be specified for"
1403                                           " vxlan encapsulation");
1404         }
1405         if (mask->hdr.src_port != RTE_BE16(0x0000)) {
1406                 if (mask->hdr.src_port != RTE_BE16(0xffff))
1407                         return rte_flow_error_set
1408                                         (error, ENOTSUP,
1409                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1410                                          "no support for partial mask on"
1411                                          " \"udp.hdr.src_port\" field"
1412                                          " for vxlan encapsulation");
1413                 DRV_LOG(WARNING,
1414                         "outer UDP source port cannot be"
1415                         " forced for vxlan encapsulation,"
1416                         " parameter ignored");
1417         }
1418         return 0;
1419 }
1420
1421 /**
1422  * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_VXLAN item for E-Switch.
1423  * The routine checks the VNIP fields to be used in encapsulation header.
1424  *
1425  * @param[in] item
1426  *   Pointer to the item structure.
1427  * @param[out] error
1428  *   Pointer to the error structure.
1429  *
1430  * @return
1431  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1432  **/
1433 static int
1434 flow_tcf_validate_vxlan_encap_vni(const struct rte_flow_item *item,
1435                                   struct rte_flow_error *error)
1436 {
1437         const struct rte_flow_item_vxlan *spec = item->spec;
1438         const struct rte_flow_item_vxlan *mask = item->mask;
1439
1440         if (!spec) {
1441                 /* Outer VNI is required by tunnel_key parameter. */
1442                 return rte_flow_error_set(error, EINVAL,
1443                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1444                                           "NULL VNI specification"
1445                                           " for vxlan encapsulation");
1446         }
1447         if (!mask)
1448                 mask = &rte_flow_item_vxlan_mask;
1449         if (!mask->vni[0] && !mask->vni[1] && !mask->vni[2])
1450                 return rte_flow_error_set(error, EINVAL,
1451                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1452                                           "outer VNI must be specified "
1453                                           "for vxlan encapsulation");
1454         if (mask->vni[0] != 0xff ||
1455             mask->vni[1] != 0xff ||
1456             mask->vni[2] != 0xff)
1457                 return rte_flow_error_set(error, ENOTSUP,
1458                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1459                                           "no support for partial mask on"
1460                                           " \"vxlan.vni\" field");
1461
1462         if (!spec->vni[0] && !spec->vni[1] && !spec->vni[2])
1463                 return rte_flow_error_set(error, EINVAL,
1464                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1465                                           "vxlan vni cannot be 0");
1466         return 0;
1467 }
1468
1469 /**
1470  * Validate VXLAN_ENCAP action item list for E-Switch.
1471  * The routine checks items to be used in encapsulation header.
1472  *
1473  * @param[in] action
1474  *   Pointer to the VXLAN_ENCAP action structure.
1475  * @param[out] error
1476  *   Pointer to the error structure.
1477  *
1478  * @return
1479  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1480  **/
1481 static int
1482 flow_tcf_validate_vxlan_encap(const struct rte_flow_action *action,
1483                               struct rte_flow_error *error)
1484 {
1485         const struct rte_flow_item *items;
1486         int ret;
1487         uint32_t item_flags = 0;
1488
1489         if (!action->conf)
1490                 return rte_flow_error_set(error, EINVAL,
1491                                           RTE_FLOW_ERROR_TYPE_ACTION, action,
1492                                           "Missing vxlan tunnel"
1493                                           " action configuration");
1494         items = ((const struct rte_flow_action_vxlan_encap *)
1495                                         action->conf)->definition;
1496         if (!items)
1497                 return rte_flow_error_set(error, EINVAL,
1498                                           RTE_FLOW_ERROR_TYPE_ACTION, action,
1499                                           "Missing vxlan tunnel"
1500                                           " encapsulation parameters");
1501         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1502                 switch (items->type) {
1503                 case RTE_FLOW_ITEM_TYPE_VOID:
1504                         break;
1505                 case RTE_FLOW_ITEM_TYPE_ETH:
1506                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1507                                                           error);
1508                         if (ret < 0)
1509                                 return ret;
1510                         ret = flow_tcf_validate_vxlan_encap_eth(items, error);
1511                         if (ret < 0)
1512                                 return ret;
1513                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1514                         break;
1515                 break;
1516                 case RTE_FLOW_ITEM_TYPE_IPV4:
1517                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1518                                                            error);
1519                         if (ret < 0)
1520                                 return ret;
1521                         ret = flow_tcf_validate_vxlan_encap_ipv4(items, error);
1522                         if (ret < 0)
1523                                 return ret;
1524                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1525                         break;
1526                 case RTE_FLOW_ITEM_TYPE_IPV6:
1527                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1528                                                            error);
1529                         if (ret < 0)
1530                                 return ret;
1531                         ret = flow_tcf_validate_vxlan_encap_ipv6(items, error);
1532                         if (ret < 0)
1533                                 return ret;
1534                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1535                         break;
1536                 case RTE_FLOW_ITEM_TYPE_UDP:
1537                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1538                                                            0xFF, error);
1539                         if (ret < 0)
1540                                 return ret;
1541                         ret = flow_tcf_validate_vxlan_encap_udp(items, error);
1542                         if (ret < 0)
1543                                 return ret;
1544                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1545                         break;
1546                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1547                         ret = mlx5_flow_validate_item_vxlan(items,
1548                                                             item_flags, error);
1549                         if (ret < 0)
1550                                 return ret;
1551                         ret = flow_tcf_validate_vxlan_encap_vni(items, error);
1552                         if (ret < 0)
1553                                 return ret;
1554                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1555                         break;
1556                 default:
1557                         return rte_flow_error_set
1558                                         (error, ENOTSUP,
1559                                          RTE_FLOW_ERROR_TYPE_ITEM, items,
1560                                          "vxlan encap item not supported");
1561                 }
1562         }
1563         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
1564                 return rte_flow_error_set(error, EINVAL,
1565                                           RTE_FLOW_ERROR_TYPE_ACTION, action,
1566                                           "no outer IP layer found"
1567                                           " for vxlan encapsulation");
1568         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1569                 return rte_flow_error_set(error, EINVAL,
1570                                           RTE_FLOW_ERROR_TYPE_ACTION, action,
1571                                           "no outer UDP layer found"
1572                                           " for vxlan encapsulation");
1573         if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
1574                 return rte_flow_error_set(error, EINVAL,
1575                                           RTE_FLOW_ERROR_TYPE_ACTION, action,
1576                                           "no VXLAN VNI found"
1577                                           " for vxlan encapsulation");
1578         return 0;
1579 }
1580
1581 /**
1582  * Validate RTE_FLOW_ITEM_TYPE_IPV4 item if VXLAN_DECAP action
1583  * is present in actions list.
1584  *
1585  * @param[in] ipv4
1586  *   Outer IPv4 address item (if any, NULL otherwise).
1587  * @param[out] error
1588  *   Pointer to the error structure.
1589  *
1590  * @return
1591  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1592  **/
1593 static int
1594 flow_tcf_validate_vxlan_decap_ipv4(const struct rte_flow_item *ipv4,
1595                                    struct rte_flow_error *error)
1596 {
1597         const struct rte_flow_item_ipv4 *spec = ipv4->spec;
1598         const struct rte_flow_item_ipv4 *mask = ipv4->mask;
1599
1600         if (!spec) {
1601                 /*
1602                  * Specification for IP addresses cannot be empty
1603                  * because it is required as decap parameter.
1604                  */
1605                 return rte_flow_error_set(error, EINVAL,
1606                                           RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
1607                                           "NULL outer ipv4 address"
1608                                           " specification for vxlan"
1609                                           " for vxlan decapsulation");
1610         }
1611         if (!mask)
1612                 mask = &rte_flow_item_ipv4_mask;
1613         if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
1614                 if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
1615                         return rte_flow_error_set
1616                                         (error, ENOTSUP,
1617                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1618                                          "no support for partial mask on"
1619                                          " \"ipv4.hdr.dst_addr\" field");
1620                 /* More IP address validations can be put here. */
1621         } else {
1622                 /*
1623                  * Kernel uses the destination IP address
1624                  * to determine the ingress network interface
1625                  * for traffic being decapsulated.
1626                  */
1627                 return rte_flow_error_set(error, EINVAL,
1628                                           RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
1629                                           "outer ipv4 destination address"
1630                                           " must be specified for"
1631                                           " vxlan decapsulation");
1632         }
1633         /* Source IP address is optional for decap. */
1634         if (mask->hdr.src_addr != RTE_BE32(0x00000000) &&
1635             mask->hdr.src_addr != RTE_BE32(0xffffffff))
1636                 return rte_flow_error_set(error, ENOTSUP,
1637                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1638                                           "no support for partial mask on"
1639                                           " \"ipv4.hdr.src_addr\" field");
1640         return 0;
1641 }
1642
1643 /**
1644  * Validate RTE_FLOW_ITEM_TYPE_IPV6 item if VXLAN_DECAP action
1645  * is present in actions list.
1646  *
1647  * @param[in] ipv6
1648  *   Outer IPv6 address item (if any, NULL otherwise).
1649  * @param[out] error
1650  *   Pointer to the error structure.
1651  *
1652  * @return
1653  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1654  **/
1655 static int
1656 flow_tcf_validate_vxlan_decap_ipv6(const struct rte_flow_item *ipv6,
1657                                    struct rte_flow_error *error)
1658 {
1659         const struct rte_flow_item_ipv6 *spec = ipv6->spec;
1660         const struct rte_flow_item_ipv6 *mask = ipv6->mask;
1661
1662         if (!spec) {
1663                 /*
1664                  * Specification for IP addresses cannot be empty
1665                  * because it is required as decap parameter.
1666                  */
1667                 return rte_flow_error_set(error, EINVAL,
1668                                           RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
1669                                           "NULL outer ipv6 address"
1670                                           " specification for vxlan"
1671                                           " decapsulation");
1672         }
1673         if (!mask)
1674                 mask = &rte_flow_item_ipv6_mask;
1675         if (memcmp(&mask->hdr.dst_addr,
1676                    &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
1677                    IPV6_ADDR_LEN)) {
1678                 if (memcmp(&mask->hdr.dst_addr,
1679                         &rte_flow_item_ipv6_mask.hdr.dst_addr,
1680                         IPV6_ADDR_LEN))
1681                         return rte_flow_error_set
1682                                         (error, ENOTSUP,
1683                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1684                                          "no support for partial mask on"
1685                                          " \"ipv6.hdr.dst_addr\" field");
1686                 /* More IP address validations can be put here. */
1687         } else {
1688                 /*
1689                  * Kernel uses the destination IP address
1690                  * to determine the ingress network interface
1691                  * for traffic being decapsulated.
1692                  */
1693                 return rte_flow_error_set(error, EINVAL,
1694                                           RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
1695                                           "outer ipv6 destination address must be "
1696                                           "specified for vxlan decapsulation");
1697         }
1698         /* Source IP address is optional for decap. */
1699         if (memcmp(&mask->hdr.src_addr,
1700                    &flow_tcf_mask_empty.ipv6.hdr.src_addr,
1701                    IPV6_ADDR_LEN)) {
1702                 if (memcmp(&mask->hdr.src_addr,
1703                            &rte_flow_item_ipv6_mask.hdr.src_addr,
1704                            IPV6_ADDR_LEN))
1705                         return rte_flow_error_set
1706                                         (error, ENOTSUP,
1707                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1708                                          "no support for partial mask on"
1709                                          " \"ipv6.hdr.src_addr\" field");
1710         }
1711         return 0;
1712 }
1713
1714 /**
1715  * Validate RTE_FLOW_ITEM_TYPE_UDP item if VXLAN_DECAP action
1716  * is present in actions list.
1717  *
1718  * @param[in] udp
1719  *   Outer UDP layer item (if any, NULL otherwise).
1720  * @param[out] error
1721  *   Pointer to the error structure.
1722  *
1723  * @return
1724  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1725  **/
1726 static int
1727 flow_tcf_validate_vxlan_decap_udp(const struct rte_flow_item *udp,
1728                                   struct rte_flow_error *error)
1729 {
1730         const struct rte_flow_item_udp *spec = udp->spec;
1731         const struct rte_flow_item_udp *mask = udp->mask;
1732
1733         if (!spec)
1734                 /*
1735                  * Specification for UDP ports cannot be empty
1736                  * because it is required as decap parameter.
1737                  */
1738                 return rte_flow_error_set(error, EINVAL,
1739                                           RTE_FLOW_ERROR_TYPE_ITEM, udp,
1740                                           "NULL UDP port specification"
1741                                           " for VXLAN decapsulation");
1742         if (!mask)
1743                 mask = &rte_flow_item_udp_mask;
1744         if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
1745                 if (mask->hdr.dst_port != RTE_BE16(0xffff))
1746                         return rte_flow_error_set
1747                                         (error, ENOTSUP,
1748                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1749                                          "no support for partial mask on"
1750                                          " \"udp.hdr.dst_port\" field");
1751                 if (!spec->hdr.dst_port)
1752                         return rte_flow_error_set
1753                                         (error, EINVAL,
1754                                          RTE_FLOW_ERROR_TYPE_ITEM, udp,
1755                                          "zero decap local UDP port");
1756         } else {
1757                 return rte_flow_error_set(error, EINVAL,
1758                                           RTE_FLOW_ERROR_TYPE_ITEM, udp,
1759                                           "outer UDP destination port must be "
1760                                           "specified for vxlan decapsulation");
1761         }
1762         if (mask->hdr.src_port != RTE_BE16(0x0000)) {
1763                 if (mask->hdr.src_port != RTE_BE16(0xffff))
1764                         return rte_flow_error_set
1765                                         (error, ENOTSUP,
1766                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1767                                          "no support for partial mask on"
1768                                          " \"udp.hdr.src_port\" field");
1769                 DRV_LOG(WARNING,
1770                         "outer UDP local port cannot be "
1771                         "forced for VXLAN encapsulation, "
1772                         "parameter ignored");
1773         }
1774         return 0;
1775 }
1776
1777 /**
1778  * Validate flow for E-Switch.
1779  *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
1782  * @param[in] attr
1783  *   Pointer to the flow attributes.
1784  * @param[in] items
1785  *   Pointer to the list of items.
1786  * @param[in] actions
1787  *   Pointer to the list of actions.
1788  * @param[out] error
1789  *   Pointer to the error structure.
1790  *
1791  * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
1793  */
1794 static int
1795 flow_tcf_validate(struct rte_eth_dev *dev,
1796                   const struct rte_flow_attr *attr,
1797                   const struct rte_flow_item items[],
1798                   const struct rte_flow_action actions[],
1799                   struct rte_flow_error *error)
1800 {
1801         union {
1802                 const struct rte_flow_item_port_id *port_id;
1803                 const struct rte_flow_item_eth *eth;
1804                 const struct rte_flow_item_vlan *vlan;
1805                 const struct rte_flow_item_ipv4 *ipv4;
1806                 const struct rte_flow_item_ipv6 *ipv6;
1807                 const struct rte_flow_item_tcp *tcp;
1808                 const struct rte_flow_item_udp *udp;
1809                 const struct rte_flow_item_vxlan *vxlan;
1810         } spec, mask;
1811         union {
1812                 const struct rte_flow_action_port_id *port_id;
1813                 const struct rte_flow_action_jump *jump;
1814                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1815                 const struct rte_flow_action_of_set_vlan_vid *
1816                         of_set_vlan_vid;
1817                 const struct rte_flow_action_of_set_vlan_pcp *
1818                         of_set_vlan_pcp;
1819                 const struct rte_flow_action_vxlan_encap *vxlan_encap;
1820                 const struct rte_flow_action_set_ipv4 *set_ipv4;
1821                 const struct rte_flow_action_set_ipv6 *set_ipv6;
1822         } conf;
1823         uint64_t item_flags = 0;
1824         uint64_t action_flags = 0;
1825         uint8_t next_protocol = -1;
1826         unsigned int tcm_ifindex = 0;
1827         uint8_t pedit_validated = 0;
1828         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1829         struct rte_eth_dev *port_id_dev = NULL;
1830         bool in_port_id_set;
1831         int ret;
1832
1833         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1834                                                 PTOI_TABLE_SZ_MAX(dev)));
1835         ret = flow_tcf_validate_attributes(attr, error);
1836         if (ret < 0)
1837                 return ret;
1838         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1839                 unsigned int i;
1840                 uint64_t current_action_flag = 0;
1841
1842                 switch (actions->type) {
1843                 case RTE_FLOW_ACTION_TYPE_VOID:
1844                         break;
1845                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1846                         current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1847                         if (!actions->conf)
1848                                 break;
1849                         conf.port_id = actions->conf;
1850                         if (conf.port_id->original)
1851                                 i = 0;
1852                         else
1853                                 for (i = 0; ptoi[i].ifindex; ++i)
1854                                         if (ptoi[i].port_id == conf.port_id->id)
1855                                                 break;
1856                         if (!ptoi[i].ifindex)
1857                                 return rte_flow_error_set
1858                                         (error, ENODEV,
1859                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1860                                          conf.port_id,
1861                                          "missing data to convert port ID to"
1862                                          " ifindex");
1863                         port_id_dev = &rte_eth_devices[conf.port_id->id];
1864                         break;
1865                 case RTE_FLOW_ACTION_TYPE_JUMP:
1866                         current_action_flag = MLX5_FLOW_ACTION_JUMP;
1867                         if (!actions->conf)
1868                                 break;
1869                         conf.jump = actions->conf;
1870                         if (attr->group >= conf.jump->group)
1871                                 return rte_flow_error_set
1872                                         (error, ENOTSUP,
1873                                          RTE_FLOW_ERROR_TYPE_ACTION,
1874                                          actions,
1875                                          "can jump only to a group forward");
1876                         break;
1877                 case RTE_FLOW_ACTION_TYPE_DROP:
1878                         current_action_flag = MLX5_FLOW_ACTION_DROP;
1879                         break;
1880                 case RTE_FLOW_ACTION_TYPE_COUNT:
1881                         break;
1882                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1883                         current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1884                         break;
1885                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1886                         current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1887                         break;
1888                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1889                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1890                                 return rte_flow_error_set
1891                                         (error, ENOTSUP,
1892                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1893                                          "vlan modify is not supported,"
1894                                          " set action must follow push action");
1895                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1896                         break;
1897                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1898                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1899                                 return rte_flow_error_set
1900                                         (error, ENOTSUP,
1901                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1902                                          "vlan modify is not supported,"
1903                                          " set action must follow push action");
1904                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1905                         break;
1906                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
1907                         current_action_flag = MLX5_FLOW_ACTION_VXLAN_DECAP;
1908                         break;
1909                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
1910                         ret = flow_tcf_validate_vxlan_encap(actions, error);
1911                         if (ret < 0)
1912                                 return ret;
1913                         current_action_flag = MLX5_FLOW_ACTION_VXLAN_ENCAP;
1914                         break;
1915                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1916                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1917                         break;
1918                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1919                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1920                         break;
1921                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1922                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1923                         break;
1924                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1925                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1926                         break;
1927                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1928                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1929                         break;
1930                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1931                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1932                         break;
1933                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1934                         current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1935                         break;
1936                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1937                         current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1938                         break;
1939                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1940                         current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
1941                         break;
1942                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1943                         current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
1944                         break;
1945                 default:
1946                         return rte_flow_error_set(error, ENOTSUP,
1947                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1948                                                   actions,
1949                                                   "action not supported");
1950                 }
1951                 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1952                         if (!actions->conf)
1953                                 return rte_flow_error_set
1954                                         (error, EINVAL,
1955                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1956                                          actions,
1957                                          "action configuration not set");
1958                 }
1959                 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1960                     pedit_validated)
1961                         return rte_flow_error_set(error, ENOTSUP,
1962                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1963                                                   actions,
1964                                                   "set actions should be "
1965                                                   "listed successively");
1966                 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1967                     (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1968                         pedit_validated = 1;
1969                 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1970                     (action_flags & MLX5_TCF_FATE_ACTIONS))
1971                         return rte_flow_error_set(error, EINVAL,
1972                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1973                                                   actions,
1974                                                   "can't have multiple fate"
1975                                                   " actions");
1976                 if ((current_action_flag & MLX5_TCF_VXLAN_ACTIONS) &&
1977                     (action_flags & MLX5_TCF_VXLAN_ACTIONS))
1978                         return rte_flow_error_set(error, EINVAL,
1979                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1980                                                   actions,
1981                                                   "can't have multiple vxlan"
1982                                                   " actions");
1983                 if ((current_action_flag & MLX5_TCF_VXLAN_ACTIONS) &&
1984                     (action_flags & MLX5_TCF_VLAN_ACTIONS))
1985                         return rte_flow_error_set(error, ENOTSUP,
1986                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1987                                                   actions,
1988                                                   "can't have vxlan and vlan"
1989                                                   " actions in the same rule");
1990                 action_flags |= current_action_flag;
1991         }
1992         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1993                 unsigned int i;
1994
1995                 if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
1996                     items->type != RTE_FLOW_ITEM_TYPE_ETH)
1997                         return rte_flow_error_set(error, ENOTSUP,
1998                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1999                                                   items,
2000                                                   "only L2 inner item"
2001                                                   " is supported");
2002                 switch (items->type) {
2003                 case RTE_FLOW_ITEM_TYPE_VOID:
2004                         break;
2005                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
2006                         mask.port_id = flow_tcf_item_mask
2007                                 (items, &rte_flow_item_port_id_mask,
2008                                  &flow_tcf_mask_supported.port_id,
2009                                  &flow_tcf_mask_empty.port_id,
2010                                  sizeof(flow_tcf_mask_supported.port_id),
2011                                  error);
2012                         if (!mask.port_id)
2013                                 return -rte_errno;
2014                         if (mask.port_id == &flow_tcf_mask_empty.port_id) {
2015                                 in_port_id_set = 1;
2016                                 break;
2017                         }
2018                         spec.port_id = items->spec;
2019                         if (mask.port_id->id && mask.port_id->id != 0xffffffff)
2020                                 return rte_flow_error_set
2021                                         (error, ENOTSUP,
2022                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2023                                          mask.port_id,
2024                                          "no support for partial mask on"
2025                                          " \"id\" field");
2026                         if (!mask.port_id->id)
2027                                 i = 0;
2028                         else
2029                                 for (i = 0; ptoi[i].ifindex; ++i)
2030                                         if (ptoi[i].port_id == spec.port_id->id)
2031                                                 break;
2032                         if (!ptoi[i].ifindex)
2033                                 return rte_flow_error_set
2034                                         (error, ENODEV,
2035                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2036                                          spec.port_id,
2037                                          "missing data to convert port ID to"
2038                                          " ifindex");
2039                         if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
2040                                 return rte_flow_error_set
2041                                         (error, ENOTSUP,
2042                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2043                                          spec.port_id,
2044                                          "cannot match traffic for"
2045                                          " several port IDs through"
2046                                          " a single flow rule");
2047                         tcm_ifindex = ptoi[i].ifindex;
2048                         in_port_id_set = 1;
2049                         break;
2050                 case RTE_FLOW_ITEM_TYPE_ETH:
2051                         ret = mlx5_flow_validate_item_eth(items, item_flags,
2052                                                           error);
2053                         if (ret < 0)
2054                                 return ret;
2055                         item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
2056                                         MLX5_FLOW_LAYER_INNER_L2 :
2057                                         MLX5_FLOW_LAYER_OUTER_L2;
2058                         /* TODO:
2059                          * Redundant check due to different supported mask.
2060                          * Same for the rest of items.
2061                          */
2062                         mask.eth = flow_tcf_item_mask
2063                                 (items, &rte_flow_item_eth_mask,
2064                                  &flow_tcf_mask_supported.eth,
2065                                  &flow_tcf_mask_empty.eth,
2066                                  sizeof(flow_tcf_mask_supported.eth),
2067                                  error);
2068                         if (!mask.eth)
2069                                 return -rte_errno;
2070                         if (mask.eth->type && mask.eth->type !=
2071                             RTE_BE16(0xffff))
2072                                 return rte_flow_error_set
2073                                         (error, ENOTSUP,
2074                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2075                                          mask.eth,
2076                                          "no support for partial mask on"
2077                                          " \"type\" field");
2078                         break;
2079                 case RTE_FLOW_ITEM_TYPE_VLAN:
2080                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
2081                                                            error);
2082                         if (ret < 0)
2083                                 return ret;
2084                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
2085                         mask.vlan = flow_tcf_item_mask
2086                                 (items, &rte_flow_item_vlan_mask,
2087                                  &flow_tcf_mask_supported.vlan,
2088                                  &flow_tcf_mask_empty.vlan,
2089                                  sizeof(flow_tcf_mask_supported.vlan),
2090                                  error);
2091                         if (!mask.vlan)
2092                                 return -rte_errno;
2093                         if ((mask.vlan->tci & RTE_BE16(0xe000) &&
2094                              (mask.vlan->tci & RTE_BE16(0xe000)) !=
2095                               RTE_BE16(0xe000)) ||
2096                             (mask.vlan->tci & RTE_BE16(0x0fff) &&
2097                              (mask.vlan->tci & RTE_BE16(0x0fff)) !=
2098                               RTE_BE16(0x0fff)) ||
2099                             (mask.vlan->inner_type &&
2100                              mask.vlan->inner_type != RTE_BE16(0xffff)))
2101                                 return rte_flow_error_set
2102                                         (error, ENOTSUP,
2103                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2104                                          mask.vlan,
2105                                          "no support for partial masks on"
2106                                          " \"tci\" (PCP and VID parts) and"
2107                                          " \"inner_type\" fields");
2108                         break;
2109                 case RTE_FLOW_ITEM_TYPE_IPV4:
2110                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
2111                                                            error);
2112                         if (ret < 0)
2113                                 return ret;
2114                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2115                         mask.ipv4 = flow_tcf_item_mask
2116                                 (items, &rte_flow_item_ipv4_mask,
2117                                  &flow_tcf_mask_supported.ipv4,
2118                                  &flow_tcf_mask_empty.ipv4,
2119                                  sizeof(flow_tcf_mask_supported.ipv4),
2120                                  error);
2121                         if (!mask.ipv4)
2122                                 return -rte_errno;
2123                         if (mask.ipv4->hdr.next_proto_id &&
2124                             mask.ipv4->hdr.next_proto_id != 0xff)
2125                                 return rte_flow_error_set
2126                                         (error, ENOTSUP,
2127                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2128                                          mask.ipv4,
2129                                          "no support for partial mask on"
2130                                          " \"hdr.next_proto_id\" field");
2131                         else if (mask.ipv4->hdr.next_proto_id)
2132                                 next_protocol =
2133                                         ((const struct rte_flow_item_ipv4 *)
2134                                          (items->spec))->hdr.next_proto_id;
2135                         if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
2136                                 ret = flow_tcf_validate_vxlan_decap_ipv4
2137                                                                 (items, error);
2138                                 if (ret < 0)
2139                                         return ret;
2140                         }
2141                         break;
2142                 case RTE_FLOW_ITEM_TYPE_IPV6:
2143                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
2144                                                            error);
2145                         if (ret < 0)
2146                                 return ret;
2147                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2148                         mask.ipv6 = flow_tcf_item_mask
2149                                 (items, &rte_flow_item_ipv6_mask,
2150                                  &flow_tcf_mask_supported.ipv6,
2151                                  &flow_tcf_mask_empty.ipv6,
2152                                  sizeof(flow_tcf_mask_supported.ipv6),
2153                                  error);
2154                         if (!mask.ipv6)
2155                                 return -rte_errno;
2156                         if (mask.ipv6->hdr.proto &&
2157                             mask.ipv6->hdr.proto != 0xff)
2158                                 return rte_flow_error_set
2159                                         (error, ENOTSUP,
2160                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2161                                          mask.ipv6,
2162                                          "no support for partial mask on"
2163                                          " \"hdr.proto\" field");
2164                         else if (mask.ipv6->hdr.proto)
2165                                 next_protocol =
2166                                         ((const struct rte_flow_item_ipv6 *)
2167                                          (items->spec))->hdr.proto;
2168                         if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
2169                                 ret = flow_tcf_validate_vxlan_decap_ipv6
2170                                                                 (items, error);
2171                                 if (ret < 0)
2172                                         return ret;
2173                         }
2174                         break;
2175                 case RTE_FLOW_ITEM_TYPE_UDP:
2176                         ret = mlx5_flow_validate_item_udp(items, item_flags,
2177                                                           next_protocol, error);
2178                         if (ret < 0)
2179                                 return ret;
2180                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
2181                         mask.udp = flow_tcf_item_mask
2182                                 (items, &rte_flow_item_udp_mask,
2183                                  &flow_tcf_mask_supported.udp,
2184                                  &flow_tcf_mask_empty.udp,
2185                                  sizeof(flow_tcf_mask_supported.udp),
2186                                  error);
2187                         if (!mask.udp)
2188                                 return -rte_errno;
2189                         if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
2190                                 ret = flow_tcf_validate_vxlan_decap_udp
2191                                                                 (items, error);
2192                                 if (ret < 0)
2193                                         return ret;
2194                         }
2195                         break;
2196                 case RTE_FLOW_ITEM_TYPE_TCP:
2197                         ret = mlx5_flow_validate_item_tcp
2198                                              (items, item_flags,
2199                                               next_protocol,
2200                                               &flow_tcf_mask_supported.tcp,
2201                                               error);
2202                         if (ret < 0)
2203                                 return ret;
2204                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
2205                         mask.tcp = flow_tcf_item_mask
2206                                 (items, &rte_flow_item_tcp_mask,
2207                                  &flow_tcf_mask_supported.tcp,
2208                                  &flow_tcf_mask_empty.tcp,
2209                                  sizeof(flow_tcf_mask_supported.tcp),
2210                                  error);
2211                         if (!mask.tcp)
2212                                 return -rte_errno;
2213                         break;
2214                 case RTE_FLOW_ITEM_TYPE_VXLAN:
2215                         if (!(action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP))
2216                                 return rte_flow_error_set
2217                                         (error, ENOTSUP,
2218                                          RTE_FLOW_ERROR_TYPE_ITEM,
2219                                          items,
2220                                          "vni pattern should be followed by"
2221                                          " vxlan decapsulation action");
2222                         ret = mlx5_flow_validate_item_vxlan(items,
2223                                                             item_flags, error);
2224                         if (ret < 0)
2225                                 return ret;
2226                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
2227                         mask.vxlan = flow_tcf_item_mask
2228                                 (items, &rte_flow_item_vxlan_mask,
2229                                  &flow_tcf_mask_supported.vxlan,
2230                                  &flow_tcf_mask_empty.vxlan,
2231                                  sizeof(flow_tcf_mask_supported.vxlan), error);
2232                         if (!mask.vxlan)
2233                                 return -rte_errno;
2234                         if (mask.vxlan->vni[0] != 0xff ||
2235                             mask.vxlan->vni[1] != 0xff ||
2236                             mask.vxlan->vni[2] != 0xff)
2237                                 return rte_flow_error_set
2238                                         (error, ENOTSUP,
2239                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
2240                                          mask.vxlan,
2241                                          "no support for partial or "
2242                                          "empty mask on \"vxlan.vni\" field");
2243                         break;
2244                 default:
2245                         return rte_flow_error_set(error, ENOTSUP,
2246                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2247                                                   items, "item not supported");
2248                 }
2249         }
2250         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
2251             (action_flags & MLX5_FLOW_ACTION_DROP))
2252                 return rte_flow_error_set(error, ENOTSUP,
2253                                           RTE_FLOW_ERROR_TYPE_ACTION,
2254                                           actions,
2255                                           "set action is not compatible with "
2256                                           "drop action");
2257         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
2258             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
2259                 return rte_flow_error_set(error, ENOTSUP,
2260                                           RTE_FLOW_ERROR_TYPE_ACTION,
2261                                           actions,
2262                                           "set action must be followed by "
2263                                           "port_id action");
2264         if (action_flags &
2265            (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
2266                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
2267                         return rte_flow_error_set(error, EINVAL,
2268                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2269                                                   actions,
2270                                                   "no ipv4 item found in"
2271                                                   " pattern");
2272         }
2273         if (action_flags &
2274            (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
2275                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
2276                         return rte_flow_error_set(error, EINVAL,
2277                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2278                                                   actions,
2279                                                   "no ipv6 item found in"
2280                                                   " pattern");
2281         }
2282         if (action_flags &
2283            (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
2284                 if (!(item_flags &
2285                      (MLX5_FLOW_LAYER_OUTER_L4_UDP |
2286                       MLX5_FLOW_LAYER_OUTER_L4_TCP)))
2287                         return rte_flow_error_set(error, EINVAL,
2288                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2289                                                   actions,
2290                                                   "no TCP/UDP item found in"
2291                                                   " pattern");
2292         }
2293         /*
2294          * FW syndrome (0xA9C090):
2295          *     set_flow_table_entry: push vlan action fte in fdb can ONLY be
2296          *     forward to the uplink.
2297          */
2298         if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
2299             (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
2300             ((struct priv *)port_id_dev->data->dev_private)->representor)
2301                 return rte_flow_error_set(error, ENOTSUP,
2302                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
2303                                           "vlan push can only be applied"
2304                                           " when forwarding to uplink port");
2305         /*
2306          * FW syndrome (0x294609):
2307          *     set_flow_table_entry: modify/pop/push actions in fdb flow table
2308          *     are supported only while forwarding to vport.
2309          */
2310         if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
2311             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
2312                 return rte_flow_error_set(error, ENOTSUP,
2313                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
2314                                           "vlan actions are supported"
2315                                           " only with port_id action");
2316         if ((action_flags & MLX5_TCF_VXLAN_ACTIONS) &&
2317             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
2318                 return rte_flow_error_set(error, ENOTSUP,
2319                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2320                                           "vxlan actions are supported"
2321                                           " only with port_id action");
2322         if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
2323                 return rte_flow_error_set(error, EINVAL,
2324                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
2325                                           "no fate action is found");
2326         if (action_flags &
2327            (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
2328                 if (!(item_flags &
2329                      (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
2330                       MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
2331                         return rte_flow_error_set(error, EINVAL,
2332                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2333                                                   actions,
2334                                                   "no IP found in pattern");
2335         }
2336         if (action_flags &
2337             (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
2338                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
2339                         return rte_flow_error_set(error, ENOTSUP,
2340                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2341                                                   actions,
2342                                                   "no ethernet found in"
2343                                                   " pattern");
2344         }
2345         if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
2346                 if (!(item_flags &
2347                      (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
2348                       MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
2349                         return rte_flow_error_set(error, EINVAL,
2350                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2351                                                   NULL,
2352                                                   "no outer IP pattern found"
2353                                                   " for vxlan decap action");
2354                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2355                         return rte_flow_error_set(error, EINVAL,
2356                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2357                                                   NULL,
2358                                                   "no outer UDP pattern found"
2359                                                   " for vxlan decap action");
2360                 if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
2361                         return rte_flow_error_set(error, EINVAL,
2362                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2363                                                   NULL,
2364                                                   "no VNI pattern found"
2365                                                   " for vxlan decap action");
2366         }
2367         return 0;
2368 }
2369
2370 /**
2371  * Calculate maximum size of memory for flow items of Linux TC flower and
2372  * extract specified items.
2373  *
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[out] item_flags
 *   Pointer to the detected items.
2378  *
2379  * @return
2380  *   Maximum size of memory for items.
2381  */
2382 static int
2383 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
2384                             const struct rte_flow_item items[],
2385                             uint64_t *item_flags)
2386 {
2387         int size = 0;
2388         uint64_t flags = 0;
2389
2390         size += SZ_NLATTR_STRZ_OF("flower") +
2391                 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
2392                 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
2393         if (attr->group > 0)
2394                 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
2395         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
2396                 switch (items->type) {
2397                 case RTE_FLOW_ITEM_TYPE_VOID:
2398                         break;
2399                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
2400                         break;
2401                 case RTE_FLOW_ITEM_TYPE_ETH:
2402                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
2403                                 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
2404                                 /* dst/src MAC addr and mask. */
2405                         flags |= MLX5_FLOW_LAYER_OUTER_L2;
2406                         break;
2407                 case RTE_FLOW_ITEM_TYPE_VLAN:
2408                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
2409                                 SZ_NLATTR_TYPE_OF(uint16_t) +
2410                                 /* VLAN Ether type. */
2411                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
2412                                 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
2413                         flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
2414                         break;
2415                 case RTE_FLOW_ITEM_TYPE_IPV4:
2416                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
2417                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
2418                                 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
2419                                 /* dst/src IP addr and mask. */
2420                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2421                         break;
2422                 case RTE_FLOW_ITEM_TYPE_IPV6:
2423                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
2424                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
2425                                 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
2426                                 /* dst/src IP addr and mask. */
2427                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2428                         break;
2429                 case RTE_FLOW_ITEM_TYPE_UDP:
2430                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
2431                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
2432                                 /* dst/src port and mask. */
2433                         flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
2434                         break;
2435                 case RTE_FLOW_ITEM_TYPE_TCP:
2436                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
2437                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
2438                                 /* dst/src port and mask. */
2439                         flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
2440                         break;
2441                 default:
2442                         DRV_LOG(WARNING,
2443                                 "unsupported item %p type %d,"
2444                                 " items must be validated before flow creation",
2445                                 (const void *)items, items->type);
2446                         break;
2447                 }
2448         }
2449         *item_flags = flags;
2450         return size;
2451 }
2452
2453 /**
2454  * Calculate maximum size of memory for flow actions of Linux TC flower and
2455  * extract specified actions.
2456  *
2457  * @param[in] actions
2458  *   Pointer to the list of actions.
2459  * @param[out] action_flags
2460  *   Pointer to the detected actions.
2461  *
2462  * @return
2463  *   Maximum size of memory for actions.
2464  */
2465 static int
2466 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
2467                               uint64_t *action_flags)
2468 {
2469         int size = 0;
2470         uint64_t flags = 0;
2471
2472         size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
2473         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2474                 switch (actions->type) {
2475                 case RTE_FLOW_ACTION_TYPE_VOID:
2476                         break;
2477                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
2478                         size += SZ_NLATTR_NEST + /* na_act_index. */
2479                                 SZ_NLATTR_STRZ_OF("mirred") +
2480                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
2481                                 SZ_NLATTR_TYPE_OF(struct tc_mirred);
2482                         flags |= MLX5_FLOW_ACTION_PORT_ID;
2483                         break;
2484                 case RTE_FLOW_ACTION_TYPE_JUMP:
2485                         size += SZ_NLATTR_NEST + /* na_act_index. */
2486                                 SZ_NLATTR_STRZ_OF("gact") +
2487                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
2488                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
2489                         flags |= MLX5_FLOW_ACTION_JUMP;
2490                         break;
2491                 case RTE_FLOW_ACTION_TYPE_DROP:
2492                         size += SZ_NLATTR_NEST + /* na_act_index. */
2493                                 SZ_NLATTR_STRZ_OF("gact") +
2494                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
2495                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
2496                         flags |= MLX5_FLOW_ACTION_DROP;
2497                         break;
2498                 case RTE_FLOW_ACTION_TYPE_COUNT:
2499                         break;
2500                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
2501                         flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
2502                         goto action_of_vlan;
2503                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
2504                         flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
2505                         goto action_of_vlan;
2506                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
2507                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
2508                         goto action_of_vlan;
2509                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
2510                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
2511                         goto action_of_vlan;
2512 action_of_vlan:
2513                         size += SZ_NLATTR_NEST + /* na_act_index. */
2514                                 SZ_NLATTR_STRZ_OF("vlan") +
2515                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
2516                                 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
2517                                 SZ_NLATTR_TYPE_OF(uint16_t) +
2518                                 /* VLAN protocol. */
2519                                 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
2520                                 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
2521                         break;
2522                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
2523                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
2524                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
2525                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
2526                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
2527                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
2528                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
2529                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
2530                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
2531                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
2532                         size += flow_tcf_get_pedit_actions_size(&actions,
2533                                                                 &flags);
2534                         break;
2535                 default:
2536                         DRV_LOG(WARNING,
2537                                 "unsupported action %p type %d,"
2538                                 " items must be validated before flow creation",
2539                                 (const void *)actions, actions->type);
2540                         break;
2541                 }
2542         }
2543         *action_flags = flags;
2544         return size;
2545 }
2546
2547 /**
2548  * Brand rtnetlink buffer with unique handle.
2549  *
2550  * This handle should be unique for a given network interface to avoid
2551  * collisions.
2552  *
2553  * @param nlh
2554  *   Pointer to Netlink message.
2555  * @param handle
2556  *   Unique 32-bit handle to use.
2557  */
2558 static void
2559 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
2560 {
2561         struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
2562
2563         tcm->tcm_handle = handle;
2564         DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
2565                 (void *)nlh, handle);
2566 }
2567
2568 /**
2569  * Prepare a flow object for Linux TC flower. It calculates the maximum size of
2570  * memory required, allocates the memory, initializes Netlink message headers
2571  * and set unique TC message handle.
2572  *
2573  * @param[in] attr
2574  *   Pointer to the flow attributes.
2575  * @param[in] items
2576  *   Pointer to the list of items.
2577  * @param[in] actions
2578  *   Pointer to the list of actions.
2579  * @param[out] item_flags
2580  *   Pointer to bit mask of all items detected.
2581  * @param[out] action_flags
2582  *   Pointer to bit mask of all actions detected.
2583  * @param[out] error
2584  *   Pointer to the error structure.
2585  *
2586  * @return
2587  *   Pointer to mlx5_flow object on success,
 *   otherwise NULL and rte_errno is set.
2589  */
2590 static struct mlx5_flow *
2591 flow_tcf_prepare(const struct rte_flow_attr *attr,
2592                  const struct rte_flow_item items[],
2593                  const struct rte_flow_action actions[],
2594                  uint64_t *item_flags, uint64_t *action_flags,
2595                  struct rte_flow_error *error)
2596 {
2597         size_t size = sizeof(struct mlx5_flow) +
2598                       MNL_ALIGN(sizeof(struct nlmsghdr)) +
2599                       MNL_ALIGN(sizeof(struct tcmsg));
2600         struct mlx5_flow *dev_flow;
2601         struct nlmsghdr *nlh;
2602         struct tcmsg *tcm;
2603
2604         size += flow_tcf_get_items_and_size(attr, items, item_flags);
2605         size += flow_tcf_get_actions_and_size(actions, action_flags);
2606         dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
2607         if (!dev_flow) {
2608                 rte_flow_error_set(error, ENOMEM,
2609                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2610                                    "not enough memory to create E-Switch flow");
2611                 return NULL;
2612         }
2613         nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
2614         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2615         *dev_flow = (struct mlx5_flow){
2616                 .tcf = (struct mlx5_flow_tcf){
2617                         .nlh = nlh,
2618                         .tcm = tcm,
2619                 },
2620         };
2621         /*
2622          * Generate a reasonably unique handle based on the address of the
2623          * target buffer.
2624          *
2625          * This is straightforward on 32-bit systems where the flow pointer can
2626          * be used directly. Otherwise, its least significant part is taken
2627          * after shifting it by the previous power of two of the pointed buffer
2628          * size.
2629          */
2630         if (sizeof(dev_flow) <= 4)
2631                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
2632         else
2633                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
2634                                        rte_log2_u32(rte_align32prevpow2(size)));
2635         return dev_flow;
2636 }
2637
2638 /**
2639  * Make adjustments for supporting count actions.
2640  *
2641  * @param[in] dev
2642  *   Pointer to the Ethernet device structure.
2643  * @param[in] dev_flow
2644  *   Pointer to mlx5_flow.
2645  * @param[out] error
2646  *   Pointer to error structure.
2647  *
2648  * @return
2649  *   0 On success else a negative errno value is returned and rte_errno is set.
2650  */
2651 static int
2652 flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
2653                                   struct mlx5_flow *dev_flow,
2654                                   struct rte_flow_error *error)
2655 {
2656         struct rte_flow *flow = dev_flow->flow;
2657
2658         if (!flow->counter) {
2659                 flow->counter = flow_tcf_counter_new();
2660                 if (!flow->counter)
2661                         return rte_flow_error_set(error, rte_errno,
2662                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2663                                                   NULL,
2664                                                   "cannot get counter"
2665                                                   " context.");
2666         }
2667         return 0;
2668 }
2669
2670 /**
2671  * Translate flow for Linux TC flower and construct Netlink message.
2672  *
2673  * @param[in] priv
2674  *   Pointer to the priv structure.
2675  * @param[in, out] flow
2676  *   Pointer to the sub flow.
2677  * @param[in] attr
2678  *   Pointer to the flow attributes.
2679  * @param[in] items
2680  *   Pointer to the list of items.
2681  * @param[in] actions
2682  *   Pointer to the list of actions.
2683  * @param[out] error
2684  *   Pointer to the error structure.
2685  *
2686  * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
2688  */
2689 static int
2690 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
2691                    const struct rte_flow_attr *attr,
2692                    const struct rte_flow_item items[],
2693                    const struct rte_flow_action actions[],
2694                    struct rte_flow_error *error)
2695 {
2696         union {
2697                 const struct rte_flow_item_port_id *port_id;
2698                 const struct rte_flow_item_eth *eth;
2699                 const struct rte_flow_item_vlan *vlan;
2700                 const struct rte_flow_item_ipv4 *ipv4;
2701                 const struct rte_flow_item_ipv6 *ipv6;
2702                 const struct rte_flow_item_tcp *tcp;
2703                 const struct rte_flow_item_udp *udp;
2704         } spec, mask;
2705         union {
2706                 const struct rte_flow_action_port_id *port_id;
2707                 const struct rte_flow_action_jump *jump;
2708                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
2709                 const struct rte_flow_action_of_set_vlan_vid *
2710                         of_set_vlan_vid;
2711                 const struct rte_flow_action_of_set_vlan_pcp *
2712                         of_set_vlan_pcp;
2713         } conf;
2714         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
2715         struct nlmsghdr *nlh = dev_flow->tcf.nlh;
2716         struct tcmsg *tcm = dev_flow->tcf.tcm;
2717         uint32_t na_act_index_cur;
2718         bool eth_type_set = 0;
2719         bool vlan_present = 0;
2720         bool vlan_eth_type_set = 0;
2721         bool ip_proto_set = 0;
2722         struct nlattr *na_flower;
2723         struct nlattr *na_flower_act;
2724         struct nlattr *na_vlan_id = NULL;
2725         struct nlattr *na_vlan_priority = NULL;
2726         uint64_t item_flags = 0;
2727         int ret;
2728
2729         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
2730                                                 PTOI_TABLE_SZ_MAX(dev)));
2731         nlh = dev_flow->tcf.nlh;
2732         tcm = dev_flow->tcf.tcm;
2733         /* Prepare API must have been called beforehand. */
2734         assert(nlh != NULL && tcm != NULL);
2735         tcm->tcm_family = AF_UNSPEC;
2736         tcm->tcm_ifindex = ptoi[0].ifindex;
2737         tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
2738         /*
2739          * Priority cannot be zero to prevent the kernel from picking one
2740          * automatically.
2741          */
2742         tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
2743                                   RTE_BE16(ETH_P_ALL));
2744         if (attr->group > 0)
2745                 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
2746         mnl_attr_put_strz(nlh, TCA_KIND, "flower");
2747         na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
2748         mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
2749         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
2750                 unsigned int i;
2751
2752                 switch (items->type) {
2753                 case RTE_FLOW_ITEM_TYPE_VOID:
2754                         break;
2755                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
2756                         mask.port_id = flow_tcf_item_mask
2757                                 (items, &rte_flow_item_port_id_mask,
2758                                  &flow_tcf_mask_supported.port_id,
2759                                  &flow_tcf_mask_empty.port_id,
2760                                  sizeof(flow_tcf_mask_supported.port_id),
2761                                  error);
2762                         assert(mask.port_id);
2763                         if (mask.port_id == &flow_tcf_mask_empty.port_id)
2764                                 break;
2765                         spec.port_id = items->spec;
2766                         if (!mask.port_id->id)
2767                                 i = 0;
2768                         else
2769                                 for (i = 0; ptoi[i].ifindex; ++i)
2770                                         if (ptoi[i].port_id == spec.port_id->id)
2771                                                 break;
2772                         assert(ptoi[i].ifindex);
2773                         tcm->tcm_ifindex = ptoi[i].ifindex;
2774                         break;
2775                 case RTE_FLOW_ITEM_TYPE_ETH:
2776                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
2777                         mask.eth = flow_tcf_item_mask
2778                                 (items, &rte_flow_item_eth_mask,
2779                                  &flow_tcf_mask_supported.eth,
2780                                  &flow_tcf_mask_empty.eth,
2781                                  sizeof(flow_tcf_mask_supported.eth),
2782                                  error);
2783                         assert(mask.eth);
2784                         if (mask.eth == &flow_tcf_mask_empty.eth)
2785                                 break;
2786                         spec.eth = items->spec;
2787                         if (mask.eth->type) {
2788                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
2789                                                  spec.eth->type);
2790                                 eth_type_set = 1;
2791                         }
2792                         if (!is_zero_ether_addr(&mask.eth->dst)) {
2793                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
2794                                              ETHER_ADDR_LEN,
2795                                              spec.eth->dst.addr_bytes);
2796                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
2797                                              ETHER_ADDR_LEN,
2798                                              mask.eth->dst.addr_bytes);
2799                         }
2800                         if (!is_zero_ether_addr(&mask.eth->src)) {
2801                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
2802                                              ETHER_ADDR_LEN,
2803                                              spec.eth->src.addr_bytes);
2804                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
2805                                              ETHER_ADDR_LEN,
2806                                              mask.eth->src.addr_bytes);
2807                         }
2808                         break;
2809                 case RTE_FLOW_ITEM_TYPE_VLAN:
2810                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
2811                         mask.vlan = flow_tcf_item_mask
2812                                 (items, &rte_flow_item_vlan_mask,
2813                                  &flow_tcf_mask_supported.vlan,
2814                                  &flow_tcf_mask_empty.vlan,
2815                                  sizeof(flow_tcf_mask_supported.vlan),
2816                                  error);
2817                         assert(mask.vlan);
2818                         if (!eth_type_set)
2819                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
2820                                                  RTE_BE16(ETH_P_8021Q));
2821                         eth_type_set = 1;
2822                         vlan_present = 1;
2823                         if (mask.vlan == &flow_tcf_mask_empty.vlan)
2824                                 break;
2825                         spec.vlan = items->spec;
2826                         if (mask.vlan->inner_type) {
2827                                 mnl_attr_put_u16(nlh,
2828                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE,
2829                                                  spec.vlan->inner_type);
2830                                 vlan_eth_type_set = 1;
2831                         }
2832                         if (mask.vlan->tci & RTE_BE16(0xe000))
2833                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
2834                                                 (rte_be_to_cpu_16
2835                                                  (spec.vlan->tci) >> 13) & 0x7);
2836                         if (mask.vlan->tci & RTE_BE16(0x0fff))
2837                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
2838                                                  rte_be_to_cpu_16
2839                                                  (spec.vlan->tci &
2840                                                   RTE_BE16(0x0fff)));
2841                         break;
2842                 case RTE_FLOW_ITEM_TYPE_IPV4:
2843                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2844                         mask.ipv4 = flow_tcf_item_mask
2845                                 (items, &rte_flow_item_ipv4_mask,
2846                                  &flow_tcf_mask_supported.ipv4,
2847                                  &flow_tcf_mask_empty.ipv4,
2848                                  sizeof(flow_tcf_mask_supported.ipv4),
2849                                  error);
2850                         assert(mask.ipv4);
2851                         if (!eth_type_set || !vlan_eth_type_set)
2852                                 mnl_attr_put_u16(nlh,
2853                                                  vlan_present ?
2854                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
2855                                                  TCA_FLOWER_KEY_ETH_TYPE,
2856                                                  RTE_BE16(ETH_P_IP));
2857                         eth_type_set = 1;
2858                         vlan_eth_type_set = 1;
2859                         if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
2860                                 break;
2861                         spec.ipv4 = items->spec;
2862                         if (mask.ipv4->hdr.next_proto_id) {
2863                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2864                                                 spec.ipv4->hdr.next_proto_id);
2865                                 ip_proto_set = 1;
2866                         }
2867                         if (mask.ipv4->hdr.src_addr) {
2868                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
2869                                                  spec.ipv4->hdr.src_addr);
2870                                 mnl_attr_put_u32(nlh,
2871                                                  TCA_FLOWER_KEY_IPV4_SRC_MASK,
2872                                                  mask.ipv4->hdr.src_addr);
2873                         }
2874                         if (mask.ipv4->hdr.dst_addr) {
2875                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
2876                                                  spec.ipv4->hdr.dst_addr);
2877                                 mnl_attr_put_u32(nlh,
2878                                                  TCA_FLOWER_KEY_IPV4_DST_MASK,
2879                                                  mask.ipv4->hdr.dst_addr);
2880                         }
2881                         break;
2882                 case RTE_FLOW_ITEM_TYPE_IPV6:
2883                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2884                         mask.ipv6 = flow_tcf_item_mask
2885                                 (items, &rte_flow_item_ipv6_mask,
2886                                  &flow_tcf_mask_supported.ipv6,
2887                                  &flow_tcf_mask_empty.ipv6,
2888                                  sizeof(flow_tcf_mask_supported.ipv6),
2889                                  error);
2890                         assert(mask.ipv6);
2891                         if (!eth_type_set || !vlan_eth_type_set)
2892                                 mnl_attr_put_u16(nlh,
2893                                                  vlan_present ?
2894                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
2895                                                  TCA_FLOWER_KEY_ETH_TYPE,
2896                                                  RTE_BE16(ETH_P_IPV6));
2897                         eth_type_set = 1;
2898                         vlan_eth_type_set = 1;
2899                         if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
2900                                 break;
2901                         spec.ipv6 = items->spec;
2902                         if (mask.ipv6->hdr.proto) {
2903                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2904                                                 spec.ipv6->hdr.proto);
2905                                 ip_proto_set = 1;
2906                         }
2907                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
2908                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
2909                                              sizeof(spec.ipv6->hdr.src_addr),
2910                                              spec.ipv6->hdr.src_addr);
2911                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
2912                                              sizeof(mask.ipv6->hdr.src_addr),
2913                                              mask.ipv6->hdr.src_addr);
2914                         }
2915                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
2916                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
2917                                              sizeof(spec.ipv6->hdr.dst_addr),
2918                                              spec.ipv6->hdr.dst_addr);
2919                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
2920                                              sizeof(mask.ipv6->hdr.dst_addr),
2921                                              mask.ipv6->hdr.dst_addr);
2922                         }
2923                         break;
2924                 case RTE_FLOW_ITEM_TYPE_UDP:
2925                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
2926                         mask.udp = flow_tcf_item_mask
2927                                 (items, &rte_flow_item_udp_mask,
2928                                  &flow_tcf_mask_supported.udp,
2929                                  &flow_tcf_mask_empty.udp,
2930                                  sizeof(flow_tcf_mask_supported.udp),
2931                                  error);
2932                         assert(mask.udp);
2933                         if (!ip_proto_set)
2934                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2935                                                 IPPROTO_UDP);
2936                         if (mask.udp == &flow_tcf_mask_empty.udp)
2937                                 break;
2938                         spec.udp = items->spec;
2939                         if (mask.udp->hdr.src_port) {
2940                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
2941                                                  spec.udp->hdr.src_port);
2942                                 mnl_attr_put_u16(nlh,
2943                                                  TCA_FLOWER_KEY_UDP_SRC_MASK,
2944                                                  mask.udp->hdr.src_port);
2945                         }
2946                         if (mask.udp->hdr.dst_port) {
2947                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
2948                                                  spec.udp->hdr.dst_port);
2949                                 mnl_attr_put_u16(nlh,
2950                                                  TCA_FLOWER_KEY_UDP_DST_MASK,
2951                                                  mask.udp->hdr.dst_port);
2952                         }
2953                         break;
2954                 case RTE_FLOW_ITEM_TYPE_TCP:
2955                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
2956                         mask.tcp = flow_tcf_item_mask
2957                                 (items, &rte_flow_item_tcp_mask,
2958                                  &flow_tcf_mask_supported.tcp,
2959                                  &flow_tcf_mask_empty.tcp,
2960                                  sizeof(flow_tcf_mask_supported.tcp),
2961                                  error);
2962                         assert(mask.tcp);
2963                         if (!ip_proto_set)
2964                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2965                                                 IPPROTO_TCP);
2966                         if (mask.tcp == &flow_tcf_mask_empty.tcp)
2967                                 break;
2968                         spec.tcp = items->spec;
2969                         if (mask.tcp->hdr.src_port) {
2970                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
2971                                                  spec.tcp->hdr.src_port);
2972                                 mnl_attr_put_u16(nlh,
2973                                                  TCA_FLOWER_KEY_TCP_SRC_MASK,
2974                                                  mask.tcp->hdr.src_port);
2975                         }
2976                         if (mask.tcp->hdr.dst_port) {
2977                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
2978                                                  spec.tcp->hdr.dst_port);
2979                                 mnl_attr_put_u16(nlh,
2980                                                  TCA_FLOWER_KEY_TCP_DST_MASK,
2981                                                  mask.tcp->hdr.dst_port);
2982                         }
2983                         if (mask.tcp->hdr.tcp_flags) {
2984                                 mnl_attr_put_u16
2985                                         (nlh,
2986                                          TCA_FLOWER_KEY_TCP_FLAGS,
2987                                          rte_cpu_to_be_16
2988                                                 (spec.tcp->hdr.tcp_flags));
2989                                 mnl_attr_put_u16
2990                                         (nlh,
2991                                          TCA_FLOWER_KEY_TCP_FLAGS_MASK,
2992                                          rte_cpu_to_be_16
2993                                                 (mask.tcp->hdr.tcp_flags));
2994                         }
2995                         break;
2996                 default:
2997                         return rte_flow_error_set(error, ENOTSUP,
2998                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2999                                                   NULL, "item not supported");
3000                 }
3001         }
3002         na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
3003         na_act_index_cur = 1;
3004         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3005                 struct nlattr *na_act_index;
3006                 struct nlattr *na_act;
3007                 unsigned int vlan_act;
3008                 unsigned int i;
3009
3010                 switch (actions->type) {
3011                 case RTE_FLOW_ACTION_TYPE_VOID:
3012                         break;
3013                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
3014                         conf.port_id = actions->conf;
3015                         if (conf.port_id->original)
3016                                 i = 0;
3017                         else
3018                                 for (i = 0; ptoi[i].ifindex; ++i)
3019                                         if (ptoi[i].port_id == conf.port_id->id)
3020                                                 break;
3021                         assert(ptoi[i].ifindex);
3022                         na_act_index =
3023                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
3024                         assert(na_act_index);
3025                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
3026                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
3027                         assert(na_act);
3028                         mnl_attr_put(nlh, TCA_MIRRED_PARMS,
3029                                      sizeof(struct tc_mirred),
3030                                      &(struct tc_mirred){
3031                                         .action = TC_ACT_STOLEN,
3032                                         .eaction = TCA_EGRESS_REDIR,
3033                                         .ifindex = ptoi[i].ifindex,
3034                                      });
3035                         mnl_attr_nest_end(nlh, na_act);
3036                         mnl_attr_nest_end(nlh, na_act_index);
3037                         break;
3038                 case RTE_FLOW_ACTION_TYPE_JUMP:
3039                         conf.jump = actions->conf;
3040                         na_act_index =
3041                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
3042                         assert(na_act_index);
3043                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
3044                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
3045                         assert(na_act);
3046                         mnl_attr_put(nlh, TCA_GACT_PARMS,
3047                                      sizeof(struct tc_gact),
3048                                      &(struct tc_gact){
3049                                         .action = TC_ACT_GOTO_CHAIN |
3050                                                   conf.jump->group,
3051                                      });
3052                         mnl_attr_nest_end(nlh, na_act);
3053                         mnl_attr_nest_end(nlh, na_act_index);
3054                         break;
3055                 case RTE_FLOW_ACTION_TYPE_DROP:
3056                         na_act_index =
3057                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
3058                         assert(na_act_index);
3059                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
3060                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
3061                         assert(na_act);
3062                         mnl_attr_put(nlh, TCA_GACT_PARMS,
3063                                      sizeof(struct tc_gact),
3064                                      &(struct tc_gact){
3065                                         .action = TC_ACT_SHOT,
3066                                      });
3067                         mnl_attr_nest_end(nlh, na_act);
3068                         mnl_attr_nest_end(nlh, na_act_index);
3069                         break;
3070                 case RTE_FLOW_ACTION_TYPE_COUNT:
3071                         /*
3072                          * Driver adds the count action implicitly for
3073                          * each rule it creates.
3074                          */
3075                         ret = flow_tcf_translate_action_count(dev,
3076                                                               dev_flow, error);
3077                         if (ret < 0)
3078                                 return ret;
3079                         break;
3080                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
3081                         conf.of_push_vlan = NULL;
3082                         vlan_act = TCA_VLAN_ACT_POP;
3083                         goto action_of_vlan;
3084                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3085                         conf.of_push_vlan = actions->conf;
3086                         vlan_act = TCA_VLAN_ACT_PUSH;
3087                         goto action_of_vlan;
3088                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3089                         conf.of_set_vlan_vid = actions->conf;
3090                         if (na_vlan_id)
3091                                 goto override_na_vlan_id;
3092                         vlan_act = TCA_VLAN_ACT_MODIFY;
3093                         goto action_of_vlan;
3094                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
3095                         conf.of_set_vlan_pcp = actions->conf;
3096                         if (na_vlan_priority)
3097                                 goto override_na_vlan_priority;
3098                         vlan_act = TCA_VLAN_ACT_MODIFY;
3099                         goto action_of_vlan;
3100 action_of_vlan:
3101                         na_act_index =
3102                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
3103                         assert(na_act_index);
3104                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
3105                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
3106                         assert(na_act);
3107                         mnl_attr_put(nlh, TCA_VLAN_PARMS,
3108                                      sizeof(struct tc_vlan),
3109                                      &(struct tc_vlan){
3110                                         .action = TC_ACT_PIPE,
3111                                         .v_action = vlan_act,
3112                                      });
3113                         if (vlan_act == TCA_VLAN_ACT_POP) {
3114                                 mnl_attr_nest_end(nlh, na_act);
3115                                 mnl_attr_nest_end(nlh, na_act_index);
3116                                 break;
3117                         }
3118                         if (vlan_act == TCA_VLAN_ACT_PUSH)
3119                                 mnl_attr_put_u16(nlh,
3120                                                  TCA_VLAN_PUSH_VLAN_PROTOCOL,
3121                                                  conf.of_push_vlan->ethertype);
3122                         na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
3123                         mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
3124                         na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
3125                         mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
3126                         mnl_attr_nest_end(nlh, na_act);
3127                         mnl_attr_nest_end(nlh, na_act_index);
3128                         if (actions->type ==
3129                             RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
3130 override_na_vlan_id:
3131                                 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
3132                                 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
3133                                         rte_be_to_cpu_16
3134                                         (conf.of_set_vlan_vid->vlan_vid);
3135                         } else if (actions->type ==
3136                                    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
3137 override_na_vlan_priority:
3138                                 na_vlan_priority->nla_type =
3139                                         TCA_VLAN_PUSH_VLAN_PRIORITY;
3140                                 *(uint8_t *)mnl_attr_get_payload
3141                                         (na_vlan_priority) =
3142                                         conf.of_set_vlan_pcp->vlan_pcp;
3143                         }
3144                         break;
3145                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
3146                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
3147                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
3148                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
3149                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
3150                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
3151                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
3152                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
3153                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
3154                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
3155                         na_act_index =
3156                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
3157                         flow_tcf_create_pedit_mnl_msg(nlh,
3158                                                       &actions, item_flags);
3159                         mnl_attr_nest_end(nlh, na_act_index);
3160                         break;
3161                 default:
3162                         return rte_flow_error_set(error, ENOTSUP,
3163                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3164                                                   actions,
3165                                                   "action not supported");
3166                 }
3167         }
3168         assert(na_flower);
3169         assert(na_flower_act);
3170         mnl_attr_nest_end(nlh, na_flower_act);
3171         mnl_attr_nest_end(nlh, na_flower);
3172         return 0;
3173 }
3174
3175 /**
3176  * Send Netlink message with acknowledgment.
3177  *
3178  * @param ctx
3179  *   Flow context to use.
3180  * @param nlh
3181  *   Message to send. This function always raises the NLM_F_ACK flag before
3182  *   sending.
3183  *
3184  * @return
3185  *   0 on success, a negative errno value otherwise and rte_errno is set.
3186  */
3187 static int
3188 flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
3189 {
3190         alignas(struct nlmsghdr)
3191         uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
3192                     nlh->nlmsg_len - sizeof(*nlh)];
3193         uint32_t seq = ctx->seq++;
3194         struct mnl_socket *nl = ctx->nl;
3195         int ret;
3196
3197         nlh->nlmsg_flags |= NLM_F_ACK;
3198         nlh->nlmsg_seq = seq;
3199         ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
3200         if (ret != -1)
3201                 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
3202         if (ret != -1)
3203                 ret = mnl_cb_run
3204                         (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
3205         if (ret > 0)
3206                 return 0;
3207         rte_errno = errno;
3208         return -rte_errno;
3209 }
3210
3211 /**
3212  * Apply flow to E-Switch by sending Netlink message.
3213  *
3214  * @param[in] dev
3215  *   Pointer to Ethernet device.
3216  * @param[in, out] flow
3217  *   Pointer to the sub flow.
3218  * @param[out] error
3219  *   Pointer to the error structure.
3220  *
3221  * @return
3222  *   0 on success, a negative errno value otherwise and rte_ernno is set.
3223  */
3224 static int
3225 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3226                struct rte_flow_error *error)
3227 {
3228         struct priv *priv = dev->data->dev_private;
3229         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
3230         struct mlx5_flow *dev_flow;
3231         struct nlmsghdr *nlh;
3232
3233         dev_flow = LIST_FIRST(&flow->dev_flows);
3234         /* E-Switch flow can't be expanded. */
3235         assert(!LIST_NEXT(dev_flow, next));
3236         nlh = dev_flow->tcf.nlh;
3237         nlh->nlmsg_type = RTM_NEWTFILTER;
3238         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
3239         if (!flow_tcf_nl_ack(ctx, nlh))
3240                 return 0;
3241         return rte_flow_error_set(error, rte_errno,
3242                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
3243                                   "netlink: failed to create TC flow rule");
3244 }
3245
3246 /**
3247  * Remove flow from E-Switch by sending Netlink message.
3248  *
3249  * @param[in] dev
3250  *   Pointer to Ethernet device.
3251  * @param[in, out] flow
3252  *   Pointer to the sub flow.
3253  */
3254 static void
3255 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
3256 {
3257         struct priv *priv = dev->data->dev_private;
3258         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
3259         struct mlx5_flow *dev_flow;
3260         struct nlmsghdr *nlh;
3261
3262         if (!flow)
3263                 return;
3264         if (flow->counter) {
3265                 if (--flow->counter->ref_cnt == 0) {
3266                         rte_free(flow->counter);
3267                         flow->counter = NULL;
3268                 }
3269         }
3270         dev_flow = LIST_FIRST(&flow->dev_flows);
3271         if (!dev_flow)
3272                 return;
3273         /* E-Switch flow can't be expanded. */
3274         assert(!LIST_NEXT(dev_flow, next));
3275         nlh = dev_flow->tcf.nlh;
3276         nlh->nlmsg_type = RTM_DELTFILTER;
3277         nlh->nlmsg_flags = NLM_F_REQUEST;
3278         flow_tcf_nl_ack(ctx, nlh);
3279 }
3280
3281 /**
3282  * Remove flow from E-Switch and release resources of the device flow.
3283  *
3284  * @param[in] dev
3285  *   Pointer to Ethernet device.
3286  * @param[in, out] flow
3287  *   Pointer to the sub flow.
3288  */
3289 static void
3290 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3291 {
3292         struct mlx5_flow *dev_flow;
3293
3294         if (!flow)
3295                 return;
3296         flow_tcf_remove(dev, flow);
3297         dev_flow = LIST_FIRST(&flow->dev_flows);
3298         if (!dev_flow)
3299                 return;
3300         /* E-Switch flow can't be expanded. */
3301         assert(!LIST_NEXT(dev_flow, next));
3302         LIST_REMOVE(dev_flow, next);
3303         rte_free(dev_flow);
3304 }
3305
/**
 * Helper routine for figuring the space size required for a parse buffer.
 *
 * @param array
 *   Array of values to use.
 * @param idx
 *   Current location in array.
 * @param value
 *   Value to compare with.
 *
 * @return
 *   The maximum between the given value and the array value on index,
 *   or the given value alone when the index is negative.
 */
static uint16_t
flow_tcf_arr_val_max(uint16_t array[], int idx, uint16_t value)
{
	if (idx < 0)
		return value;
	return array[idx] > value ? array[idx] : value;
}
3324
/**
 * Parse rtnetlink message attributes filling the attribute table with the info
 * retrieved.
 *
 * @param tb
 *   Attribute table to be filled.
 * @param max
 *   Maximum entry in the attribute table.
 * @param rta
 *   The attributes section in the message to be parsed.
 * @param len
 *   The length of the attributes section in the message.
 */
static void
flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
			 struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short type = rta->rta_type;

		/* Only the first occurrence of each attribute is kept. */
		if (type <= max && tb[type] == NULL)
			tb[type] = rta;
	}
}
3351
/**
 * Extract flow counters from flower action.
 *
 * @param rta
 *   flower action stats properties in the Netlink message received.
 * @param rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   we need to traverse in order to get to the requested object.
 * @param idx
 *   Current location in rta_type table.
 * @param[out] data
 *   data holding the count statistics of the rte_flow retrieved from
 *   the message.
 *
 * @return
 *   0 if data was found and retrieved, -1 otherwise.
 */
static int
flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
				       uint16_t rta_type[], int idx,
				       struct gnet_stats_basic *data)
{
	int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
						 TCA_STATS_BASIC);
	struct rtattr *tbs[tca_stats_max + 1];

	if (rta == NULL || idx < 0)
		return -1;
	flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
				 RTA_DATA(rta), RTA_PAYLOAD(rta));
	/* Only the basic statistics blob is of interest at this level. */
	if (rta_type[idx] == TCA_STATS_BASIC && tbs[TCA_STATS_BASIC] != NULL) {
		/* Copy no more than the attribute actually carries. */
		memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
		       RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
			       sizeof(*data)));
		return 0;
	}
	return -1;
}
3396
3397 /**
3398  * Parse flower single action retrieving the requested action attribute,
3399  * if found.
3400  *
3401  * @param arg
3402  *   flower action properties in the Netlink message received.
3403  * @param rta_type
3404  *   The backward sequence of rta_types, as written in the attribute table,
3405  *   we need to traverse in order to get to the requested object.
3406  * @param idx
3407  *   Current location in rta_type table.
3408  * @param[out] data
3409  *   Count statistics retrieved from the message query.
3410  *
3411  * @return
3412  *   0 if data was found and retrieved, -1 otherwise.
3413  */
3414 static int
3415 flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
3416                                      uint16_t rta_type[], int idx, void *data)
3417 {
3418         int tca_act_max = flow_tcf_arr_val_max(rta_type, idx, TCA_ACT_STATS);
3419         struct rtattr *tb[tca_act_max + 1];
3420
3421         if (arg == NULL || idx < 0)
3422                 return -1;
3423         flow_tcf_nl_parse_rtattr(tb, tca_act_max,
3424                                  RTA_DATA(arg), RTA_PAYLOAD(arg));
3425         if (tb[TCA_ACT_KIND] == NULL)
3426                 return -1;
3427         switch (rta_type[idx]) {
3428         case TCA_ACT_STATS:
3429                 if (tb[TCA_ACT_STATS])
3430                         return flow_tcf_nl_action_stats_parse_and_get
3431                                         (tb[TCA_ACT_STATS],
3432                                          rta_type, --idx,
3433                                          (struct gnet_stats_basic *)data);
3434                 break;
3435         default:
3436                 break;
3437         }
3438         return -1;
3439 }
3440
3441 /**
3442  * Parse flower action section in the message retrieving the requested
3443  * attribute from the first action that provides it.
3444  *
3445  * @param opt
3446  *   flower section in the Netlink message received.
3447  * @param rta_type
3448  *   The backward sequence of rta_types, as written in the attribute table,
3449  *   we need to traverse in order to get to the requested object.
3450  * @param idx
3451  *   Current location in rta_type table.
3452  * @param[out] data
3453  *   data retrieved from the message query.
3454  *
3455  * @return
3456  *   0 if data was found and retrieved, -1 otherwise.
3457  */
3458 static int
3459 flow_tcf_nl_action_parse_and_get(struct rtattr *arg,
3460                                  uint16_t rta_type[], int idx, void *data)
3461 {
3462         struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
3463         int i;
3464
3465         if (arg == NULL || idx < 0)
3466                 return -1;
3467         flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
3468                                  RTA_DATA(arg), RTA_PAYLOAD(arg));
3469         switch (rta_type[idx]) {
3470         /*
3471          * flow counters are stored in the actions defined by the flow
3472          * and not in the flow itself, therefore we need to traverse the
3473          * flower chain of actions in search for them.
3474          *
3475          * Note that the index is not decremented here.
3476          */
3477         case TCA_ACT_STATS:
3478                 for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
3479                         if (tb[i] &&
3480                         !flow_tcf_nl_parse_one_action_and_get(tb[i],
3481                                                               rta_type,
3482                                                               idx, data))
3483                                 return 0;
3484                 }
3485                 break;
3486         default:
3487                 break;
3488         }
3489         return -1;
3490 }
3491
3492 /**
3493  * Parse flower classifier options in the message, retrieving the requested
3494  * attribute if found.
3495  *
3496  * @param opt
3497  *   flower section in the Netlink message received.
3498  * @param rta_type
3499  *   The backward sequence of rta_types, as written in the attribute table,
3500  *   we need to traverse in order to get to the requested object.
3501  * @param idx
3502  *   Current location in rta_type table.
3503  * @param[out] data
3504  *   data retrieved from the message query.
3505  *
3506  * @return
3507  *   0 if data was found and retrieved, -1 otherwise.
3508  */
3509 static int
3510 flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
3511                                uint16_t rta_type[], int idx, void *data)
3512 {
3513         int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
3514                                                   TCA_FLOWER_ACT);
3515         struct rtattr *tb[tca_flower_max + 1];
3516
3517         if (!opt || idx < 0)
3518                 return -1;
3519         flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
3520                                  RTA_DATA(opt), RTA_PAYLOAD(opt));
3521         switch (rta_type[idx]) {
3522         case TCA_FLOWER_ACT:
3523                 if (tb[TCA_FLOWER_ACT])
3524                         return flow_tcf_nl_action_parse_and_get
3525                                                         (tb[TCA_FLOWER_ACT],
3526                                                          rta_type, --idx, data);
3527                 break;
3528         default:
3529                 break;
3530         }
3531         return -1;
3532 }
3533
/**
 * Parse Netlink reply on filter query, retrieving the flow counters.
 *
 * @param cnlh
 *   Message received from Netlink.
 * @param rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   we need to traverse in order to get to the requested object.
 * @param idx
 *   Current location in rta_type table.
 * @param[out] data
 *   data retrieved from the message query.
 *
 * @return
 *   0 if data was found and retrieved, -1 otherwise.
 */
static int
flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
				 uint16_t rta_type[], int idx, void *data)
{
	struct tcmsg *t = NLMSG_DATA(cnlh);
	int len = cnlh->nlmsg_len;
	int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
	struct rtattr *tb[tca_max + 1];

	if (idx < 0)
		return -1;
	/* Only TC filter messages are meaningful here. */
	switch (cnlh->nlmsg_type) {
	case RTM_NEWTFILTER:
	case RTM_GETTFILTER:
	case RTM_DELTFILTER:
		break;
	default:
		return -1;
	}
	len -= NLMSG_LENGTH(sizeof(*t));
	if (len < 0)
		return -1;
	flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
	/* Not a TC flower flow - bail out. */
	if (tb[TCA_KIND] == NULL ||
	    strcmp(RTA_DATA(tb[TCA_KIND]), "flower") != 0)
		return -1;
	/* Descend into the flower options for the requested object. */
	if (rta_type[idx] == TCA_OPTIONS && tb[TCA_OPTIONS] != NULL)
		return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
						      rta_type, idx - 1,
						      data);
	return -1;
}
3586
3587 /**
3588  * A callback to parse Netlink reply on TC flower query.
3589  *
3590  * @param nlh
3591  *   Message received from Netlink.
3592  * @param[out] data
3593  *   Pointer to data area to be filled by the parsing routine.
3594  *   assumed to be a pinter to struct flow_tcf_stats_basic.
3595  *
3596  * @return
3597  *   MNL_CB_OK value.
3598  */
3599 static int
3600 flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
3601 {
3602         /*
3603          * The backward sequence of rta_types to pass in order to get
3604          *  to the counters.
3605          */
3606         uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
3607                                 TCA_FLOWER_ACT, TCA_OPTIONS };
3608         struct flow_tcf_stats_basic *sb_data = data;
3609         union {
3610                 const struct nlmsghdr *c;
3611                 struct nlmsghdr *nc;
3612         } tnlh = { .c = nlh };
3613
3614         if (!flow_tcf_nl_filter_parse_and_get(tnlh.nc, rta_type,
3615                                               RTE_DIM(rta_type) - 1,
3616                                               (void *)&sb_data->counters))
3617                 sb_data->valid = true;
3618         return MNL_CB_OK;
3619 }
3620
/**
 * Query a TC flower rule for its statistics via netlink.
 *
 * Re-sends the saved flow rule message as an RTM_GETTFILTER request and
 * parses the echoed reply for the action counters.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to the sub flow.
 * @param[out] data
 *   data retrieved by the query.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_tcf_query_count(struct rte_eth_dev *dev,
			  struct rte_flow *flow,
			  void *data,
			  struct rte_flow_error *error)
{
	struct flow_tcf_stats_basic sb_data = { 0 };
	struct rte_flow_query_count *qc = data;
	struct priv *priv = dev->data->dev_private;
	struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
	struct mnl_socket *nl = ctx->nl;
	struct mlx5_flow *dev_flow;
	struct nlmsghdr *nlh;
	uint32_t seq = priv->tcf_context->seq++;
	ssize_t ret;
	assert(qc);

	dev_flow = LIST_FIRST(&flow->dev_flows);
	/* E-Switch flow can't be expanded. */
	assert(!LIST_NEXT(dev_flow, next));
	if (!dev_flow->flow->counter)
		goto notsup_exit;
	/* Reuse the saved rule message, retyped as a statistics query. */
	nlh = dev_flow->tcf.nlh;
	nlh->nlmsg_type = RTM_GETTFILTER;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
	nlh->nlmsg_seq = seq;
	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
		goto error_exit;
	/* Drain replies; the callback fills sb_data on a matching message. */
	do {
		ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
		if (ret <= 0)
			break;
		ret = mnl_cb_run(ctx->buf, ret, seq,
				 mnl_socket_get_portid(nl),
				 flow_tcf_nl_message_get_stats_basic,
				 (void *)&sb_data);
	} while (ret > 0);
	if (sb_data.valid) {
		/* Return the delta from last reset. */
		qc->hits_set = 1;
		qc->bytes_set = 1;
		qc->hits = sb_data.counters.packets - flow->counter->hits;
		qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
		if (qc->reset) {
			flow->counter->hits = sb_data.counters.packets;
			flow->counter->bytes = sb_data.counters.bytes;
		}
		return 0;
	}
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL,
				  "flow does not have counter");
error_exit:
	return rte_flow_error_set
			(error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			 NULL, "netlink: failed to read flow rule counters");
notsup_exit:
	return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			 NULL, "counters are not available.");
}
3699
3700 /**
3701  * Query a flow.
3702  *
3703  * @see rte_flow_query()
3704  * @see rte_flow_ops
3705  */
3706 static int
3707 flow_tcf_query(struct rte_eth_dev *dev,
3708                struct rte_flow *flow,
3709                const struct rte_flow_action *actions,
3710                void *data,
3711                struct rte_flow_error *error)
3712 {
3713         int ret = -EINVAL;
3714
3715         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3716                 switch (actions->type) {
3717                 case RTE_FLOW_ACTION_TYPE_VOID:
3718                         break;
3719                 case RTE_FLOW_ACTION_TYPE_COUNT:
3720                         ret = flow_tcf_query_count(dev, flow, data, error);
3721                         break;
3722                 default:
3723                         return rte_flow_error_set(error, ENOTSUP,
3724                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3725                                                   actions,
3726                                                   "action not supported");
3727                 }
3728         }
3729         return ret;
3730 }
3731
/* E-Switch (TC flower) flow driver callbacks for the generic mlx5 flow layer. */
const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
	.validate = flow_tcf_validate,
	.prepare = flow_tcf_prepare,
	.translate = flow_tcf_translate,
	.apply = flow_tcf_apply,
	.remove = flow_tcf_remove,
	.destroy = flow_tcf_destroy,
	.query = flow_tcf_query,
};
3741
3742 /**
3743  * Create and configure a libmnl socket for Netlink flow rules.
3744  *
3745  * @return
3746  *   A valid libmnl socket object pointer on success, NULL otherwise and
3747  *   rte_errno is set.
3748  */
3749 static struct mnl_socket *
3750 flow_tcf_mnl_socket_create(void)
3751 {
3752         struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
3753
3754         if (nl) {
3755                 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
3756                                       sizeof(int));
3757                 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
3758                         return nl;
3759         }
3760         rte_errno = errno;
3761         if (nl)
3762                 mnl_socket_close(nl);
3763         return NULL;
3764 }
3765
/**
 * Destroy a libmnl socket.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind. May be NULL (no-op).
 */
static void
flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
{
	if (nl == NULL)
		return;
	mnl_socket_close(nl);
}
3778
/**
 * Initialize ingress qdisc of a given network interface.
 *
 * Deletes any pre-existing ingress qdisc (ignoring "not present" errors)
 * and creates a fresh one, so flower filters can be attached to it.
 *
 * @param ctx
 *   Pointer to tc-flower context to use.
 * @param ifindex
 *   Index of network interface to initialize.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
		   unsigned int ifindex, struct rte_flow_error *error)
{
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	alignas(struct nlmsghdr)
	uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];

	/* Destroy existing ingress qdisc and everything attached to it. */
	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_DELQDISC;
	nlh->nlmsg_flags = NLM_F_REQUEST;
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ifindex;
	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
	tcm->tcm_parent = TC_H_INGRESS;
	/* Ignore errors when qdisc is already absent (EINVAL/ENOENT). */
	if (flow_tcf_nl_ack(ctx, nlh) &&
	    rte_errno != EINVAL && rte_errno != ENOENT)
		return rte_flow_error_set(error, rte_errno,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					  "netlink: failed to remove ingress"
					  " qdisc");
	/* Create fresh ingress qdisc. */
	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_NEWQDISC;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ifindex;
	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
	tcm->tcm_parent = TC_H_INGRESS;
	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
	if (flow_tcf_nl_ack(ctx, nlh))
		return rte_flow_error_set(error, rte_errno,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					  "netlink: failed to create ingress"
					  " qdisc");
	return 0;
}
3834
3835 /**
3836  * Create libmnl context for Netlink flow rules.
3837  *
3838  * @return
3839  *   A valid libmnl socket object pointer on success, NULL otherwise and
3840  *   rte_errno is set.
3841  */
3842 struct mlx5_flow_tcf_context *
3843 mlx5_flow_tcf_context_create(void)
3844 {
3845         struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
3846                                                         sizeof(*ctx),
3847                                                         sizeof(uint32_t));
3848         if (!ctx)
3849                 goto error;
3850         ctx->nl = flow_tcf_mnl_socket_create();
3851         if (!ctx->nl)
3852                 goto error;
3853         ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
3854         ctx->buf = rte_zmalloc(__func__,
3855                                ctx->buf_size, sizeof(uint32_t));
3856         if (!ctx->buf)
3857                 goto error;
3858         ctx->seq = random();
3859         return ctx;
3860 error:
3861         mlx5_flow_tcf_context_destroy(ctx);
3862         return NULL;
3863 }
3864
3865 /**
3866  * Destroy a libmnl context.
3867  *
3868  * @param ctx
3869  *   Libmnl socket of the @p NETLINK_ROUTE kind.
3870  */
3871 void
3872 mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
3873 {
3874         if (!ctx)
3875                 return;
3876         flow_tcf_mnl_socket_destroy(ctx->nl);
3877         rte_free(ctx->buf);
3878         rte_free(ctx);
3879 }