net/mlx5: add necessary definitions for E-Switch VXLAN
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_tcf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 6WIND S.A.
3  * Copyright 2018 Mellanox Technologies, Ltd
4  */
5
6 #include <assert.h>
7 #include <errno.h>
8 #include <libmnl/libmnl.h>
9 #include <linux/gen_stats.h>
10 #include <linux/if_ether.h>
11 #include <linux/netlink.h>
12 #include <linux/pkt_cls.h>
13 #include <linux/pkt_sched.h>
14 #include <linux/rtnetlink.h>
15 #include <linux/tc_act/tc_gact.h>
16 #include <linux/tc_act/tc_mirred.h>
17 #include <netinet/in.h>
18 #include <stdalign.h>
19 #include <stdbool.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <sys/socket.h>
24
25 #include <rte_byteorder.h>
26 #include <rte_errno.h>
27 #include <rte_ether.h>
28 #include <rte_flow.h>
29 #include <rte_malloc.h>
30 #include <rte_common.h>
31
32 #include "mlx5.h"
33 #include "mlx5_flow.h"
34 #include "mlx5_autoconf.h"
35
36 #ifdef HAVE_TC_ACT_VLAN
37
38 #include <linux/tc_act/tc_vlan.h>
39
40 #else /* HAVE_TC_ACT_VLAN */
41
42 #define TCA_VLAN_ACT_POP 1
43 #define TCA_VLAN_ACT_PUSH 2
44 #define TCA_VLAN_ACT_MODIFY 3
45 #define TCA_VLAN_PARMS 2
46 #define TCA_VLAN_PUSH_VLAN_ID 3
47 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
48 #define TCA_VLAN_PAD 5
49 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
50
51 struct tc_vlan {
52         tc_gen;
53         int v_action;
54 };
55
56 #endif /* HAVE_TC_ACT_VLAN */
57
58 #ifdef HAVE_TC_ACT_PEDIT
59
60 #include <linux/tc_act/tc_pedit.h>
61
62 #else /* HAVE_TC_ACT_PEDIT */
63
64 enum {
65         TCA_PEDIT_UNSPEC,
66         TCA_PEDIT_TM,
67         TCA_PEDIT_PARMS,
68         TCA_PEDIT_PAD,
69         TCA_PEDIT_PARMS_EX,
70         TCA_PEDIT_KEYS_EX,
71         TCA_PEDIT_KEY_EX,
72         __TCA_PEDIT_MAX
73 };
74
75 enum {
76         TCA_PEDIT_KEY_EX_HTYPE = 1,
77         TCA_PEDIT_KEY_EX_CMD = 2,
78         __TCA_PEDIT_KEY_EX_MAX
79 };
80
81 enum pedit_header_type {
82         TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
83         TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
84         TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
85         TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
86         TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
87         TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
88         __PEDIT_HDR_TYPE_MAX,
89 };
90
91 enum pedit_cmd {
92         TCA_PEDIT_KEY_EX_CMD_SET = 0,
93         TCA_PEDIT_KEY_EX_CMD_ADD = 1,
94         __PEDIT_CMD_MAX,
95 };
96
97 struct tc_pedit_key {
98         __u32 mask; /* AND */
99         __u32 val; /* XOR */
100         __u32 off; /* offset */
101         __u32 at;
102         __u32 offmask;
103         __u32 shift;
104 };
105
106 __extension__
107 struct tc_pedit_sel {
108         tc_gen;
109         unsigned char nkeys;
110         unsigned char flags;
111         struct tc_pedit_key keys[0];
112 };
113
114 #endif /* HAVE_TC_ACT_PEDIT */
115
116 #ifdef HAVE_TC_ACT_TUNNEL_KEY
117
118 #include <linux/tc_act/tc_tunnel_key.h>
119
120 #ifndef HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT
121 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
122 #endif
123
124 #ifndef HAVE_TCA_TUNNEL_KEY_NO_CSUM
125 #define TCA_TUNNEL_KEY_NO_CSUM 10
126 #endif
127
128 #else /* HAVE_TC_ACT_TUNNEL_KEY */
129
130 #define TCA_ACT_TUNNEL_KEY 17
131 #define TCA_TUNNEL_KEY_ACT_SET 1
132 #define TCA_TUNNEL_KEY_ACT_RELEASE 2
133 #define TCA_TUNNEL_KEY_PARMS 2
134 #define TCA_TUNNEL_KEY_ENC_IPV4_SRC 3
135 #define TCA_TUNNEL_KEY_ENC_IPV4_DST 4
136 #define TCA_TUNNEL_KEY_ENC_IPV6_SRC 5
137 #define TCA_TUNNEL_KEY_ENC_IPV6_DST 6
138 #define TCA_TUNNEL_KEY_ENC_KEY_ID 7
139 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
140 #define TCA_TUNNEL_KEY_NO_CSUM 10
141
142 struct tc_tunnel_key {
143         tc_gen;
144         int t_action;
145 };
146
147 #endif /* HAVE_TC_ACT_TUNNEL_KEY */
148
149 /* Normally found in linux/netlink.h. */
150 #ifndef NETLINK_CAP_ACK
151 #define NETLINK_CAP_ACK 10
152 #endif
153
154 /* Normally found in linux/pkt_sched.h. */
155 #ifndef TC_H_MIN_INGRESS
156 #define TC_H_MIN_INGRESS 0xfff2u
157 #endif
158
159 /* Normally found in linux/pkt_cls.h. */
160 #ifndef TCA_CLS_FLAGS_SKIP_SW
161 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
162 #endif
163 #ifndef HAVE_TCA_CHAIN
164 #define TCA_CHAIN 11
165 #endif
166 #ifndef HAVE_TCA_FLOWER_ACT
167 #define TCA_FLOWER_ACT 3
168 #endif
169 #ifndef HAVE_TCA_FLOWER_FLAGS
170 #define TCA_FLOWER_FLAGS 22
171 #endif
172 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
173 #define TCA_FLOWER_KEY_ETH_TYPE 8
174 #endif
175 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
176 #define TCA_FLOWER_KEY_ETH_DST 4
177 #endif
178 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
179 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
180 #endif
181 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
182 #define TCA_FLOWER_KEY_ETH_SRC 6
183 #endif
184 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
185 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
186 #endif
187 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
188 #define TCA_FLOWER_KEY_IP_PROTO 9
189 #endif
190 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
191 #define TCA_FLOWER_KEY_IPV4_SRC 10
192 #endif
193 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
194 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
195 #endif
196 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
197 #define TCA_FLOWER_KEY_IPV4_DST 12
198 #endif
199 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
200 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
201 #endif
202 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
203 #define TCA_FLOWER_KEY_IPV6_SRC 14
204 #endif
205 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
206 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
207 #endif
208 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
209 #define TCA_FLOWER_KEY_IPV6_DST 16
210 #endif
211 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
212 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
213 #endif
214 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
215 #define TCA_FLOWER_KEY_TCP_SRC 18
216 #endif
217 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
218 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
219 #endif
220 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
221 #define TCA_FLOWER_KEY_TCP_DST 19
222 #endif
223 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
224 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
225 #endif
226 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
227 #define TCA_FLOWER_KEY_UDP_SRC 20
228 #endif
229 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
230 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
231 #endif
232 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
233 #define TCA_FLOWER_KEY_UDP_DST 21
234 #endif
235 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
236 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
237 #endif
238 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
239 #define TCA_FLOWER_KEY_VLAN_ID 23
240 #endif
241 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
242 #define TCA_FLOWER_KEY_VLAN_PRIO 24
243 #endif
244 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
245 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
246 #endif
247 #ifndef HAVE_TCA_FLOWER_KEY_ENC_KEY_ID
248 #define TCA_FLOWER_KEY_ENC_KEY_ID 26
249 #endif
250 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC
251 #define TCA_FLOWER_KEY_ENC_IPV4_SRC 27
252 #endif
253 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK
254 #define TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK 28
255 #endif
256 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST
257 #define TCA_FLOWER_KEY_ENC_IPV4_DST 29
258 #endif
259 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK
260 #define TCA_FLOWER_KEY_ENC_IPV4_DST_MASK 30
261 #endif
262 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC
263 #define TCA_FLOWER_KEY_ENC_IPV6_SRC 31
264 #endif
265 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK
266 #define TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK 32
267 #endif
268 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST
269 #define TCA_FLOWER_KEY_ENC_IPV6_DST 33
270 #endif
271 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK
272 #define TCA_FLOWER_KEY_ENC_IPV6_DST_MASK 34
273 #endif
274 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT
275 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT 43
276 #endif
277 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK
278 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK 44
279 #endif
280 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT
281 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT 45
282 #endif
283 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK
284 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK 46
285 #endif
286 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
287 #define TCA_FLOWER_KEY_TCP_FLAGS 71
288 #endif
289 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
290 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
291 #endif
292 #ifndef HAVE_TC_ACT_GOTO_CHAIN
293 #define TC_ACT_GOTO_CHAIN 0x20000000
294 #endif
295
296 #ifndef IPV6_ADDR_LEN
297 #define IPV6_ADDR_LEN 16
298 #endif
299
300 #ifndef IPV4_ADDR_LEN
301 #define IPV4_ADDR_LEN 4
302 #endif
303
304 #ifndef TP_PORT_LEN
305 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
306 #endif
307
308 #ifndef TTL_LEN
309 #define TTL_LEN 1
310 #endif
311
312 #ifndef TCA_ACT_MAX_PRIO
313 #define TCA_ACT_MAX_PRIO 32
314 #endif
315
316 /** UDP port range of VXLAN devices created by the driver. */
317 #define MLX5_VXLAN_PORT_MIN 30000
318 #define MLX5_VXLAN_PORT_MAX 60000
319 #define MLX5_VXLAN_DEVICE_PFX "vmlx_"
320
321 /** Tunnel action type, used for @p type in header structure. */
322 enum flow_tcf_tunact_type {
323         FLOW_TCF_TUNACT_VXLAN_DECAP,
324         FLOW_TCF_TUNACT_VXLAN_ENCAP,
325 };
326
327 /** Flags used for @p mask in tunnel action encap descriptors. */
328 #define FLOW_TCF_ENCAP_ETH_SRC (1u << 0)
329 #define FLOW_TCF_ENCAP_ETH_DST (1u << 1)
330 #define FLOW_TCF_ENCAP_IPV4_SRC (1u << 2)
331 #define FLOW_TCF_ENCAP_IPV4_DST (1u << 3)
332 #define FLOW_TCF_ENCAP_IPV6_SRC (1u << 4)
333 #define FLOW_TCF_ENCAP_IPV6_DST (1u << 5)
334 #define FLOW_TCF_ENCAP_UDP_SRC (1u << 6)
335 #define FLOW_TCF_ENCAP_UDP_DST (1u << 7)
336 #define FLOW_TCF_ENCAP_VXLAN_VNI (1u << 8)
337
338 /**
339  * Structure for holding netlink context.
340  * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
341  * Using this (8KB) buffer size ensures that netlink messages will never be
342  * truncated.
343  */
344 struct mlx5_flow_tcf_context {
345         struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
346         uint32_t seq; /* Message sequence number. */
347         uint32_t buf_size; /* Message buffer size. */
348         uint8_t *buf; /* Message buffer. */
349 };
350
351 /** Structure used when extracting the values of flow counters
352  * from a netlink message.
353  */
354 struct flow_tcf_stats_basic {
355         bool valid;
356         struct gnet_stats_basic counters;
357 };
358
359 /** Empty masks for known item types. */
360 static const union {
361         struct rte_flow_item_port_id port_id;
362         struct rte_flow_item_eth eth;
363         struct rte_flow_item_vlan vlan;
364         struct rte_flow_item_ipv4 ipv4;
365         struct rte_flow_item_ipv6 ipv6;
366         struct rte_flow_item_tcp tcp;
367         struct rte_flow_item_udp udp;
368 } flow_tcf_mask_empty;
369
370 /** Supported masks for known item types. */
371 static const struct {
372         struct rte_flow_item_port_id port_id;
373         struct rte_flow_item_eth eth;
374         struct rte_flow_item_vlan vlan;
375         struct rte_flow_item_ipv4 ipv4;
376         struct rte_flow_item_ipv6 ipv6;
377         struct rte_flow_item_tcp tcp;
378         struct rte_flow_item_udp udp;
379 } flow_tcf_mask_supported = {
380         .port_id = {
381                 .id = 0xffffffff,
382         },
383         .eth = {
384                 .type = RTE_BE16(0xffff),
385                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
386                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
387         },
388         .vlan = {
389                 /* PCP and VID only, no DEI. */
390                 .tci = RTE_BE16(0xefff),
391                 .inner_type = RTE_BE16(0xffff),
392         },
393         .ipv4.hdr = {
394                 .next_proto_id = 0xff,
395                 .src_addr = RTE_BE32(0xffffffff),
396                 .dst_addr = RTE_BE32(0xffffffff),
397         },
398         .ipv6.hdr = {
399                 .proto = 0xff,
400                 .src_addr =
401                         "\xff\xff\xff\xff\xff\xff\xff\xff"
402                         "\xff\xff\xff\xff\xff\xff\xff\xff",
403                 .dst_addr =
404                         "\xff\xff\xff\xff\xff\xff\xff\xff"
405                         "\xff\xff\xff\xff\xff\xff\xff\xff",
406         },
407         .tcp.hdr = {
408                 .src_port = RTE_BE16(0xffff),
409                 .dst_port = RTE_BE16(0xffff),
410                 .tcp_flags = 0xff,
411         },
412         .udp.hdr = {
413                 .src_port = RTE_BE16(0xffff),
414                 .dst_port = RTE_BE16(0xffff),
415         },
416 };
417
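/*
 * Helpers to compute the space a netlink attribute occupies in the message
 * buffer: the attribute header, a nested attribute, a fixed-size payload and
 * a zero-terminated string payload, all rounded up with MNL_ALIGN().
 */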
418 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
419 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
420 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
421 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
422 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
423
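/*
 * Size of the port ID to ifindex table: every port in the switch domain,
 * plus one slot for the device itself when it belongs to no domain, plus a
 * terminating entry with a zero ifindex.
 */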
424 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
425
426 /** DPDK port to network interface index (ifindex) conversion. */
427 struct flow_tcf_ptoi {
428         uint16_t port_id; /**< DPDK port ID. */
429         unsigned int ifindex; /**< Network interface index. */
430 };
431
432 /* Due to a limitation of the driver/FW. */
433 #define MLX5_TCF_GROUP_ID_MAX 3
434 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
435
436 #define MLX5_TCF_FATE_ACTIONS \
437         (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
438          MLX5_FLOW_ACTION_JUMP)
439
440 #define MLX5_TCF_VLAN_ACTIONS \
441         (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
442          MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
443
444 #define MLX5_TCF_VXLAN_ACTIONS \
445         (MLX5_FLOW_ACTION_VXLAN_ENCAP | MLX5_FLOW_ACTION_VXLAN_DECAP)
446
447 #define MLX5_TCF_PEDIT_ACTIONS \
448         (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
449          MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
450          MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
451          MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
452          MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
453
454 #define MLX5_TCF_CONFIG_ACTIONS \
455         (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
456          MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
457          MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
458          (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
459
460 #define MAX_PEDIT_KEYS 128
461 #define SZ_PEDIT_KEY_VAL 4
462
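/*
 * Each pedit key rewrites one 4-byte word, so the number of keys for a field
 * is its length rounded up: e.g. a 16-byte IPv6 address needs 4 keys, while
 * a 6-byte MAC address needs 2 (the second one partially masked).
 */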
463 #define NUM_OF_PEDIT_KEYS(sz) \
464         (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
465
466 struct pedit_key_ex {
467         enum pedit_header_type htype;
468         enum pedit_cmd cmd;
469 };
470
471 struct pedit_parser {
472         struct tc_pedit_sel sel;
473         struct tc_pedit_key keys[MAX_PEDIT_KEYS];
474         struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
475 };
476
477 /**
478  * Create space for using the implicitly created TC flow counter.
479  *
483  * @return
484  *   A pointer to the counter data structure, NULL otherwise and
485  *   rte_errno is set.
486  */
487 static struct mlx5_flow_counter *
488 flow_tcf_counter_new(void)
489 {
490         struct mlx5_flow_counter *cnt;
491
492         /*
493          * E-Switch counters cannot be shared and their IDs are unknown;
494          * all are currently returned with ID 0. Switching to unique IDs
495          * may be better in the future.
496          */
497         struct mlx5_flow_counter tmpl = {
498                 .ref_cnt = 1,
499         };
500         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
501         if (!cnt) {
502                 rte_errno = ENOMEM;
503                 return NULL;
504         }
505         *cnt = tmpl;
506         /* Implicit counter, do not add to list. */
507         return cnt;
508 }
509
510 /**
511  * Set pedit key of MAC address
512  *
513  * @param[in] actions
514  *   pointer to action specification
515  * @param[in,out] p_parser
516  *   pointer to pedit_parser
517  */
518 static void
519 flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
520                            struct pedit_parser *p_parser)
521 {
522         int idx = p_parser->sel.nkeys;
523         uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
524                                         offsetof(struct ether_hdr, s_addr) :
525                                         offsetof(struct ether_hdr, d_addr);
526         const struct rte_flow_action_set_mac *conf =
527                 (const struct rte_flow_action_set_mac *)actions->conf;
528
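	/*
	 * A tc-pedit key rewrites a 32-bit word as (old & mask) ^ val, so the
	 * zero mask (~UINT32_MAX) of the first key replaces the word entirely,
	 * while the 0xFFFF0000 mask of the second key is meant to preserve the
	 * two bytes located beyond the 6-byte MAC address.
	 */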
529         p_parser->keys[idx].off = off;
530         p_parser->keys[idx].mask = ~UINT32_MAX;
531         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
532         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
533         memcpy(&p_parser->keys[idx].val,
534                 conf->mac_addr, SZ_PEDIT_KEY_VAL);
535         idx++;
536         p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
537         p_parser->keys[idx].mask = 0xFFFF0000;
538         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
539         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
540         memcpy(&p_parser->keys[idx].val,
541                 conf->mac_addr + SZ_PEDIT_KEY_VAL,
542                 ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
543         p_parser->sel.nkeys = (++idx);
544 }
545
546 /**
547  * Set pedit key for decrementing or setting the TTL
548  *
549  * @param[in] actions
550  *   pointer to action specification
551  * @param[in,out] p_parser
552  *   pointer to pedit_parser
553  * @param[in] item_flags
554  *   flags of all items present
555  */
556 static void
557 flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
558                                 struct pedit_parser *p_parser,
559                                 uint64_t item_flags)
560 {
561         int idx = p_parser->sel.nkeys;
562
563         p_parser->keys[idx].mask = 0xFFFFFF00;
564         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
565                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
566                 p_parser->keys[idx].off =
567                         offsetof(struct ipv4_hdr, time_to_live);
568         }
569         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
570                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
571                 p_parser->keys[idx].off =
572                         offsetof(struct ipv6_hdr, hop_limits);
573         }
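	/*
	 * TTL decrement is expressed as a pedit ADD of 0xFF: adding 255 to the
	 * TTL byte (modulo 256) is equivalent to subtracting one, while the
	 * 0xFFFFFF00 mask keeps the rest of the word untouched.
	 */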
574         if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
575                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
576                 p_parser->keys[idx].val = 0x000000FF;
577         } else {
578                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
579                 p_parser->keys[idx].val =
580                         (__u32)((const struct rte_flow_action_set_ttl *)
581                          actions->conf)->ttl_value;
582         }
583         p_parser->sel.nkeys = (++idx);
584 }
585
586 /**
587  * Set pedit key of transport (TCP/UDP) port value
588  *
589  * @param[in] actions
590  *   pointer to action specification
591  * @param[in,out] p_parser
592  *   pointer to pedit_parser
593  * @param[in] item_flags
594  *   flags of all items present
595  */
596 static void
597 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
598                                 struct pedit_parser *p_parser,
599                                 uint64_t item_flags)
600 {
601         int idx = p_parser->sel.nkeys;
602
603         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
604                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
605         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
606                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
607         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
608         /* Offset of src/dst port is the same for TCP and UDP. */
609         p_parser->keys[idx].off =
610                 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
611                 offsetof(struct tcp_hdr, src_port) :
612                 offsetof(struct tcp_hdr, dst_port);
613         p_parser->keys[idx].mask = 0xFFFF0000;
614         p_parser->keys[idx].val =
615                 (__u32)((const struct rte_flow_action_set_tp *)
616                                 actions->conf)->port;
617         p_parser->sel.nkeys = (++idx);
618 }
619
620 /**
621  * Set pedit key of ipv6 address
622  *
623  * @param[in] actions
624  *   pointer to action specification
625  * @param[in,out] p_parser
626  *   pointer to pedit_parser
627  */
628 static void
629 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
630                                  struct pedit_parser *p_parser)
631 {
632         int idx = p_parser->sel.nkeys;
633         int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
634         int off_base =
635                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
636                 offsetof(struct ipv6_hdr, src_addr) :
637                 offsetof(struct ipv6_hdr, dst_addr);
638         const struct rte_flow_action_set_ipv6 *conf =
639                 (const struct rte_flow_action_set_ipv6 *)actions->conf;
640
641         for (int i = 0; i < keys; i++, idx++) {
642                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
643                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
644                 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
645                 p_parser->keys[idx].mask = ~UINT32_MAX;
646                 memcpy(&p_parser->keys[idx].val,
647                         conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
648                         SZ_PEDIT_KEY_VAL);
649         }
650         p_parser->sel.nkeys += keys;
651 }
652
653 /**
654  * Set pedit key of ipv4 address
655  *
656  * @param[in] actions
657  *   pointer to action specification
658  * @param[in,out] p_parser
659  *   pointer to pedit_parser
660  */
661 static void
662 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
663                                  struct pedit_parser *p_parser)
664 {
665         int idx = p_parser->sel.nkeys;
666
667         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
668         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
669         p_parser->keys[idx].off =
670                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
671                 offsetof(struct ipv4_hdr, src_addr) :
672                 offsetof(struct ipv4_hdr, dst_addr);
673         p_parser->keys[idx].mask = ~UINT32_MAX;
674         p_parser->keys[idx].val =
675                 ((const struct rte_flow_action_set_ipv4 *)
676                  actions->conf)->ipv4_addr;
677         p_parser->sel.nkeys = (++idx);
678 }
679
680 /**
681  * Create the pedit's nested attribute in the netlink message
682  * using the pre-allocated message buffer.
683  *
684  * @param[in,out] nl
685  *   pointer to pre-allocated netlink message buffer
686  * @param[in,out] actions
687  *   pointer to pointer of actions specification.
690  * @param[in] item_flags
691  *   flags of all items present
692  */
693 static void
694 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
695                               const struct rte_flow_action **actions,
696                               uint64_t item_flags)
697 {
698         struct pedit_parser p_parser;
699         struct nlattr *na_act_options;
700         struct nlattr *na_pedit_keys;
701
702         memset(&p_parser, 0, sizeof(p_parser));
703         mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
704         na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
705         /* All modify-header actions should be in a single tc-pedit action. */
706         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
707                 switch ((*actions)->type) {
708                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
709                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
710                         flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
711                         break;
712                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
713                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
714                         flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
715                         break;
716                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
717                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
718                         flow_tcf_pedit_key_set_tp_port(*actions,
719                                                         &p_parser, item_flags);
720                         break;
721                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
722                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
723                         flow_tcf_pedit_key_set_dec_ttl(*actions,
724                                                         &p_parser, item_flags);
725                         break;
726                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
727                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
728                         flow_tcf_pedit_key_set_mac(*actions, &p_parser);
729                         break;
730                 default:
731                         goto pedit_mnl_msg_done;
732                 }
733         }
734 pedit_mnl_msg_done:
735         p_parser.sel.action = TC_ACT_PIPE;
736         mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
737                      sizeof(p_parser.sel) +
738                      p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
739                      &p_parser);
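	/*
	 * Extended key attributes (header type and command) go in a separate
	 * nested list, one entry per key, in the same order as the keys
	 * carried by TCA_PEDIT_PARMS_EX above.
	 */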
740         na_pedit_keys =
741                 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
742         for (int i = 0; i < p_parser.sel.nkeys; i++) {
743                 struct nlattr *na_pedit_key =
744                         mnl_attr_nest_start(nl,
745                                             TCA_PEDIT_KEY_EX | NLA_F_NESTED);
746                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
747                                  p_parser.keys_ex[i].htype);
748                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
749                                  p_parser.keys_ex[i].cmd);
750                 mnl_attr_nest_end(nl, na_pedit_key);
751         }
752         mnl_attr_nest_end(nl, na_pedit_keys);
753         mnl_attr_nest_end(nl, na_act_options);
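	/*
	 * Step back to the last consumed pedit action so that the caller's
	 * loop increment lands on the action which terminated this one.
	 */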
754         (*actions)--;
755 }
756
757 /**
758  * Calculate the maximum memory size of one TC-pedit action.
759  * One TC-pedit action can contain a set of keys, each defining
760  * a rewrite element (rte_flow action).
761  *
762  * @param[in,out] actions
763  *   actions specification.
764  * @param[in,out] action_flags
765  *   actions flags
768  * @return
769  *   Max memory size of one TC-pedit action
770  */
771 static int
772 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
773                                 uint64_t *action_flags)
774 {
775         int pedit_size = 0;
776         int keys = 0;
777         uint64_t flags = 0;
778
779         pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
780                       SZ_NLATTR_STRZ_OF("pedit") +
781                       SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
782         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
783                 switch ((*actions)->type) {
784                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
785                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
786                         flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
787                         break;
788                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
789                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
790                         flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
791                         break;
792                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
793                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
794                         flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
795                         break;
796                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
797                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
798                         flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
799                         break;
800                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
801                         /* Port size is the same for TCP and UDP. */
802                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
803                         flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
804                         break;
805                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
806                         /* Port size is the same for TCP and UDP. */
807                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
808                         flags |= MLX5_FLOW_ACTION_SET_TP_DST;
809                         break;
810                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
811                         keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
812                         flags |= MLX5_FLOW_ACTION_SET_TTL;
813                         break;
814                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
815                         keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
816                         flags |= MLX5_FLOW_ACTION_DEC_TTL;
817                         break;
818                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
819                         keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
820                         flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
821                         break;
822                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
823                         keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
824                         flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
825                         break;
826                 default:
827                         goto get_pedit_action_size_done;
828                 }
829         }
830 get_pedit_action_size_done:
831         /* TCA_PEDIT_PARMS_EX */
832         pedit_size +=
833                 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
834                                   keys * sizeof(struct tc_pedit_key));
835         pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
836         pedit_size += keys *
837                       /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
838                       (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
839                        SZ_NLATTR_DATA_OF(2));
840         (*action_flags) |= flags;
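	/* Step back so that the caller's loop re-examines the terminating action. */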
841         (*actions)--;
842         return pedit_size;
843 }
844
845 /**
846  * Retrieve mask for pattern item.
847  *
848  * This function does basic sanity checks on a pattern item in order to
849  * return the most appropriate mask for it.
850  *
851  * @param[in] item
852  *   Item specification.
853  * @param[in] mask_default
854  *   Default mask for pattern item as specified by the flow API.
855  * @param[in] mask_supported
856  *   Mask fields supported by the implementation.
857  * @param[in] mask_empty
858  *   Empty mask to return when there is no specification.
859  * @param[out] error
860  *   Perform verbose error reporting if not NULL.
861  *
862  * @return
863  *   Either @p item->mask or one of the mask parameters on success, NULL
864  *   otherwise and rte_errno is set.
865  */
866 static const void *
867 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
868                    const void *mask_supported, const void *mask_empty,
869                    size_t mask_size, struct rte_flow_error *error)
870 {
871         const uint8_t *mask;
872         size_t i;
873
874         /* item->last and item->mask cannot exist without item->spec. */
875         if (!item->spec && (item->mask || item->last)) {
876                 rte_flow_error_set(error, EINVAL,
877                                    RTE_FLOW_ERROR_TYPE_ITEM, item,
878                                    "\"mask\" or \"last\" field provided without"
879                                    " a corresponding \"spec\"");
880                 return NULL;
881         }
882         /* No spec, no mask, no problem. */
883         if (!item->spec)
884                 return mask_empty;
885         mask = item->mask ? item->mask : mask_default;
886         assert(mask);
887         /*
888          * Single-pass check to make sure that:
889          * - Mask is supported, no bits are set outside mask_supported.
890          * - Both item->spec and item->last are included in mask.
891          */
892         for (i = 0; i != mask_size; ++i) {
893                 if (!mask[i])
894                         continue;
895                 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
896                     ((const uint8_t *)mask_supported)[i]) {
897                         rte_flow_error_set(error, ENOTSUP,
898                                            RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
899                                            "unsupported field found"
900                                            " in \"mask\"");
901                         return NULL;
902                 }
903                 if (item->last &&
904                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
905                     (((const uint8_t *)item->last)[i] & mask[i])) {
906                         rte_flow_error_set(error, EINVAL,
907                                            RTE_FLOW_ERROR_TYPE_ITEM_LAST,
908                                            item->last,
909                                            "range between \"spec\" and \"last\""
910                                            " not comprised in \"mask\"");
911                         return NULL;
912                 }
913         }
914         return mask;
915 }
916
917 /**
918  * Build a conversion table between port ID and ifindex.
919  *
920  * @param[in] dev
921  *   Pointer to Ethernet device.
922  * @param[out] ptoi
923  *   Pointer to ptoi table.
924  * @param[in] len
925  *   Size of ptoi table provided.
926  *
927  * @return
928  *   Size of ptoi table filled.
929  */
930 static unsigned int
931 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
932                           unsigned int len)
933 {
934         unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
935         uint16_t port_id[n + 1];
936         unsigned int i;
937         unsigned int own = 0;
938
939         /* At least one port is needed when no switch domain is present. */
940         if (!n) {
941                 n = 1;
942                 port_id[0] = dev->data->port_id;
943         } else {
944                 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
945         }
946         if (n > len)
947                 return 0;
948         for (i = 0; i != n; ++i) {
949                 struct rte_eth_dev_info dev_info;
950
951                 rte_eth_dev_info_get(port_id[i], &dev_info);
952                 if (port_id[i] == dev->data->port_id)
953                         own = i;
954                 ptoi[i].port_id = port_id[i];
955                 ptoi[i].ifindex = dev_info.if_index;
956         }
957         /* Ensure first entry of ptoi[] is the current device. */
958         if (own) {
959                 ptoi[n] = ptoi[0];
960                 ptoi[0] = ptoi[own];
961                 ptoi[own] = ptoi[n];
962         }
963         /* An entry with zero ifindex terminates ptoi[]. */
964         ptoi[n].port_id = 0;
965         ptoi[n].ifindex = 0;
966         return n;
967 }
968
969 /**
970  * Verify the @p attr will be correctly understood by the E-switch.
971  *
972  * @param[in] attr
973  *   Pointer to flow attributes
974  * @param[out] error
975  *   Pointer to error structure.
976  *
977  * @return
978  *   0 on success, a negative errno value otherwise and rte_errno is set.
979  */
980 static int
981 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
982                              struct rte_flow_error *error)
983 {
984         /*
985          * Supported attributes: groups, some priorities and ingress only.
986          * group is supported only if kernel supports chain. Don't care about
987          * Group is supported only if the kernel supports chains. Don't care
988          * about transfer as it is the caller's problem.
989         if (attr->group > MLX5_TCF_GROUP_ID_MAX)
990                 return rte_flow_error_set(error, ENOTSUP,
991                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
992                                           "group ID larger than "
993                                           RTE_STR(MLX5_TCF_GROUP_ID_MAX)
994                                           " isn't supported");
995         else if (attr->group > 0 &&
996                  attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
997                 return rte_flow_error_set(error, ENOTSUP,
998                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
999                                           attr,
1000                                           "lowest priority level is "
1001                                           RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
1002                                           " when group is configured");
1003         else if (attr->priority > 0xfffe)
1004                 return rte_flow_error_set(error, ENOTSUP,
1005                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1006                                           attr,
1007                                           "lowest priority level is 0xfffe");
1008         if (!attr->ingress)
1009                 return rte_flow_error_set(error, EINVAL,
1010                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1011                                           attr, "only ingress is supported");
1012         if (attr->egress)
1013                 return rte_flow_error_set(error, ENOTSUP,
1014                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1015                                           attr, "egress is not supported");
1016         return 0;
1017 }
1018
1019 /**
1020  * Validate flow for E-Switch.
1021  *
1022  * @param[in] priv
1023  *   Pointer to the priv structure.
1024  * @param[in] attr
1025  *   Pointer to the flow attributes.
1026  * @param[in] items
1027  *   Pointer to the list of items.
1028  * @param[in] actions
1029  *   Pointer to the list of actions.
1030  * @param[out] error
1031  *   Pointer to the error structure.
1032  *
1033  * @return
1034  *   0 on success, a negative errno value otherwise and rte_errno is set.
1035  */
1036 static int
1037 flow_tcf_validate(struct rte_eth_dev *dev,
1038                   const struct rte_flow_attr *attr,
1039                   const struct rte_flow_item items[],
1040                   const struct rte_flow_action actions[],
1041                   struct rte_flow_error *error)
1042 {
1043         union {
1044                 const struct rte_flow_item_port_id *port_id;
1045                 const struct rte_flow_item_eth *eth;
1046                 const struct rte_flow_item_vlan *vlan;
1047                 const struct rte_flow_item_ipv4 *ipv4;
1048                 const struct rte_flow_item_ipv6 *ipv6;
1049                 const struct rte_flow_item_tcp *tcp;
1050                 const struct rte_flow_item_udp *udp;
1051         } spec, mask;
1052         union {
1053                 const struct rte_flow_action_port_id *port_id;
1054                 const struct rte_flow_action_jump *jump;
1055                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1056                 const struct rte_flow_action_of_set_vlan_vid *
1057                         of_set_vlan_vid;
1058                 const struct rte_flow_action_of_set_vlan_pcp *
1059                         of_set_vlan_pcp;
1060                 const struct rte_flow_action_set_ipv4 *set_ipv4;
1061                 const struct rte_flow_action_set_ipv6 *set_ipv6;
1062         } conf;
1063         uint64_t item_flags = 0;
1064         uint64_t action_flags = 0;
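	/* -1 wraps to 0xff, which the shared item validators treat as "unset". */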
1065         uint8_t next_protocol = -1;
1066         unsigned int tcm_ifindex = 0;
1067         uint8_t pedit_validated = 0;
1068         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1069         struct rte_eth_dev *port_id_dev = NULL;
1070         bool in_port_id_set = false;
1071         int ret;
1072
1073         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1074                                                 PTOI_TABLE_SZ_MAX(dev)));
1075         ret = flow_tcf_validate_attributes(attr, error);
1076         if (ret < 0)
1077                 return ret;
1078         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1079                 unsigned int i;
1080
1081                 switch (items->type) {
1082                 case RTE_FLOW_ITEM_TYPE_VOID:
1083                         break;
1084                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1085                         mask.port_id = flow_tcf_item_mask
1086                                 (items, &rte_flow_item_port_id_mask,
1087                                  &flow_tcf_mask_supported.port_id,
1088                                  &flow_tcf_mask_empty.port_id,
1089                                  sizeof(flow_tcf_mask_supported.port_id),
1090                                  error);
1091                         if (!mask.port_id)
1092                                 return -rte_errno;
1093                         if (mask.port_id == &flow_tcf_mask_empty.port_id) {
1094                                 in_port_id_set = 1;
1095                                 break;
1096                         }
1097                         spec.port_id = items->spec;
1098                         if (mask.port_id->id && mask.port_id->id != 0xffffffff)
1099                                 return rte_flow_error_set
1100                                         (error, ENOTSUP,
1101                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1102                                          mask.port_id,
1103                                          "no support for partial mask on"
1104                                          " \"id\" field");
1105                         if (!mask.port_id->id)
1106                                 i = 0;
1107                         else
1108                                 for (i = 0; ptoi[i].ifindex; ++i)
1109                                         if (ptoi[i].port_id == spec.port_id->id)
1110                                                 break;
1111                         if (!ptoi[i].ifindex)
1112                                 return rte_flow_error_set
1113                                         (error, ENODEV,
1114                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1115                                          spec.port_id,
1116                                          "missing data to convert port ID to"
1117                                          " ifindex");
1118                         if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
1119                                 return rte_flow_error_set
1120                                         (error, ENOTSUP,
1121                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1122                                          spec.port_id,
1123                                          "cannot match traffic for"
1124                                          " several port IDs through"
1125                                          " a single flow rule");
1126                         tcm_ifindex = ptoi[i].ifindex;
1127                         in_port_id_set = 1;
1128                         break;
1129                 case RTE_FLOW_ITEM_TYPE_ETH:
1130                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1131                                                           error);
1132                         if (ret < 0)
1133                                 return ret;
1134                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1135                         /* TODO:
1136                          * Redundant check due to different supported mask.
1137                          * Same for the rest of items.
1138                          */
1139                         mask.eth = flow_tcf_item_mask
1140                                 (items, &rte_flow_item_eth_mask,
1141                                  &flow_tcf_mask_supported.eth,
1142                                  &flow_tcf_mask_empty.eth,
1143                                  sizeof(flow_tcf_mask_supported.eth),
1144                                  error);
1145                         if (!mask.eth)
1146                                 return -rte_errno;
1147                         if (mask.eth->type && mask.eth->type !=
1148                             RTE_BE16(0xffff))
1149                                 return rte_flow_error_set
1150                                         (error, ENOTSUP,
1151                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1152                                          mask.eth,
1153                                          "no support for partial mask on"
1154                                          " \"type\" field");
1155                         break;
1156                 case RTE_FLOW_ITEM_TYPE_VLAN:
1157                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1158                                                            error);
1159                         if (ret < 0)
1160                                 return ret;
1161                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1162                         mask.vlan = flow_tcf_item_mask
1163                                 (items, &rte_flow_item_vlan_mask,
1164                                  &flow_tcf_mask_supported.vlan,
1165                                  &flow_tcf_mask_empty.vlan,
1166                                  sizeof(flow_tcf_mask_supported.vlan),
1167                                  error);
1168                         if (!mask.vlan)
1169                                 return -rte_errno;
1170                         if ((mask.vlan->tci & RTE_BE16(0xe000) &&
1171                              (mask.vlan->tci & RTE_BE16(0xe000)) !=
1172                               RTE_BE16(0xe000)) ||
1173                             (mask.vlan->tci & RTE_BE16(0x0fff) &&
1174                              (mask.vlan->tci & RTE_BE16(0x0fff)) !=
1175                               RTE_BE16(0x0fff)) ||
1176                             (mask.vlan->inner_type &&
1177                              mask.vlan->inner_type != RTE_BE16(0xffff)))
1178                                 return rte_flow_error_set
1179                                         (error, ENOTSUP,
1180                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1181                                          mask.vlan,
1182                                          "no support for partial masks on"
1183                                          " \"tci\" (PCP and VID parts) and"
1184                                          " \"inner_type\" fields");
1185                         break;
1186                 case RTE_FLOW_ITEM_TYPE_IPV4:
1187                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1188                                                            error);
1189                         if (ret < 0)
1190                                 return ret;
1191                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1192                         mask.ipv4 = flow_tcf_item_mask
1193                                 (items, &rte_flow_item_ipv4_mask,
1194                                  &flow_tcf_mask_supported.ipv4,
1195                                  &flow_tcf_mask_empty.ipv4,
1196                                  sizeof(flow_tcf_mask_supported.ipv4),
1197                                  error);
1198                         if (!mask.ipv4)
1199                                 return -rte_errno;
1200                         if (mask.ipv4->hdr.next_proto_id &&
1201                             mask.ipv4->hdr.next_proto_id != 0xff)
1202                                 return rte_flow_error_set
1203                                         (error, ENOTSUP,
1204                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1205                                          mask.ipv4,
1206                                          "no support for partial mask on"
1207                                          " \"hdr.next_proto_id\" field");
1208                         else if (mask.ipv4->hdr.next_proto_id)
1209                                 next_protocol =
1210                                         ((const struct rte_flow_item_ipv4 *)
1211                                          (items->spec))->hdr.next_proto_id;
1212                         break;
1213                 case RTE_FLOW_ITEM_TYPE_IPV6:
1214                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1215                                                            error);
1216                         if (ret < 0)
1217                                 return ret;
1218                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1219                         mask.ipv6 = flow_tcf_item_mask
1220                                 (items, &rte_flow_item_ipv6_mask,
1221                                  &flow_tcf_mask_supported.ipv6,
1222                                  &flow_tcf_mask_empty.ipv6,
1223                                  sizeof(flow_tcf_mask_supported.ipv6),
1224                                  error);
1225                         if (!mask.ipv6)
1226                                 return -rte_errno;
1227                         if (mask.ipv6->hdr.proto &&
1228                             mask.ipv6->hdr.proto != 0xff)
1229                                 return rte_flow_error_set
1230                                         (error, ENOTSUP,
1231                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1232                                          mask.ipv6,
1233                                          "no support for partial mask on"
1234                                          " \"hdr.proto\" field");
1235                         else if (mask.ipv6->hdr.proto)
1236                                 next_protocol =
1237                                         ((const struct rte_flow_item_ipv6 *)
1238                                          (items->spec))->hdr.proto;
1239                         break;
1240                 case RTE_FLOW_ITEM_TYPE_UDP:
1241                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1242                                                           next_protocol, error);
1243                         if (ret < 0)
1244                                 return ret;
1245                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1246                         mask.udp = flow_tcf_item_mask
1247                                 (items, &rte_flow_item_udp_mask,
1248                                  &flow_tcf_mask_supported.udp,
1249                                  &flow_tcf_mask_empty.udp,
1250                                  sizeof(flow_tcf_mask_supported.udp),
1251                                  error);
1252                         if (!mask.udp)
1253                                 return -rte_errno;
1254                         break;
1255                 case RTE_FLOW_ITEM_TYPE_TCP:
1256                         ret = mlx5_flow_validate_item_tcp
1257                                              (items, item_flags,
1258                                               next_protocol,
1259                                               &flow_tcf_mask_supported.tcp,
1260                                               error);
1261                         if (ret < 0)
1262                                 return ret;
1263                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1264                         mask.tcp = flow_tcf_item_mask
1265                                 (items, &rte_flow_item_tcp_mask,
1266                                  &flow_tcf_mask_supported.tcp,
1267                                  &flow_tcf_mask_empty.tcp,
1268                                  sizeof(flow_tcf_mask_supported.tcp),
1269                                  error);
1270                         if (!mask.tcp)
1271                                 return -rte_errno;
1272                         break;
1273                 default:
1274                         return rte_flow_error_set(error, ENOTSUP,
1275                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1276                                                   NULL, "item not supported");
1277                 }
1278         }
1279         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1280                 unsigned int i;
1281                 uint64_t current_action_flag = 0;
1282
1283                 switch (actions->type) {
1284                 case RTE_FLOW_ACTION_TYPE_VOID:
1285                         break;
1286                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1287                         current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1288                         if (!actions->conf)
1289                                 break;
1290                         conf.port_id = actions->conf;
1291                         if (conf.port_id->original)
1292                                 i = 0;
1293                         else
1294                                 for (i = 0; ptoi[i].ifindex; ++i)
1295                                         if (ptoi[i].port_id == conf.port_id->id)
1296                                                 break;
1297                         if (!ptoi[i].ifindex)
1298                                 return rte_flow_error_set
1299                                         (error, ENODEV,
1300                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1301                                          conf.port_id,
1302                                          "missing data to convert port ID to"
1303                                          " ifindex");
1304                         port_id_dev = &rte_eth_devices[conf.port_id->id];
1305                         break;
1306                 case RTE_FLOW_ACTION_TYPE_JUMP:
1307                         current_action_flag = MLX5_FLOW_ACTION_JUMP;
1308                         if (!actions->conf)
1309                                 break;
1310                         conf.jump = actions->conf;
1311                         if (attr->group >= conf.jump->group)
1312                                 return rte_flow_error_set
1313                                         (error, ENOTSUP,
1314                                          RTE_FLOW_ERROR_TYPE_ACTION,
1315                                          actions,
1316                                          "can jump only to a group forward");
1317                         break;
1318                 case RTE_FLOW_ACTION_TYPE_DROP:
1319                         current_action_flag = MLX5_FLOW_ACTION_DROP;
1320                         break;
1321                 case RTE_FLOW_ACTION_TYPE_COUNT:
1322                         break;
1323                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1324                         current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1325                         break;
1326                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1327                         current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1328                         break;
1329                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1330                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1331                                 return rte_flow_error_set
1332                                         (error, ENOTSUP,
1333                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1334                                          "vlan modify is not supported,"
1335                                          " set action must follow push action");
1336                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1337                         break;
1338                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1339                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1340                                 return rte_flow_error_set
1341                                         (error, ENOTSUP,
1342                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1343                                          "vlan modify is not supported,"
1344                                          " set action must follow push action");
1345                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1346                         break;
1347                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1348                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1349                         break;
1350                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1351                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1352                         break;
1353                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1354                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1355                         break;
1356                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1357                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1358                         break;
1359                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1360                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1361                         break;
1362                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1363                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1364                         break;
1365                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1366                         current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1367                         break;
1368                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1369                         current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1370                         break;
1371                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1372                         current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
1373                         break;
1374                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1375                         current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
1376                         break;
1377                 default:
1378                         return rte_flow_error_set(error, ENOTSUP,
1379                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1380                                                   actions,
1381                                                   "action not supported");
1382                 }
1383                 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1384                         if (!actions->conf)
1385                                 return rte_flow_error_set(error, EINVAL,
1386                                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1387                                                 actions,
1388                                                 "action configuration not set");
1389                 }
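                /*
                 * Header modify (pedit) actions must form one contiguous
                 * group: once a non-pedit action follows them, any further
                 * pedit action in the list is rejected.
                 */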
1390                 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1391                     pedit_validated)
1392                         return rte_flow_error_set(error, ENOTSUP,
1393                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1394                                                   actions,
1395                                                   "set actions should be "
1396                                                   "listed successively");
1397                 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1398                     (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1399                         pedit_validated = 1;
1400                 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1401                     (action_flags & MLX5_TCF_FATE_ACTIONS))
1402                         return rte_flow_error_set(error, EINVAL,
1403                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1404                                                   actions,
1405                                                   "can't have multiple fate"
1406                                                   " actions");
1407                 action_flags |= current_action_flag;
1408         }
1409         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1410             (action_flags & MLX5_FLOW_ACTION_DROP))
1411                 return rte_flow_error_set(error, ENOTSUP,
1412                                           RTE_FLOW_ERROR_TYPE_ACTION,
1413                                           actions,
1414                                           "set action is not compatible with "
1415                                           "drop action");
1416         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1417             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1418                 return rte_flow_error_set(error, ENOTSUP,
1419                                           RTE_FLOW_ERROR_TYPE_ACTION,
1420                                           actions,
1421                                           "set action must be followed by "
1422                                           "port_id action");
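        /*
         * Each header rewrite action requires the corresponding header to be
         * matched in the pattern, so the rule only applies to packets that
         * actually carry the field being modified.
         */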
1423         if (action_flags &
1424            (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1425                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1426                         return rte_flow_error_set(error, EINVAL,
1427                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1428                                                   actions,
1429                                                   "no ipv4 item found in"
1430                                                   " pattern");
1431         }
1432         if (action_flags &
1433            (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1434                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1435                         return rte_flow_error_set(error, EINVAL,
1436                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1437                                                   actions,
1438                                                   "no ipv6 item found in"
1439                                                   " pattern");
1440         }
1441         if (action_flags &
1442            (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1443                 if (!(item_flags &
1444                      (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1445                       MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1446                         return rte_flow_error_set(error, EINVAL,
1447                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1448                                                   actions,
1449                                                   "no TCP/UDP item found in"
1450                                                   " pattern");
1451         }
1452         /*
1453          * FW syndrome (0xA9C090):
1454          *     set_flow_table_entry: push vlan action fte in fdb can ONLY be
1455          *     forward to the uplink.
1456          */
1457         if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1458             (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1459             ((struct priv *)port_id_dev->data->dev_private)->representor)
1460                 return rte_flow_error_set(error, ENOTSUP,
1461                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1462                                           "vlan push can only be applied"
1463                                           " when forwarding to uplink port");
1464         /*
1465          * FW syndrome (0x294609):
1466          *     set_flow_table_entry: modify/pop/push actions in fdb flow table
1467          *     are supported only while forwarding to vport.
1468          */
1469         if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1470             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1471                 return rte_flow_error_set(error, ENOTSUP,
1472                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1473                                           "vlan actions are supported"
1474                                           " only with port_id action");
1475         if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1476                 return rte_flow_error_set(error, EINVAL,
1477                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1478                                           "no fate action is found");
1479         if (action_flags &
1480            (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
1481                 if (!(item_flags &
1482                      (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
1483                       MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
1484                         return rte_flow_error_set(error, EINVAL,
1485                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1486                                                   actions,
1487                                                   "no IP found in pattern");
1488         }
1489         if (action_flags &
1490             (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
1491                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
1492                         return rte_flow_error_set(error, ENOTSUP,
1493                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1494                                                   actions,
1495                                                   "no ethernet found in"
1496                                                   " pattern");
1497         }
1498         return 0;
1499 }
1500
1501 /**
1502  * Calculate maximum size of memory for flow items of Linux TC flower and
1503  * extract specified items.
1504  *
 * @param[in] attr
 *   Pointer to the flow attributes.
1505  * @param[in] items
1506  *   Pointer to the list of items.
1507  * @param[out] item_flags
1508  *   Pointer to the detected items.
1509  *
1510  * @return
1511  *   Maximum size of memory for items.
1512  */
1513 static int
1514 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1515                             const struct rte_flow_item items[],
1516                             uint64_t *item_flags)
1517 {
1518         int size = 0;
1519         uint64_t flags = 0;
1520
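        /*
         * The returned value is a worst-case estimate of the Netlink
         * attribute space: the buffer is allocated before translation,
         * so overestimating is harmless while underestimating would
         * overflow the message.
         */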
1521         size += SZ_NLATTR_STRZ_OF("flower") +
1522                 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1523                 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
1524         if (attr->group > 0)
1525                 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
1526         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1527                 switch (items->type) {
1528                 case RTE_FLOW_ITEM_TYPE_VOID:
1529                         break;
1530                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1531                         break;
1532                 case RTE_FLOW_ITEM_TYPE_ETH:
1533                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1534                                 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1535                                 /* dst/src MAC addr and mask. */
1536                         flags |= MLX5_FLOW_LAYER_OUTER_L2;
1537                         break;
1538                 case RTE_FLOW_ITEM_TYPE_VLAN:
1539                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1540                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1541                                 /* VLAN Ether type. */
1542                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1543                                 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1544                         flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1545                         break;
1546                 case RTE_FLOW_ITEM_TYPE_IPV4:
1547                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1548                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1549                                 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1550                                 /* dst/src IP addr and mask. */
1551                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1552                         break;
1553                 case RTE_FLOW_ITEM_TYPE_IPV6:
1554                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1555                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1556                                 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1557                                 /* dst/src IP addr and mask. */
1558                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1559                         break;
1560                 case RTE_FLOW_ITEM_TYPE_UDP:
1561                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1562                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1563                                 /* dst/src port and mask. */
1564                         flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1565                         break;
1566                 case RTE_FLOW_ITEM_TYPE_TCP:
1567                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1568                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1569                                 /* dst/src port and mask. */
1570                         flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1571                         break;
1572                 default:
1573                         DRV_LOG(WARNING,
1574                                 "unsupported item %p type %d,"
1575                                 " items must be validated before flow creation",
1576                                 (const void *)items, items->type);
1577                         break;
1578                 }
1579         }
1580         *item_flags = flags;
1581         return size;
1582 }
1583
1584 /**
1585  * Calculate maximum size of memory for flow actions of Linux TC flower and
1586  * extract specified actions.
1587  *
1588  * @param[in] actions
1589  *   Pointer to the list of actions.
1590  * @param[out] action_flags
1591  *   Pointer to the detected actions.
1592  *
1593  * @return
1594  *   Maximum size of memory for actions.
1595  */
1596 static int
1597 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1598                               uint64_t *action_flags)
1599 {
1600         int size = 0;
1601         uint64_t flags = 0;
1602
1603         size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1604         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1605                 switch (actions->type) {
1606                 case RTE_FLOW_ACTION_TYPE_VOID:
1607                         break;
1608                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1609                         size += SZ_NLATTR_NEST + /* na_act_index. */
1610                                 SZ_NLATTR_STRZ_OF("mirred") +
1611                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1612                                 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1613                         flags |= MLX5_FLOW_ACTION_PORT_ID;
1614                         break;
1615                 case RTE_FLOW_ACTION_TYPE_JUMP:
1616                         size += SZ_NLATTR_NEST + /* na_act_index. */
1617                                 SZ_NLATTR_STRZ_OF("gact") +
1618                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1619                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1620                         flags |= MLX5_FLOW_ACTION_JUMP;
1621                         break;
1622                 case RTE_FLOW_ACTION_TYPE_DROP:
1623                         size += SZ_NLATTR_NEST + /* na_act_index. */
1624                                 SZ_NLATTR_STRZ_OF("gact") +
1625                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1626                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1627                         flags |= MLX5_FLOW_ACTION_DROP;
1628                         break;
1629                 case RTE_FLOW_ACTION_TYPE_COUNT:
1630                         break;
1631                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1632                         flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1633                         goto action_of_vlan;
1634                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1635                         flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1636                         goto action_of_vlan;
1637                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1638                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1639                         goto action_of_vlan;
1640                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1641                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1642                         goto action_of_vlan;
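                /* All OF_*_VLAN actions share the same TC "vlan" action
                 * attribute layout, accounted for below. */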
1643 action_of_vlan:
1644                         size += SZ_NLATTR_NEST + /* na_act_index. */
1645                                 SZ_NLATTR_STRZ_OF("vlan") +
1646                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1647                                 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1648                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1649                                 /* VLAN protocol. */
1650                                 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1651                                 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
1652                         break;
1653                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1654                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1655                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1656                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1657                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1658                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1659                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1660                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1661                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1662                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1663                         size += flow_tcf_get_pedit_actions_size(&actions,
1664                                                                 &flags);
1665                         break;
1666                 default:
1667                         DRV_LOG(WARNING,
1668                                 "unsupported action %p type %d,"
1669                                 " actions must be validated before flow creation",
1670                                 (const void *)actions, actions->type);
1671                         break;
1672                 }
1673         }
1674         *action_flags = flags;
1675         return size;
1676 }
1677
1678 /**
1679  * Brand rtnetlink buffer with unique handle.
1680  *
1681  * This handle should be unique for a given network interface to avoid
1682  * collisions.
1683  *
1684  * @param nlh
1685  *   Pointer to Netlink message.
1686  * @param handle
1687  *   Unique 32-bit handle to use.
1688  */
1689 static void
1690 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1691 {
1692         struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1693
1694         tcm->tcm_handle = handle;
1695         DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1696                 (void *)nlh, handle);
1697 }
1698
1699 /**
1700  * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1701  * memory required, allocates the memory, initializes Netlink message headers
1702  * and sets a unique TC message handle.
1703  *
1704  * @param[in] attr
1705  *   Pointer to the flow attributes.
1706  * @param[in] items
1707  *   Pointer to the list of items.
1708  * @param[in] actions
1709  *   Pointer to the list of actions.
1710  * @param[out] item_flags
1711  *   Pointer to bit mask of all items detected.
1712  * @param[out] action_flags
1713  *   Pointer to bit mask of all actions detected.
1714  * @param[out] error
1715  *   Pointer to the error structure.
1716  *
1717  * @return
1718  *   Pointer to mlx5_flow object on success,
1719  *   otherwise NULL and rte_errno is set.
1720  */
1721 static struct mlx5_flow *
1722 flow_tcf_prepare(const struct rte_flow_attr *attr,
1723                  const struct rte_flow_item items[],
1724                  const struct rte_flow_action actions[],
1725                  uint64_t *item_flags, uint64_t *action_flags,
1726                  struct rte_flow_error *error)
1727 {
1728         size_t size = sizeof(struct mlx5_flow) +
1729                       MNL_ALIGN(sizeof(struct nlmsghdr)) +
1730                       MNL_ALIGN(sizeof(struct tcmsg));
1731         struct mlx5_flow *dev_flow;
1732         struct nlmsghdr *nlh;
1733         struct tcmsg *tcm;
1734
1735         size += flow_tcf_get_items_and_size(attr, items, item_flags);
1736         size += flow_tcf_get_actions_and_size(actions, action_flags);
1737         dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1738         if (!dev_flow) {
1739                 rte_flow_error_set(error, ENOMEM,
1740                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1741                                    "not enough memory to create E-Switch flow");
1742                 return NULL;
1743         }
1744         nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1745         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1746         *dev_flow = (struct mlx5_flow){
1747                 .tcf = (struct mlx5_flow_tcf){
1748                         .nlh = nlh,
1749                         .tcm = tcm,
1750                 },
1751         };
1752         /*
1753          * Generate a reasonably unique handle based on the address of the
1754          * target buffer.
1755          *
1756          * This is straightforward on 32-bit systems where the flow pointer can
1757          * be used directly. Otherwise, its least significant part is taken
1758          * after shifting it by the previous power of two of the pointed buffer
1759          * size.
1760          */
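        /*
         * Illustration (figures assumed for clarity): with a total size of
         * e.g. 400 bytes, rte_align32prevpow2() yields 256 and the pointer
         * is shifted right by 8 bits before being truncated to the 32-bit
         * handle, keeping buffers that are close in memory distinguishable.
         */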
1761         if (sizeof(dev_flow) <= 4)
1762                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1763         else
1764                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1765                                        rte_log2_u32(rte_align32prevpow2(size)));
1766         return dev_flow;
1767 }
1768
1769 /**
1770  * Make adjustments for supporting count actions.
1771  *
1772  * @param[in] dev
1773  *   Pointer to the Ethernet device structure.
1774  * @param[in] dev_flow
1775  *   Pointer to mlx5_flow.
1776  * @param[out] error
1777  *   Pointer to error structure.
1778  *
1779  * @return
1780  *   0 on success, a negative errno value otherwise and rte_errno is set.
1781  */
1782 static int
1783 flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
1784                                   struct mlx5_flow *dev_flow,
1785                                   struct rte_flow_error *error)
1786 {
1787         struct rte_flow *flow = dev_flow->flow;
1788
1789         if (!flow->counter) {
1790                 flow->counter = flow_tcf_counter_new();
1791                 if (!flow->counter)
1792                         return rte_flow_error_set(error, rte_errno,
1793                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1794                                                   NULL,
1795                                                   "cannot get counter"
1796                                                   " context.");
1797         }
1798         return 0;
1799 }
1800
1801 /**
1802  * Translate flow for Linux TC flower and construct Netlink message.
1803  *
1804  * @param[in] dev
1805  *   Pointer to the Ethernet device structure.
1806  * @param[in, out] dev_flow
1807  *   Pointer to the sub flow.
1808  * @param[in] attr
1809  *   Pointer to the flow attributes.
1810  * @param[in] items
1811  *   Pointer to the list of items.
1812  * @param[in] actions
1813  *   Pointer to the list of actions.
1814  * @param[out] error
1815  *   Pointer to the error structure.
1816  *
1817  * @return
1818  *   0 on success, a negative errno value otherwise and rte_errno is set.
1819  */
1820 static int
1821 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1822                    const struct rte_flow_attr *attr,
1823                    const struct rte_flow_item items[],
1824                    const struct rte_flow_action actions[],
1825                    struct rte_flow_error *error)
1826 {
1827         union {
1828                 const struct rte_flow_item_port_id *port_id;
1829                 const struct rte_flow_item_eth *eth;
1830                 const struct rte_flow_item_vlan *vlan;
1831                 const struct rte_flow_item_ipv4 *ipv4;
1832                 const struct rte_flow_item_ipv6 *ipv6;
1833                 const struct rte_flow_item_tcp *tcp;
1834                 const struct rte_flow_item_udp *udp;
1835         } spec, mask;
1836         union {
1837                 const struct rte_flow_action_port_id *port_id;
1838                 const struct rte_flow_action_jump *jump;
1839                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1840                 const struct rte_flow_action_of_set_vlan_vid *
1841                         of_set_vlan_vid;
1842                 const struct rte_flow_action_of_set_vlan_pcp *
1843                         of_set_vlan_pcp;
1844         } conf;
1845         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1846         struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1847         struct tcmsg *tcm = dev_flow->tcf.tcm;
1848         uint32_t na_act_index_cur;
1849         bool eth_type_set = 0;
1850         bool vlan_present = 0;
1851         bool vlan_eth_type_set = 0;
1852         bool ip_proto_set = 0;
1853         struct nlattr *na_flower;
1854         struct nlattr *na_flower_act;
1855         struct nlattr *na_vlan_id = NULL;
1856         struct nlattr *na_vlan_priority = NULL;
1857         uint64_t item_flags = 0;
1858         int ret;
1859
1860         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1861                                                 PTOI_TABLE_SZ_MAX(dev)));
1862         nlh = dev_flow->tcf.nlh;
1863         tcm = dev_flow->tcf.tcm;
1864         /* Prepare API must have been called beforehand. */
1865         assert(nlh != NULL && tcm != NULL);
1866         tcm->tcm_family = AF_UNSPEC;
1867         tcm->tcm_ifindex = ptoi[0].ifindex;
1868         tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1869         /*
1870          * Priority cannot be zero, otherwise the kernel would pick one
1871          * automatically.
1872          */
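        /* The upper 16 bits of tcm_info carry the priority, the lower 16 bits
         * the matched protocol (ETH_P_ALL here). */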
1873         tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1874                                   RTE_BE16(ETH_P_ALL));
1875         if (attr->group > 0)
1876                 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1877         mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1878         na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
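        /* TCA_CLS_FLAGS_SKIP_SW requests hardware offload only; rule
         * insertion fails if the filter cannot be offloaded. */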
1879         mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
1880         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1881                 unsigned int i;
1882
1883                 switch (items->type) {
1884                 case RTE_FLOW_ITEM_TYPE_VOID:
1885                         break;
1886                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1887                         mask.port_id = flow_tcf_item_mask
1888                                 (items, &rte_flow_item_port_id_mask,
1889                                  &flow_tcf_mask_supported.port_id,
1890                                  &flow_tcf_mask_empty.port_id,
1891                                  sizeof(flow_tcf_mask_supported.port_id),
1892                                  error);
1893                         assert(mask.port_id);
1894                         if (mask.port_id == &flow_tcf_mask_empty.port_id)
1895                                 break;
1896                         spec.port_id = items->spec;
1897                         if (!mask.port_id->id)
1898                                 i = 0;
1899                         else
1900                                 for (i = 0; ptoi[i].ifindex; ++i)
1901                                         if (ptoi[i].port_id == spec.port_id->id)
1902                                                 break;
1903                         assert(ptoi[i].ifindex);
1904                         tcm->tcm_ifindex = ptoi[i].ifindex;
1905                         break;
1906                 case RTE_FLOW_ITEM_TYPE_ETH:
1907                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1908                         mask.eth = flow_tcf_item_mask
1909                                 (items, &rte_flow_item_eth_mask,
1910                                  &flow_tcf_mask_supported.eth,
1911                                  &flow_tcf_mask_empty.eth,
1912                                  sizeof(flow_tcf_mask_supported.eth),
1913                                  error);
1914                         assert(mask.eth);
1915                         if (mask.eth == &flow_tcf_mask_empty.eth)
1916                                 break;
1917                         spec.eth = items->spec;
1918                         if (mask.eth->type) {
1919                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1920                                                  spec.eth->type);
1921                                 eth_type_set = 1;
1922                         }
1923                         if (!is_zero_ether_addr(&mask.eth->dst)) {
1924                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1925                                              ETHER_ADDR_LEN,
1926                                              spec.eth->dst.addr_bytes);
1927                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1928                                              ETHER_ADDR_LEN,
1929                                              mask.eth->dst.addr_bytes);
1930                         }
1931                         if (!is_zero_ether_addr(&mask.eth->src)) {
1932                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1933                                              ETHER_ADDR_LEN,
1934                                              spec.eth->src.addr_bytes);
1935                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1936                                              ETHER_ADDR_LEN,
1937                                              mask.eth->src.addr_bytes);
1938                         }
1939                         break;
1940                 case RTE_FLOW_ITEM_TYPE_VLAN:
1941                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1942                         mask.vlan = flow_tcf_item_mask
1943                                 (items, &rte_flow_item_vlan_mask,
1944                                  &flow_tcf_mask_supported.vlan,
1945                                  &flow_tcf_mask_empty.vlan,
1946                                  sizeof(flow_tcf_mask_supported.vlan),
1947                                  error);
1948                         assert(mask.vlan);
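                        /*
                         * Matching a VLAN item implies 802.1Q, so force the
                         * outer Ethernet type unless the ETH item already set
                         * one explicitly.
                         */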
1949                         if (!eth_type_set)
1950                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1951                                                  RTE_BE16(ETH_P_8021Q));
1952                         eth_type_set = 1;
1953                         vlan_present = 1;
1954                         if (mask.vlan == &flow_tcf_mask_empty.vlan)
1955                                 break;
1956                         spec.vlan = items->spec;
1957                         if (mask.vlan->inner_type) {
1958                                 mnl_attr_put_u16(nlh,
1959                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1960                                                  spec.vlan->inner_type);
1961                                 vlan_eth_type_set = 1;
1962                         }
1963                         if (mask.vlan->tci & RTE_BE16(0xe000))
1964                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1965                                                 (rte_be_to_cpu_16
1966                                                  (spec.vlan->tci) >> 13) & 0x7);
1967                         if (mask.vlan->tci & RTE_BE16(0x0fff))
1968                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1969                                                  rte_be_to_cpu_16
1970                                                  (spec.vlan->tci &
1971                                                   RTE_BE16(0x0fff)));
1972                         break;
1973                 case RTE_FLOW_ITEM_TYPE_IPV4:
1974                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1975                         mask.ipv4 = flow_tcf_item_mask
1976                                 (items, &rte_flow_item_ipv4_mask,
1977                                  &flow_tcf_mask_supported.ipv4,
1978                                  &flow_tcf_mask_empty.ipv4,
1979                                  sizeof(flow_tcf_mask_supported.ipv4),
1980                                  error);
1981                         assert(mask.ipv4);
1982                         if (!eth_type_set || !vlan_eth_type_set)
1983                                 mnl_attr_put_u16(nlh,
1984                                                  vlan_present ?
1985                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1986                                                  TCA_FLOWER_KEY_ETH_TYPE,
1987                                                  RTE_BE16(ETH_P_IP));
1988                         eth_type_set = 1;
1989                         vlan_eth_type_set = 1;
1990                         if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1991                                 break;
1992                         spec.ipv4 = items->spec;
1993                         if (mask.ipv4->hdr.next_proto_id) {
1994                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1995                                                 spec.ipv4->hdr.next_proto_id);
1996                                 ip_proto_set = 1;
1997                         }
1998                         if (mask.ipv4->hdr.src_addr) {
1999                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
2000                                                  spec.ipv4->hdr.src_addr);
2001                                 mnl_attr_put_u32(nlh,
2002                                                  TCA_FLOWER_KEY_IPV4_SRC_MASK,
2003                                                  mask.ipv4->hdr.src_addr);
2004                         }
2005                         if (mask.ipv4->hdr.dst_addr) {
2006                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
2007                                                  spec.ipv4->hdr.dst_addr);
2008                                 mnl_attr_put_u32(nlh,
2009                                                  TCA_FLOWER_KEY_IPV4_DST_MASK,
2010                                                  mask.ipv4->hdr.dst_addr);
2011                         }
2012                         break;
2013                 case RTE_FLOW_ITEM_TYPE_IPV6:
2014                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2015                         mask.ipv6 = flow_tcf_item_mask
2016                                 (items, &rte_flow_item_ipv6_mask,
2017                                  &flow_tcf_mask_supported.ipv6,
2018                                  &flow_tcf_mask_empty.ipv6,
2019                                  sizeof(flow_tcf_mask_supported.ipv6),
2020                                  error);
2021                         assert(mask.ipv6);
2022                         if (!eth_type_set || !vlan_eth_type_set)
2023                                 mnl_attr_put_u16(nlh,
2024                                                  vlan_present ?
2025                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
2026                                                  TCA_FLOWER_KEY_ETH_TYPE,
2027                                                  RTE_BE16(ETH_P_IPV6));
2028                         eth_type_set = 1;
2029                         vlan_eth_type_set = 1;
2030                         if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
2031                                 break;
2032                         spec.ipv6 = items->spec;
2033                         if (mask.ipv6->hdr.proto) {
2034                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2035                                                 spec.ipv6->hdr.proto);
2036                                 ip_proto_set = 1;
2037                         }
2038                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
2039                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
2040                                              sizeof(spec.ipv6->hdr.src_addr),
2041                                              spec.ipv6->hdr.src_addr);
2042                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
2043                                              sizeof(mask.ipv6->hdr.src_addr),
2044                                              mask.ipv6->hdr.src_addr);
2045                         }
2046                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
2047                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
2048                                              sizeof(spec.ipv6->hdr.dst_addr),
2049                                              spec.ipv6->hdr.dst_addr);
2050                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
2051                                              sizeof(mask.ipv6->hdr.dst_addr),
2052                                              mask.ipv6->hdr.dst_addr);
2053                         }
2054                         break;
2055                 case RTE_FLOW_ITEM_TYPE_UDP:
2056                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
2057                         mask.udp = flow_tcf_item_mask
2058                                 (items, &rte_flow_item_udp_mask,
2059                                  &flow_tcf_mask_supported.udp,
2060                                  &flow_tcf_mask_empty.udp,
2061                                  sizeof(flow_tcf_mask_supported.udp),
2062                                  error);
2063                         assert(mask.udp);
2064                         if (!ip_proto_set)
2065                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2066                                                 IPPROTO_UDP);
2067                         if (mask.udp == &flow_tcf_mask_empty.udp)
2068                                 break;
2069                         spec.udp = items->spec;
2070                         if (mask.udp->hdr.src_port) {
2071                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
2072                                                  spec.udp->hdr.src_port);
2073                                 mnl_attr_put_u16(nlh,
2074                                                  TCA_FLOWER_KEY_UDP_SRC_MASK,
2075                                                  mask.udp->hdr.src_port);
2076                         }
2077                         if (mask.udp->hdr.dst_port) {
2078                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
2079                                                  spec.udp->hdr.dst_port);
2080                                 mnl_attr_put_u16(nlh,
2081                                                  TCA_FLOWER_KEY_UDP_DST_MASK,
2082                                                  mask.udp->hdr.dst_port);
2083                         }
2084                         break;
2085                 case RTE_FLOW_ITEM_TYPE_TCP:
2086                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
2087                         mask.tcp = flow_tcf_item_mask
2088                                 (items, &rte_flow_item_tcp_mask,
2089                                  &flow_tcf_mask_supported.tcp,
2090                                  &flow_tcf_mask_empty.tcp,
2091                                  sizeof(flow_tcf_mask_supported.tcp),
2092                                  error);
2093                         assert(mask.tcp);
2094                         if (!ip_proto_set)
2095                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2096                                                 IPPROTO_TCP);
2097                         if (mask.tcp == &flow_tcf_mask_empty.tcp)
2098                                 break;
2099                         spec.tcp = items->spec;
2100                         if (mask.tcp->hdr.src_port) {
2101                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
2102                                                  spec.tcp->hdr.src_port);
2103                                 mnl_attr_put_u16(nlh,
2104                                                  TCA_FLOWER_KEY_TCP_SRC_MASK,
2105                                                  mask.tcp->hdr.src_port);
2106                         }
2107                         if (mask.tcp->hdr.dst_port) {
2108                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
2109                                                  spec.tcp->hdr.dst_port);
2110                                 mnl_attr_put_u16(nlh,
2111                                                  TCA_FLOWER_KEY_TCP_DST_MASK,
2112                                                  mask.tcp->hdr.dst_port);
2113                         }
2114                         if (mask.tcp->hdr.tcp_flags) {
2115                                 mnl_attr_put_u16
2116                                         (nlh,
2117                                          TCA_FLOWER_KEY_TCP_FLAGS,
2118                                          rte_cpu_to_be_16
2119                                                 (spec.tcp->hdr.tcp_flags));
2120                                 mnl_attr_put_u16
2121                                         (nlh,
2122                                          TCA_FLOWER_KEY_TCP_FLAGS_MASK,
2123                                          rte_cpu_to_be_16
2124                                                 (mask.tcp->hdr.tcp_flags));
2125                         }
2126                         break;
2127                 default:
2128                         return rte_flow_error_set(error, ENOTSUP,
2129                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2130                                                   NULL, "item not supported");
2131                 }
2132         }
2133         na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
2134         na_act_index_cur = 1;
2135         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2136                 struct nlattr *na_act_index;
2137                 struct nlattr *na_act;
2138                 unsigned int vlan_act;
2139                 unsigned int i;
2140
2141                 switch (actions->type) {
2142                 case RTE_FLOW_ACTION_TYPE_VOID:
2143                         break;
2144                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
2145                         conf.port_id = actions->conf;
2146                         if (conf.port_id->original)
2147                                 i = 0;
2148                         else
2149                                 for (i = 0; ptoi[i].ifindex; ++i)
2150                                         if (ptoi[i].port_id == conf.port_id->id)
2151                                                 break;
2152                         assert(ptoi[i].ifindex);
2153                         na_act_index =
2154                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2155                         assert(na_act_index);
2156                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
2157                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2158                         assert(na_act);
2159                         mnl_attr_put(nlh, TCA_MIRRED_PARMS,
2160                                      sizeof(struct tc_mirred),
2161                                      &(struct tc_mirred){
2162                                         .action = TC_ACT_STOLEN,
2163                                         .eaction = TCA_EGRESS_REDIR,
2164                                         .ifindex = ptoi[i].ifindex,
2165                                      });
2166                         mnl_attr_nest_end(nlh, na_act);
2167                         mnl_attr_nest_end(nlh, na_act_index);
2168                         break;
2169                 case RTE_FLOW_ACTION_TYPE_JUMP:
2170                         conf.jump = actions->conf;
2171                         na_act_index =
2172                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2173                         assert(na_act_index);
2174                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2175                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2176                         assert(na_act);
2177                         mnl_attr_put(nlh, TCA_GACT_PARMS,
2178                                      sizeof(struct tc_gact),
2179                                      &(struct tc_gact){
2180                                         .action = TC_ACT_GOTO_CHAIN |
2181                                                   conf.jump->group,
2182                                      });
2183                         mnl_attr_nest_end(nlh, na_act);
2184                         mnl_attr_nest_end(nlh, na_act_index);
2185                         break;
2186                 case RTE_FLOW_ACTION_TYPE_DROP:
2187                         na_act_index =
2188                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2189                         assert(na_act_index);
2190                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2191                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2192                         assert(na_act);
2193                         mnl_attr_put(nlh, TCA_GACT_PARMS,
2194                                      sizeof(struct tc_gact),
2195                                      &(struct tc_gact){
2196                                         .action = TC_ACT_SHOT,
2197                                      });
2198                         mnl_attr_nest_end(nlh, na_act);
2199                         mnl_attr_nest_end(nlh, na_act_index);
2200                         break;
2201                 case RTE_FLOW_ACTION_TYPE_COUNT:
2202                         /*
2203                          * Driver adds the count action implicitly for
2204                          * each rule it creates.
2205                          */
2206                         ret = flow_tcf_translate_action_count(dev,
2207                                                               dev_flow, error);
2208                         if (ret < 0)
2209                                 return ret;
2210                         break;
2211                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
2212                         conf.of_push_vlan = NULL;
2213                         vlan_act = TCA_VLAN_ACT_POP;
2214                         goto action_of_vlan;
2215                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
2216                         conf.of_push_vlan = actions->conf;
2217                         vlan_act = TCA_VLAN_ACT_PUSH;
2218                         goto action_of_vlan;
2219                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
2220                         conf.of_set_vlan_vid = actions->conf;
2221                         if (na_vlan_id)
2222                                 goto override_na_vlan_id;
2223                         vlan_act = TCA_VLAN_ACT_MODIFY;
2224                         goto action_of_vlan;
2225                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
2226                         conf.of_set_vlan_pcp = actions->conf;
2227                         if (na_vlan_priority)
2228                                 goto override_na_vlan_priority;
2229                         vlan_act = TCA_VLAN_ACT_MODIFY;
2230                         goto action_of_vlan;
2231 action_of_vlan:
2232                         na_act_index =
2233                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2234                         assert(na_act_index);
2235                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
2236                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2237                         assert(na_act);
2238                         mnl_attr_put(nlh, TCA_VLAN_PARMS,
2239                                      sizeof(struct tc_vlan),
2240                                      &(struct tc_vlan){
2241                                         .action = TC_ACT_PIPE,
2242                                         .v_action = vlan_act,
2243                                      });
2244                         if (vlan_act == TCA_VLAN_ACT_POP) {
2245                                 mnl_attr_nest_end(nlh, na_act);
2246                                 mnl_attr_nest_end(nlh, na_act_index);
2247                                 break;
2248                         }
2249                         if (vlan_act == TCA_VLAN_ACT_PUSH)
2250                                 mnl_attr_put_u16(nlh,
2251                                                  TCA_VLAN_PUSH_VLAN_PROTOCOL,
2252                                                  conf.of_push_vlan->ethertype);
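                        /*
                         * Emit placeholder TCA_VLAN_PAD attributes and
                         * remember their location; a following SET_VLAN_VID or
                         * SET_VLAN_PCP action rewrites the type and payload in
                         * place (see the override labels below).
                         */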
2253                         na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
2254                         mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
2255                         na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
2256                         mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
2257                         mnl_attr_nest_end(nlh, na_act);
2258                         mnl_attr_nest_end(nlh, na_act_index);
2259                         if (actions->type ==
2260                             RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
2261 override_na_vlan_id:
2262                                 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
2263                                 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
2264                                         rte_be_to_cpu_16
2265                                         (conf.of_set_vlan_vid->vlan_vid);
2266                         } else if (actions->type ==
2267                                    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
2268 override_na_vlan_priority:
2269                                 na_vlan_priority->nla_type =
2270                                         TCA_VLAN_PUSH_VLAN_PRIORITY;
2271                                 *(uint8_t *)mnl_attr_get_payload
2272                                         (na_vlan_priority) =
2273                                         conf.of_set_vlan_pcp->vlan_pcp;
2274                         }
2275                         break;
2276                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
2277                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
2278                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
2279                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
2280                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
2281                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
2282                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
2283                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
2284                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
2285                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
2286                         na_act_index =
2287                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2288                         flow_tcf_create_pedit_mnl_msg(nlh,
2289                                                       &actions, item_flags);
2290                         mnl_attr_nest_end(nlh, na_act_index);
2291                         break;
2292                 default:
2293                         return rte_flow_error_set(error, ENOTSUP,
2294                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2295                                                   actions,
2296                                                   "action not supported");
2297                 }
2298         }
2299         assert(na_flower);
2300         assert(na_flower_act);
2301         mnl_attr_nest_end(nlh, na_flower_act);
2302         mnl_attr_nest_end(nlh, na_flower);
2303         return 0;
2304 }
2305
2306 /**
2307  * Send Netlink message with acknowledgment.
2308  *
2309  * @param ctx
2310  *   Flow context to use.
2311  * @param nlh
2312  *   Message to send. This function always raises the NLM_F_ACK flag before
2313  *   sending.
2314  *
2315  * @return
2316  *   0 on success, a negative errno value otherwise and rte_errno is set.
2317  */
2318 static int
2319 flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
2320 {
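        /*
         * The kernel may echo the original request inside the NLMSG_ERROR
         * acknowledgment, so the answer buffer is sized for an nlmsgerr
         * header plus the request body.
         */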
2321         alignas(struct nlmsghdr)
2322         uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
2323                     nlh->nlmsg_len - sizeof(*nlh)];
2324         uint32_t seq = ctx->seq++;
2325         struct mnl_socket *nl = ctx->nl;
2326         int ret;
2327
2328         nlh->nlmsg_flags |= NLM_F_ACK;
2329         nlh->nlmsg_seq = seq;
2330         ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
2331         if (ret != -1)
2332                 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
2333         if (ret != -1)
2334                 ret = mnl_cb_run
2335                         (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
2336         if (ret > 0)
2337                 return 0;
2338         rte_errno = errno;
2339         return -rte_errno;
2340 }
2341
2342 /**
2343  * Apply flow to E-Switch by sending Netlink message.
2344  *
2345  * @param[in] dev
2346  *   Pointer to Ethernet device.
2347  * @param[in, out] flow
2348  *   Pointer to the sub flow.
2349  * @param[out] error
2350  *   Pointer to the error structure.
2351  *
2352  * @return
2353  *   0 on success, a negative errno value otherwise and rte_errno is set.
2354  */
2355 static int
2356 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2357                struct rte_flow_error *error)
2358 {
2359         struct priv *priv = dev->data->dev_private;
2360         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2361         struct mlx5_flow *dev_flow;
2362         struct nlmsghdr *nlh;
2363
2364         dev_flow = LIST_FIRST(&flow->dev_flows);
2365         /* E-Switch flow can't be expanded. */
2366         assert(!LIST_NEXT(dev_flow, next));
2367         nlh = dev_flow->tcf.nlh;
2368         nlh->nlmsg_type = RTM_NEWTFILTER;
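        /* Create exclusively: fail instead of replacing an existing filter
         * with the same handle and priority. */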
2369         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2370         if (!flow_tcf_nl_ack(ctx, nlh))
2371                 return 0;
2372         return rte_flow_error_set(error, rte_errno,
2373                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2374                                   "netlink: failed to create TC flow rule");
2375 }
2376
2377 /**
2378  * Remove flow from E-Switch by sending Netlink message.
2379  *
2380  * @param[in] dev
2381  *   Pointer to Ethernet device.
2382  * @param[in, out] flow
2383  *   Pointer to the sub flow.
2384  */
2385 static void
2386 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2387 {
2388         struct priv *priv = dev->data->dev_private;
2389         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2390         struct mlx5_flow *dev_flow;
2391         struct nlmsghdr *nlh;
2392
2393         if (!flow)
2394                 return;
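        /* Release the implicitly attached counter once its last reference
         * is dropped. */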
2395         if (flow->counter) {
2396                 if (--flow->counter->ref_cnt == 0) {
2397                         rte_free(flow->counter);
2398                         flow->counter = NULL;
2399                 }
2400         }
2401         dev_flow = LIST_FIRST(&flow->dev_flows);
2402         if (!dev_flow)
2403                 return;
2404         /* E-Switch flow can't be expanded. */
2405         assert(!LIST_NEXT(dev_flow, next));
2406         nlh = dev_flow->tcf.nlh;
2407         nlh->nlmsg_type = RTM_DELTFILTER;
2408         nlh->nlmsg_flags = NLM_F_REQUEST;
2409         flow_tcf_nl_ack(ctx, nlh);
2410 }
2411
2412 /**
2413  * Remove flow from E-Switch and release resources of the device flow.
2414  *
2415  * @param[in] dev
2416  *   Pointer to Ethernet device.
2417  * @param[in, out] flow
2418  *   Pointer to the sub flow.
2419  */
2420 static void
2421 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2422 {
2423         struct mlx5_flow *dev_flow;
2424
2425         if (!flow)
2426                 return;
2427         flow_tcf_remove(dev, flow);
2428         dev_flow = LIST_FIRST(&flow->dev_flows);
2429         if (!dev_flow)
2430                 return;
2431         /* E-Switch flow can't be expanded. */
2432         assert(!LIST_NEXT(dev_flow, next));
2433         LIST_REMOVE(dev_flow, next);
2434         rte_free(dev_flow);
2435 }
2436
2437 /**
2438  * Helper routine for figuring the space size required for a parse buffer.
2439  *
2440  * @param array
2441  *   Array of values to use.
2442  * @param idx
2443  *   Current location in array.
2444  * @param value
2445  *   Value to compare with.
2446  *
2447  * @return
2448  *   The maximum between the given value and the array value on index.
2449  */
2450 static uint16_t
2451 flow_tcf_arr_val_max(uint16_t array[], int idx, uint16_t value)
2452 {
2453         return idx < 0 ? value : RTE_MAX(array[idx], value);
2454 }
2455
2456 /**
2457  * Parse rtnetlink message attributes filling the attribute table with the info
2458  * retrieved.
2459  *
2460  * @param tb
2461  *   Attribute table to be filled.
2462  * @param[out] max
2463  *   Maximum entry in the attribute table.
2464  * @param rta
2465  *   The attributes section in the message to be parsed.
2466  * @param len
2467  *   The length of the attributes section in the message.
2468  */
2469 static void
2470 flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
2471                          struct rtattr *rta, int len)
2472 {
2473         unsigned short type;
2474         memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
2475         while (RTA_OK(rta, len)) {
2476                 type = rta->rta_type;
2477                 if (type <= max && !tb[type])
2478                         tb[type] = rta;
2479                 rta = RTA_NEXT(rta, len);
2480         }
2481 }
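/*
 * Usage sketch, for illustration only (mirrors flow_tcf_nl_filter_parse_and_get()
 * below, where "t" is the tc message and "len" the attribute payload length):
 *
 *     struct rtattr *tb[TCA_MAX + 1];
 *
 *     flow_tcf_nl_parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len);
 *     if (tb[TCA_KIND] && !strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
 *             ... the filter is a flower classifier ...
 */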
2482
2483 /**
2484  * Extract flow counters from flower action.
2485  *
2486  * @param rta
2487  *   flower action stats properties in the Netlink message received.
2488  * @param rta_type
2489  *   The backward sequence of rta_types, as written in the attribute table,
2490  *   that we need to traverse in order to reach the requested object.
2491  * @param idx
2492  *   Current location in rta_type table.
2493  * @param[out] data
2494  *   Data holding the count statistics of the rte_flow retrieved from
2495  *   the message.
2496  *
2497  * @return
2498  *   0 if data was found and retrieved, -1 otherwise.
2499  */
2500 static int
2501 flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
2502                                        uint16_t rta_type[], int idx,
2503                                        struct gnet_stats_basic *data)
2504 {
2505         int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
2506                                                  TCA_STATS_BASIC);
2507         struct rtattr *tbs[tca_stats_max + 1];
2508
2509         if (rta == NULL || idx < 0)
2510                 return -1;
2511         flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
2512                                  RTA_DATA(rta), RTA_PAYLOAD(rta));
2513         switch (rta_type[idx]) {
2514         case TCA_STATS_BASIC:
2515                 if (tbs[TCA_STATS_BASIC]) {
2516                         memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
2517                                RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
2518                                sizeof(*data)));
2519                         return 0;
2520                 }
2521                 break;
2522         default:
2523                 break;
2524         }
2525         return -1;
2526 }
2527
2528 /**
2529  * Parse a single flower action, retrieving the requested action attribute
2530  * if found.
2531  *
2532  * @param arg
2533  *   flower action properties in the Netlink message received.
2534  * @param rta_type
2535  *   The backward sequence of rta_types, as written in the attribute table,
2536  *   that we need to traverse in order to reach the requested object.
2537  * @param idx
2538  *   Current location in rta_type table.
2539  * @param[out] data
2540  *   Count statistics retrieved from the message query.
2541  *
2542  * @return
2543  *   0 if data was found and retrieved, -1 otherwise.
2544  */
2545 static int
2546 flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
2547                                      uint16_t rta_type[], int idx, void *data)
2548 {
2549         int tca_act_max = flow_tcf_arr_val_max(rta_type, idx, TCA_ACT_STATS);
2550         struct rtattr *tb[tca_act_max + 1];
2551
2552         if (arg == NULL || idx < 0)
2553                 return -1;
2554         flow_tcf_nl_parse_rtattr(tb, tca_act_max,
2555                                  RTA_DATA(arg), RTA_PAYLOAD(arg));
2556         if (tb[TCA_ACT_KIND] == NULL)
2557                 return -1;
2558         switch (rta_type[idx]) {
2559         case TCA_ACT_STATS:
2560                 if (tb[TCA_ACT_STATS])
2561                         return flow_tcf_nl_action_stats_parse_and_get
2562                                         (tb[TCA_ACT_STATS],
2563                                          rta_type, --idx,
2564                                          (struct gnet_stats_basic *)data);
2565                 break;
2566         default:
2567                 break;
2568         }
2569         return -1;
2570 }
2571
2572 /**
2573  * Parse the flower action section in the message, retrieving the requested
2574  * attribute from the first action that provides it.
2575  *
2576  * @param arg
2577  *   flower actions section in the Netlink message received.
2578  * @param rta_type
2579  *   The backward sequence of rta_types, as written in the attribute table,
2580  *   that we need to traverse in order to reach the requested object.
2581  * @param idx
2582  *   Current location in rta_type table.
2583  * @param[out] data
2584  *   Data retrieved from the message query.
2585  *
2586  * @return
2587  *   0 if data was found and retrieved, -1 otherwise.
2588  */
2589 static int
2590 flow_tcf_nl_action_parse_and_get(struct rtattr *arg,
2591                                  uint16_t rta_type[], int idx, void *data)
2592 {
2593         struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
2594         int i;
2595
2596         if (arg == NULL || idx < 0)
2597                 return -1;
2598         flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
2599                                  RTA_DATA(arg), RTA_PAYLOAD(arg));
2600         switch (rta_type[idx]) {
2601         /*
2602          * Flow counters are stored in the actions defined by the flow,
2603          * not in the flow itself, therefore we need to traverse the
2604          * flower chain of actions in search of them.
2605          *
2606          * Note that the index is not decremented here.
2607          */
2608         case TCA_ACT_STATS:
2609                 for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
2610                         if (tb[i] &&
2611                         !flow_tcf_nl_parse_one_action_and_get(tb[i],
2612                                                               rta_type,
2613                                                               idx, data))
2614                                 return 0;
2615                 }
2616                 break;
2617         default:
2618                 break;
2619         }
2620         return -1;
2621 }
2622
2623 /**
2624  * Parse flower classifier options in the message, retrieving the requested
2625  * attribute if found.
2626  *
2627  * @param opt
2628  *   flower section in the Netlink message received.
2629  * @param rta_type
2630  *   The backward sequence of rta_types, as written in the attribute table,
2631  *   that we need to traverse in order to reach the requested object.
2632  * @param idx
2633  *   Current location in rta_type table.
2634  * @param[out] data
2635  *   Data retrieved from the message query.
2636  *
2637  * @return
2638  *   0 if data was found and retrieved, -1 otherwise.
2639  */
2640 static int
2641 flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
2642                                uint16_t rta_type[], int idx, void *data)
2643 {
2644         int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
2645                                                   TCA_FLOWER_ACT);
2646         struct rtattr *tb[tca_flower_max + 1];
2647
2648         if (!opt || idx < 0)
2649                 return -1;
2650         flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
2651                                  RTA_DATA(opt), RTA_PAYLOAD(opt));
2652         switch (rta_type[idx]) {
2653         case TCA_FLOWER_ACT:
2654                 if (tb[TCA_FLOWER_ACT])
2655                         return flow_tcf_nl_action_parse_and_get
2656                                                         (tb[TCA_FLOWER_ACT],
2657                                                          rta_type, --idx, data);
2658                 break;
2659         default:
2660                 break;
2661         }
2662         return -1;
2663 }
2664
2665 /**
2666  * Parse Netlink reply on filter query, retrieving the flow counters.
2667  *
2668  * @param cnlh
2669  *   Message received from Netlink.
2670  * @param rta_type
2671  *   The backward sequence of rta_types, as written in the attribute table,
2672  *   that we need to traverse in order to reach the requested object.
2673  * @param idx
2674  *   Current location in rta_type table.
2675  * @param[out] data
2676  *   Data retrieved from the message query.
2677  *
2678  * @return
2679  *   0 if data was found and retrieved, -1 otherwise.
2680  */
2681 static int
2682 flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
2683                                  uint16_t rta_type[], int idx, void *data)
2684 {
2685         struct nlmsghdr *nlh = cnlh;
2686         struct tcmsg *t = NLMSG_DATA(nlh);
2687         int len = nlh->nlmsg_len;
2688         int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
2689         struct rtattr *tb[tca_max + 1];
2690
2691         if (idx < 0)
2692                 return -1;
2693         if (nlh->nlmsg_type != RTM_NEWTFILTER &&
2694             nlh->nlmsg_type != RTM_GETTFILTER &&
2695             nlh->nlmsg_type != RTM_DELTFILTER)
2696                 return -1;
2697         len -= NLMSG_LENGTH(sizeof(*t));
2698         if (len < 0)
2699                 return -1;
2700         flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
2701         /* Not a TC flower flow - bail out */
2702         if (!tb[TCA_KIND] ||
2703             strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
2704                 return -1;
2705         switch (rta_type[idx]) {
2706         case TCA_OPTIONS:
2707                 if (tb[TCA_OPTIONS])
2708                         return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
2709                                                               rta_type,
2710                                                               --idx, data);
2711                 break;
2712         default:
2713                 break;
2714         }
2715         return -1;
2716 }
2717
2718 /**
2719  * A callback to parse Netlink reply on TC flower query.
2720  *
2721  * @param nlh
2722  *   Message received from Netlink.
2723  * @param[out] data
2724  *   Pointer to data area to be filled by the parsing routine.
2725  *   Assumed to be a pointer to struct flow_tcf_stats_basic.
2726  *
2727  * @return
2728  *   MNL_CB_OK value.
2729  */
2730 static int
2731 flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
2732 {
2733         /*
2734          * The backward sequence of rta_types to pass in order to get
2735          * to the counters.
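         * The parsing helpers below consume it from the last entry backwards,
         * descending one nesting level of the reply per entry.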
2736          */
2737         uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
2738                                 TCA_FLOWER_ACT, TCA_OPTIONS };
2739         struct flow_tcf_stats_basic *sb_data = data;
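        /*
         * Drop the const qualifier via a union; the parsing helpers below
         * only read the message.
         */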
2740         union {
2741                 const struct nlmsghdr *c;
2742                 struct nlmsghdr *nc;
2743         } tnlh = { .c = nlh };
2744
2745         if (!flow_tcf_nl_filter_parse_and_get(tnlh.nc, rta_type,
2746                                               RTE_DIM(rta_type) - 1,
2747                                               (void *)&sb_data->counters))
2748                 sb_data->valid = true;
2749         return MNL_CB_OK;
2750 }
2751
2752 /**
2753  * Query a TC flower rule for its statistics via netlink.
2754  *
2755  * @param[in] dev
2756  *   Pointer to Ethernet device.
2757  * @param[in] flow
2758  *   Pointer to the sub flow.
2759  * @param[out] data
2760  *   Data retrieved by the query.
2761  * @param[out] error
2762  *   Perform verbose error reporting if not NULL.
2763  *
2764  * @return
2765  *   0 on success, a negative errno value otherwise and rte_errno is set.
2766  */
2767 static int
2768 flow_tcf_query_count(struct rte_eth_dev *dev,
2769                      struct rte_flow *flow,
2770                      void *data,
2771                      struct rte_flow_error *error)
2772 {
2773         struct flow_tcf_stats_basic sb_data = { 0 };
2774         struct rte_flow_query_count *qc = data;
2775         struct priv *priv = dev->data->dev_private;
2776         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2777         struct mnl_socket *nl = ctx->nl;
2778         struct mlx5_flow *dev_flow;
2779         struct nlmsghdr *nlh;
2780         uint32_t seq = ctx->seq++;
2781         ssize_t ret;
2782         assert(qc);
2783
2784         dev_flow = LIST_FIRST(&flow->dev_flows);
2785         /* E-Switch flow can't be expanded. */
2786         assert(!LIST_NEXT(dev_flow, next));
2787         if (!dev_flow->flow->counter)
2788                 goto notsup_exit;
2789         nlh = dev_flow->tcf.nlh;
2790         nlh->nlmsg_type = RTM_GETTFILTER;
2791         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
2792         nlh->nlmsg_seq = seq;
2793         if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
2794                 goto error_exit;
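        /* Read replies until mnl_cb_run() signals completion or an error. */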
2795         do {
2796                 ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
2797                 if (ret <= 0)
2798                         break;
2799                 ret = mnl_cb_run(ctx->buf, ret, seq,
2800                                  mnl_socket_get_portid(nl),
2801                                  flow_tcf_nl_message_get_stats_basic,
2802                                  (void *)&sb_data);
2803         } while (ret > 0);
2805         if (sb_data.valid) {
2806                 /* Return the delta from last reset. */
2807                 qc->hits_set = 1;
2808                 qc->bytes_set = 1;
2809                 qc->hits = sb_data.counters.packets - flow->counter->hits;
2810                 qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
2811                 if (qc->reset) {
2812                         flow->counter->hits = sb_data.counters.packets;
2813                         flow->counter->bytes = sb_data.counters.bytes;
2814                 }
2815                 return 0;
2816         }
2817         return rte_flow_error_set(error, EINVAL,
2818                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2819                                   NULL,
2820                                   "flow does not have counter");
2821 error_exit:
2822         return rte_flow_error_set
2823                         (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2824                          NULL, "netlink: failed to read flow rule counters");
2825 notsup_exit:
2826         return rte_flow_error_set
2827                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2828                          NULL, "counters are not available");
2829 }
2830
2831 /**
2832  * Query a flow.
2833  *
2834  * @see rte_flow_query()
2835  * @see rte_flow_ops
2836  */
2837 static int
2838 flow_tcf_query(struct rte_eth_dev *dev,
2839                struct rte_flow *flow,
2840                const struct rte_flow_action *actions,
2841                void *data,
2842                struct rte_flow_error *error)
2843 {
2844         int ret = -EINVAL;
2845
2846         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2847                 switch (actions->type) {
2848                 case RTE_FLOW_ACTION_TYPE_VOID:
2849                         break;
2850                 case RTE_FLOW_ACTION_TYPE_COUNT:
2851                         ret = flow_tcf_query_count(dev, flow, data, error);
2852                         break;
2853                 default:
2854                         return rte_flow_error_set(error, ENOTSUP,
2855                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2856                                                   actions,
2857                                                   "action not supported");
2858                 }
2859         }
2860         return ret;
2861 }
2862
2863 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2864         .validate = flow_tcf_validate,
2865         .prepare = flow_tcf_prepare,
2866         .translate = flow_tcf_translate,
2867         .apply = flow_tcf_apply,
2868         .remove = flow_tcf_remove,
2869         .destroy = flow_tcf_destroy,
2870         .query = flow_tcf_query,
2871 };
2872
2873 /**
2874  * Create and configure a libmnl socket for Netlink flow rules.
2875  *
2876  * @return
2877  *   A valid libmnl socket object pointer on success, NULL otherwise and
2878  *   rte_errno is set.
2879  */
2880 static struct mnl_socket *
2881 flow_tcf_mnl_socket_create(void)
2882 {
2883         struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
2884
2885         if (nl) {
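                /*
                 * Request capped acknowledgments (NETLINK_CAP_ACK) so the
                 * kernel does not echo the request payload back in acks.
                 */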
2886                 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2887                                       sizeof(int));
2888                 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2889                         return nl;
2890         }
2891         rte_errno = errno;
2892         if (nl)
2893                 mnl_socket_close(nl);
2894         return NULL;
2895 }
2896
2897 /**
2898  * Destroy a libmnl socket.
2899  *
2900  * @param nl
2901  *   Libmnl socket of the @p NETLINK_ROUTE kind.
2902  */
2903 static void
2904 flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
2905 {
2906         if (nl)
2907                 mnl_socket_close(nl);
2908 }
2909
2910 /**
2911  * Initialize ingress qdisc of a given network interface.
2912  *
2913  * @param ctx
2914  *   Pointer to tc-flower context to use.
2915  * @param ifindex
2916  *   Index of network interface to initialize.
2917  * @param[out] error
2918  *   Perform verbose error reporting if not NULL.
2919  *
2920  * @return
2921  *   0 on success, a negative errno value otherwise and rte_errno is set.
2922  */
2923 int
2924 mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
2925                    unsigned int ifindex, struct rte_flow_error *error)
2926 {
2927         struct nlmsghdr *nlh;
2928         struct tcmsg *tcm;
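        /* Buffer for netlink and tc headers plus 128 bytes of attributes. */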
2929         alignas(struct nlmsghdr)
2930         uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2931
2932         /* Destroy existing ingress qdisc and everything attached to it. */
2933         nlh = mnl_nlmsg_put_header(buf);
2934         nlh->nlmsg_type = RTM_DELQDISC;
2935         nlh->nlmsg_flags = NLM_F_REQUEST;
2936         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2937         tcm->tcm_family = AF_UNSPEC;
2938         tcm->tcm_ifindex = ifindex;
2939         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2940         tcm->tcm_parent = TC_H_INGRESS;
2941         /* Ignore errors when qdisc is already absent. */
2942         if (flow_tcf_nl_ack(ctx, nlh) &&
2943             rte_errno != EINVAL && rte_errno != ENOENT)
2944                 return rte_flow_error_set(error, rte_errno,
2945                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2946                                           "netlink: failed to remove ingress"
2947                                           " qdisc");
2948         /* Create fresh ingress qdisc. */
2949         nlh = mnl_nlmsg_put_header(buf);
2950         nlh->nlmsg_type = RTM_NEWQDISC;
2951         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2952         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2953         tcm->tcm_family = AF_UNSPEC;
2954         tcm->tcm_ifindex = ifindex;
2955         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2956         tcm->tcm_parent = TC_H_INGRESS;
2957         mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2958         if (flow_tcf_nl_ack(ctx, nlh))
2959                 return rte_flow_error_set(error, rte_errno,
2960                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2961                                           "netlink: failed to create ingress"
2962                                           " qdisc");
2963         return 0;
2964 }
2965
2966 /**
2967  * Create tc-flower context to be used with Netlink flow rules.
2968  *
2969  * @return
2970  *   A valid tc-flower context object pointer on success, NULL otherwise and
2971  *   rte_errno is set.
2972  */
2973 struct mlx5_flow_tcf_context *
2974 mlx5_flow_tcf_context_create(void)
2975 {
2976         struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
2977                                                         sizeof(*ctx),
2978                                                         sizeof(uint32_t));
2979         if (!ctx)
2980                 goto error;
2981         ctx->nl = flow_tcf_mnl_socket_create();
2982         if (!ctx->nl)
2983                 goto error;
2984         ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
2985         ctx->buf = rte_zmalloc(__func__,
2986                                ctx->buf_size, sizeof(uint32_t));
2987         if (!ctx->buf)
2988                 goto error;
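        /* Randomize the initial sequence number, incremented per request. */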
2989         ctx->seq = random();
2990         return ctx;
2991 error:
2992         mlx5_flow_tcf_context_destroy(ctx);
2993         return NULL;
2994 }
2995
2996 /**
2997  * Destroy the tc-flower context.
2998  *
2999  * @param ctx
3000  *   Pointer to tc-flower context to destroy.
3001  */
3002 void
3003 mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
3004 {
3005         if (!ctx)
3006                 return;
3007         flow_tcf_mnl_socket_destroy(ctx->nl);
3008         rte_free(ctx->buf);
3009         rte_free(ctx);
3010 }